Mercurial > libavcodec.hg
annotate alpha/dsputil_alpha.c @ 5062:2dd00b1cc94b libavcodec
Remove mdct.o and fft.o from fft-test prerequisites list.
Both objects were added to the link command, resulting in multiple definitions
of symbols. Now linking works in the general case when mdct.o and fft.o are
compiled into libavcodec.a.
author | diego |
---|---|
date | Tue, 22 May 2007 07:08:38 +0000 |
parents | d5ba514e3f4a |
children | f7cbb7733146 |
rev | line source |
---|---|
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
1 /* |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
2 * Alpha optimized DSP utils |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org> |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
429 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 * Lesser General Public License for more details. | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
16 * |
429 | 17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
20 */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
21 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
22 #include "asm.h" |
5010
d5ba514e3f4a
Add libavcodec to compiler include flags in order to simplify header
diego
parents:
3947
diff
changeset
|
23 #include "dsputil.h" |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
24 |
1333
a1cc1810d58f
build error on Alpha patch by (Sam Hocevar <sam at zoy dot org>)
michaelni
parents:
1324
diff
changeset
|
25 extern void simple_idct_axp(DCTELEM *block); |
1092 | 26 extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block); |
27 extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block); | |
28 | |
511
fa4425cf6b31
Assembly version of put_pixels. This is currently the function that
mellum
parents:
509
diff
changeset
|
29 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, |
897 | 30 int line_size, int h); |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
31 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
897 | 32 int line_size); |
2967 | 33 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, |
897 | 34 int line_size); |
35 void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, | |
36 int line_size); | |
2967 | 37 void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, |
897 | 38 int line_size); |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
39 |
586 | 40 void get_pixels_mvi(DCTELEM *restrict block, |
41 const uint8_t *restrict pixels, int line_size); | |
42 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, | |
43 int stride); | |
1708 | 44 int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
705
107a56aa74f5
Add Alpha assembly for pix_abs16x16. Optimized for pca56, no large win
mellum
parents:
670
diff
changeset
|
45 int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); |
1708 | 46 int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); |
47 int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); | |
48 int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); | |
586 | 49 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
50 #if 0 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
51 /* These functions were the base for the optimized assembler routines, |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
52 and remain here for documentation purposes. */ |
2967 | 53 static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
54 int line_size) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
55 { |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
56 int i = 8; |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
57 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
58 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
59 do { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
60 uint64_t shorts0, shorts1; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
61 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
62 shorts0 = ldq(block); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
63 shorts0 = maxsw4(shorts0, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
64 shorts0 = minsw4(shorts0, clampmask); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
65 stl(pkwb(shorts0), pixels); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
66 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
67 shorts1 = ldq(block + 4); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
68 shorts1 = maxsw4(shorts1, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
69 shorts1 = minsw4(shorts1, clampmask); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
70 stl(pkwb(shorts1), pixels + 4); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
71 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
72 pixels += line_size; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
73 block += 8; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
74 } while (--i); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
75 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
76 |
2967 | 77 void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
78 int line_size) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
79 { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
80 int h = 8; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
81 /* Keep this function a leaf function by generating the constants |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
82 manually (mainly for the hack value ;-). */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
83 uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
84 uint64_t signmask = zap(-1, 0x33); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
85 signmask ^= signmask >> 1; /* 0x8000800080008000 */ |
505
7a976bf93394
Ugly hack to make the assembler accept MVI instructions.
mellum
parents:
429
diff
changeset
|
86 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
87 do { |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
88 uint64_t shorts0, pix0, signs0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
89 uint64_t shorts1, pix1, signs1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
90 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
91 shorts0 = ldq(block); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
92 shorts1 = ldq(block + 4); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
93 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
94 pix0 = unpkbw(ldl(pixels)); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
95 /* Signed subword add (MMX paddw). */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
96 signs0 = shorts0 & signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
97 shorts0 &= ~signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
98 shorts0 += pix0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
99 shorts0 ^= signs0; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
100 /* Clamp. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
101 shorts0 = maxsw4(shorts0, 0); |
2967 | 102 shorts0 = minsw4(shorts0, clampmask); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
103 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
104 /* Next 4. */ |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
105 pix1 = unpkbw(ldl(pixels + 4)); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
106 signs1 = shorts1 & signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
107 shorts1 &= ~signmask; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
108 shorts1 += pix1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
109 shorts1 ^= signs1; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
110 shorts1 = maxsw4(shorts1, 0); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
111 shorts1 = minsw4(shorts1, clampmask); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
112 |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
113 stl(pkwb(shorts0), pixels); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
114 stl(pkwb(shorts1), pixels + 4); |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
115 |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
116 pixels += line_size; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
117 block += 8; |
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
118 } while (--h); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
119 } |
509
cab79946302f
Implement put_pixels_clamped and add_pixels_clamped in Assembler. This
mellum
parents:
505
diff
changeset
|
120 #endif |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
121 |
518 | 122 static void clear_blocks_axp(DCTELEM *blocks) { |
123 uint64_t *p = (uint64_t *) blocks; | |
124 int n = sizeof(DCTELEM) * 6 * 64; | |
125 | |
126 do { | |
127 p[0] = 0; | |
128 p[1] = 0; | |
129 p[2] = 0; | |
130 p[3] = 0; | |
131 p[4] = 0; | |
132 p[5] = 0; | |
133 p[6] = 0; | |
134 p[7] = 0; | |
135 p += 8; | |
136 n -= 8 * 8; | |
137 } while (n); | |
138 } | |
139 | |
513 | 140 static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
141 { |
513 | 142 return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); |
143 } | |
144 | |
145 static inline uint64_t avg2(uint64_t a, uint64_t b) | |
146 { | |
2967 | 147 return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
148 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
149 |
546 | 150 #if 0 |
151 /* The XY2 routines basically utilize this scheme, but reuse parts in | |
152 each iteration. */ | |
513 | 153 static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
154 { |
513 | 155 uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) |
897 | 156 + ((l2 & ~BYTE_VEC(0x03)) >> 2) |
157 + ((l3 & ~BYTE_VEC(0x03)) >> 2) | |
158 + ((l4 & ~BYTE_VEC(0x03)) >> 2); | |
513 | 159 uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) |
897 | 160 + (l2 & BYTE_VEC(0x03)) |
161 + (l3 & BYTE_VEC(0x03)) | |
162 + (l4 & BYTE_VEC(0x03)) | |
163 + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); | |
513 | 164 return r1 + r2; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
165 } |
546 | 166 #endif |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
167 |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
168 #define OP(LOAD, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
169 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
170 STORE(LOAD(pixels), block); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
171 pixels += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
172 block += line_size; \ |
513 | 173 } while (--h) |
174 | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
175 #define OP_X2(LOAD, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
176 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
177 uint64_t pix1, pix2; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
178 \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
179 pix1 = LOAD(pixels); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
180 pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
181 STORE(AVG2(pix1, pix2), block); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
182 pixels += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
183 block += line_size; \ |
513 | 184 } while (--h) |
185 | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
186 #define OP_Y2(LOAD, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
187 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
188 uint64_t pix = LOAD(pixels); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
189 do { \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
190 uint64_t next_pix; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
191 \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
192 pixels += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
193 next_pix = LOAD(pixels); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
194 STORE(AVG2(pix, next_pix), block); \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
195 block += line_size; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
196 pix = next_pix; \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
197 } while (--h); \ |
513 | 198 } while (0) |
199 | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
200 #define OP_XY2(LOAD, STORE) \ |
546 | 201 do { \ |
202 uint64_t pix1 = LOAD(pixels); \ | |
203 uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |
204 uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \ | |
205 + (pix2 & BYTE_VEC(0x03)); \ | |
206 uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \ | |
207 + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ | |
208 \ | |
209 do { \ | |
210 uint64_t npix1, npix2; \ | |
211 uint64_t npix_l, npix_h; \ | |
212 uint64_t avg; \ | |
213 \ | |
214 pixels += line_size; \ | |
215 npix1 = LOAD(pixels); \ | |
216 npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |
217 npix_l = (npix1 & BYTE_VEC(0x03)) \ | |
218 + (npix2 & BYTE_VEC(0x03)); \ | |
219 npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ | |
220 + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ | |
221 avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \ | |
222 + pix_h + npix_h; \ | |
223 STORE(avg, block); \ | |
224 \ | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
225 block += line_size; \ |
546 | 226 pix_l = npix_l; \ |
227 pix_h = npix_h; \ | |
228 } while (--h); \ | |
513 | 229 } while (0) |
230 | |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
231 #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
232 static void OPNAME ## _pixels ## SUFF ## _axp \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
233 (uint8_t *restrict block, const uint8_t *restrict pixels, \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
234 int line_size, int h) \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
235 { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
236 if ((size_t) pixels & 0x7) { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
237 OPKIND(uldq, STORE); \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
238 } else { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
239 OPKIND(ldq, STORE); \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
240 } \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
241 } \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
242 \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
243 static void OPNAME ## _pixels16 ## SUFF ## _axp \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
244 (uint8_t *restrict block, const uint8_t *restrict pixels, \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
245 int line_size, int h) \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
246 { \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
247 OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
248 OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
249 } |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
250 |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
251 #define PIXOP(OPNAME, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
252 MAKE_OP(OPNAME, , OP, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
253 MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
254 MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ |
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
255 MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) |
513 | 256 |
257 /* Rounding primitives. */ | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
258 #define AVG2 avg2 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
259 #define AVG4 avg4 |
546 | 260 #define AVG4_ROUNDER BYTE_VEC(0x02) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
261 #define STORE(l, b) stq(l, b) |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
262 PIXOP(put, STORE); |
513 | 263 |
264 #undef STORE | |
265 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
266 PIXOP(avg, STORE); |
513 | 267 |
268 /* Non-rounding primitives. */ | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
269 #undef AVG2 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
270 #undef AVG4 |
546 | 271 #undef AVG4_ROUNDER |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
272 #undef STORE |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
273 #define AVG2 avg2_no_rnd |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
274 #define AVG4 avg4_no_rnd |
546 | 275 #define AVG4_ROUNDER BYTE_VEC(0x01) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
276 #define STORE(l, b) stq(l, b) |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
277 PIXOP(put_no_rnd, STORE); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
278 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
279 #undef STORE |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
280 #define STORE(l, b) stq(AVG2(l, ldq(b)), b); |
548
3f05be811b5a
Remove support for variable BSIZE and INCR, as sub_pixels_* is no
mellum
parents:
546
diff
changeset
|
281 PIXOP(avg_no_rnd, STORE); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
282 |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
283 void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
284 int line_size, int h) |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
285 { |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
286 put_pixels_axp_asm(block, pixels, line_size, h); |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
287 put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
288 } |
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
289 |
986 | 290 static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride) |
291 { | |
292 return pix_abs16x16_mvi_asm(a, b, stride); | |
293 } | |
294 | |
1092 | 295 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
296 { |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
297 c->put_pixels_tab[0][0] = put_pixels16_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
298 c->put_pixels_tab[0][1] = put_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
299 c->put_pixels_tab[0][2] = put_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
300 c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
301 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
302 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
303 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
304 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
305 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
306 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
307 c->avg_pixels_tab[0][0] = avg_pixels16_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
308 c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
309 c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
310 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
311 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
312 c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
313 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
314 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
315 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp; |
670
340e3ba84119
Synthesize pixels16 functions from pixels functions.
mellum
parents:
663
diff
changeset
|
316 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
317 c->put_pixels_tab[1][0] = put_pixels_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
318 c->put_pixels_tab[1][1] = put_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
319 c->put_pixels_tab[1][2] = put_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
320 c->put_pixels_tab[1][3] = put_pixels_xy2_axp; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
321 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
322 c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
323 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
324 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
325 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
326 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
327 c->avg_pixels_tab[1][0] = avg_pixels_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
328 c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
329 c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
330 c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; |
513 | 331 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
332 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
333 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
334 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
335 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp; |
513 | 336 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
337 c->clear_blocks = clear_blocks_axp; |
518 | 338 |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
339 /* amask clears all bits that correspond to present features. */ |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
340 if (amask(AMASK_MVI) == 0) { |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
341 c->put_pixels_clamped = put_pixels_clamped_mvi_asm; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
342 c->add_pixels_clamped = add_pixels_clamped_mvi_asm; |
586 | 343 |
856
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
344 c->get_pixels = get_pixels_mvi; |
3c6df37177dd
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
744
diff
changeset
|
345 c->diff_pixels = diff_pixels_mvi; |
986 | 346 c->sad[0] = sad16x16_mvi; |
1708 | 347 c->sad[1] = pix_abs8x8_mvi; |
348 // c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed | |
349 c->pix_abs[0][0] = sad16x16_mvi; | |
350 c->pix_abs[1][0] = pix_abs8x8_mvi; | |
351 c->pix_abs[0][1] = pix_abs16x16_x2_mvi; | |
352 c->pix_abs[0][2] = pix_abs16x16_y2_mvi; | |
353 c->pix_abs[0][3] = pix_abs16x16_xy2_mvi; | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
354 } |
897 | 355 |
356 put_pixels_clamped_axp_p = c->put_pixels_clamped; | |
357 add_pixels_clamped_axp_p = c->add_pixels_clamped; | |
2967 | 358 |
1092 | 359 c->idct_put = simple_idct_put_axp; |
360 c->idct_add = simple_idct_add_axp; | |
1333
a1cc1810d58f
build error on Alpha patch by (Sam Hocevar <sam at zoy dot org>)
michaelni
parents:
1324
diff
changeset
|
361 c->idct = simple_idct_axp; |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
diff
changeset
|
362 } |