Mercurial > libavcodec.hg
annotate dsputil.c @ 625:bb6a69f9d409 libavcodec
slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
per context DCT selection
author | michaelni |
---|---|
date | Thu, 29 Aug 2002 23:55:32 +0000 |
parents | 92e99e506920 |
children | 23a093d6e450 |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
256 | 18 * |
385 | 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
0 | 20 */ |
21 #include "avcodec.h" | |
22 #include "dsputil.h" | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
23 #include "simple_idct.h" |
0 | 24 |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
6
diff
changeset
|
25 void (*ff_idct)(DCTELEM *block); |
480 | 26 void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); |
27 void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); | |
0 | 28 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); |
324 | 29 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
0 | 30 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
31 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | |
255 | 32 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); |
296 | 33 void (*clear_blocks)(DCTELEM *blocks); |
612 | 34 int (*pix_sum)(UINT8 * pix, int line_size); |
35 int (*pix_norm1)(UINT8 * pix, int line_size); | |
0 | 36 |
37 op_pixels_abs_func pix_abs16x16; | |
38 op_pixels_abs_func pix_abs16x16_x2; | |
39 op_pixels_abs_func pix_abs16x16_y2; | |
40 op_pixels_abs_func pix_abs16x16_xy2; | |
41 | |
294 | 42 op_pixels_abs_func pix_abs8x8; |
43 op_pixels_abs_func pix_abs8x8_x2; | |
44 op_pixels_abs_func pix_abs8x8_y2; | |
45 op_pixels_abs_func pix_abs8x8_xy2; | |
46 | |
50 | 47 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
0 | 48 UINT32 squareTbl[512]; |
49 | |
533
3c07cf9595de
adding ff prefix to avoid global name conficts with xvid (patch by Marko Kreen <marko at l-t.ee>)
michaelni
parents:
517
diff
changeset
|
50 extern INT16 ff_mpeg1_default_intra_matrix[64]; |
3c07cf9595de
adding ff prefix to avoid global name conficts with xvid (patch by Marko Kreen <marko at l-t.ee>)
michaelni
parents:
517
diff
changeset
|
51 extern INT16 ff_mpeg1_default_non_intra_matrix[64]; |
435
9247ad420889
* compatibilized declaration with its original definition
kabi
parents:
429
diff
changeset
|
52 extern INT16 ff_mpeg4_default_intra_matrix[64]; |
9247ad420889
* compatibilized declaration with its original definition
kabi
parents:
429
diff
changeset
|
53 extern INT16 ff_mpeg4_default_non_intra_matrix[64]; |
34 | 54 |
/*
 * Zigzag scan table for an 8x8 block: entry i is the raster position
 * (row * 8 + column) of the i-th coefficient in zigzag order.
 * build_zigzag_end() reads this table to fill zigzag_end[].
 */
55 UINT8 zigzag_direct[64] = { | |
56 0, 1, 8, 16, 9, 2, 3, 10, | |
57 17, 24, 32, 25, 18, 11, 4, 5, | |
58 12, 19, 26, 33, 40, 48, 41, 34, | |
59 27, 20, 13, 6, 7, 14, 21, 28, | |
60 35, 42, 49, 56, 57, 50, 43, 36, | |
61 29, 22, 15, 23, 30, 37, 44, 51, | |
62 58, 59, 52, 45, 38, 31, 39, 46, | |
63 53, 60, 61, 54, 47, 55, 62, 63 | |
64 }; | |
65 | |
220 | 66 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
67 UINT16 __align8 inv_zigzag_direct16[64]; | |
68 | |
69 /* not permutated zigzag_direct for MMX quantizer */ | |
70 UINT8 zigzag_direct_noperm[64]; | |
71 | |
/*
 * Alternate scan table: a permutation of the 64 raster positions 0..63 of an
 * 8x8 block, listed in scan order.
 * NOTE(review): presumably the "alternate horizontal" coefficient scan used by
 * the MPEG-style codecs in this library -- confirm against the users of this table.
 */
34 | 72 UINT8 ff_alternate_horizontal_scan[64] = { |
73 0, 1, 2, 3, 8, 9, 16, 17, | |
74 10, 11, 4, 5, 6, 7, 15, 14, | |
75 13, 12, 19, 18, 24, 25, 32, 33, | |
76 26, 27, 20, 21, 22, 23, 28, 29, | |
77 30, 31, 34, 35, 40, 41, 48, 49, | |
78 42, 43, 36, 37, 38, 39, 44, 45, | |
79 46, 47, 50, 51, 56, 57, 58, 59, | |
80 52, 53, 54, 55, 60, 61, 62, 63, | |
81 }; | |
82 | |
/*
 * Alternate scan table: a permutation of the 64 raster positions 0..63 of an
 * 8x8 block, listed in scan order.
 * NOTE(review): presumably the "alternate vertical" coefficient scan used by
 * the MPEG-style codecs in this library -- confirm against the users of this table.
 */
83 UINT8 ff_alternate_vertical_scan[64] = { | |
84 0, 8, 16, 24, 1, 9, 2, 10, | |
85 17, 25, 32, 40, 48, 56, 57, 49, | |
86 41, 33, 26, 18, 3, 11, 4, 12, | |
87 19, 27, 34, 42, 50, 58, 35, 43, | |
88 51, 59, 20, 28, 5, 13, 6, 14, | |
89 21, 29, 36, 44, 52, 60, 37, 45, | |
90 53, 61, 22, 30, 7, 15, 23, 31, | |
91 38, 46, 54, 62, 39, 47, 55, 63, | |
92 }; | |
93 | |
320
cda7d0857baf
- ME setting moved to AVCodecContext/MpegEncContext, no longer a global.
pulento
parents:
312
diff
changeset
|
94 #ifdef SIMPLE_IDCT |
cda7d0857baf
- ME setting moved to AVCodecContext/MpegEncContext, no longer a global.
pulento
parents:
312
diff
changeset
|
95 |
209 | 96 /* Input permutation for the simple_idct_mmx */ |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
97 static UINT8 simple_mmx_permutation[64]={ |
209 | 98 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, |
99 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
100 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
101 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
102 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
103 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
104 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
105 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
106 }; |
320
cda7d0857baf
- ME setting moved to AVCodecContext/MpegEncContext, no longer a global.
pulento
parents:
312
diff
changeset
|
107 #endif |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
108 |
220 | 109 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
110 UINT32 inverse[256]={ | |
111 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | |
112 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |
113 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |
114 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | |
115 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | |
116 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | |
117 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | |
118 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | |
119 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | |
120 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | |
121 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | |
122 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | |
123 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | |
124 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | |
125 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | |
126 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | |
127 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | |
128 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | |
129 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | |
130 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | |
131 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | |
132 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | |
133 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | |
134 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | |
135 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | |
136 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | |
137 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | |
138 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | |
139 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | |
140 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | |
141 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | |
142 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | |
143 }; | |
144 | |
200 | 145 /* used to skip zeros at the end */ |
146 UINT8 zigzag_end[64]; | |
147 | |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
148 UINT8 permutation[64]; |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
149 //UINT8 invPermutation[64]; |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
150 |
468 | 151 static void build_zigzag_end(void) |
200 | 152 { |
153 int lastIndex; | |
154 int lastIndexAfterPerm=0; | |
155 for(lastIndex=0; lastIndex<64; lastIndex++) | |
156 { | |
157 if(zigzag_direct[lastIndex] > lastIndexAfterPerm) | |
158 lastIndexAfterPerm= zigzag_direct[lastIndex]; | |
159 zigzag_end[lastIndex]= lastIndexAfterPerm + 1; | |
160 } | |
161 } | |
162 | |
612 | 163 int pix_sum_c(UINT8 * pix, int line_size) |
164 { | |
165 int s, i, j; | |
166 | |
167 s = 0; | |
168 for (i = 0; i < 16; i++) { | |
169 for (j = 0; j < 16; j += 8) { | |
170 s += pix[0]; | |
171 s += pix[1]; | |
172 s += pix[2]; | |
173 s += pix[3]; | |
174 s += pix[4]; | |
175 s += pix[5]; | |
176 s += pix[6]; | |
177 s += pix[7]; | |
178 pix += 8; | |
179 } | |
180 pix += line_size - 16; | |
181 } | |
182 return s; | |
183 } | |
184 | |
185 int pix_norm1_c(UINT8 * pix, int line_size) | |
186 { | |
187 int s, i, j; | |
188 UINT32 *sq = squareTbl + 256; | |
189 | |
190 s = 0; | |
191 for (i = 0; i < 16; i++) { | |
192 for (j = 0; j < 16; j += 8) { | |
193 s += sq[pix[0]]; | |
194 s += sq[pix[1]]; | |
195 s += sq[pix[2]]; | |
196 s += sq[pix[3]]; | |
197 s += sq[pix[4]]; | |
198 s += sq[pix[5]]; | |
199 s += sq[pix[6]]; | |
200 s += sq[pix[7]]; | |
201 pix += 8; | |
202 } | |
203 pix += line_size - 16; | |
204 } | |
205 return s; | |
206 } | |
207 | |
208 | |
516 | 209 void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) |
0 | 210 { |
211 int i; | |
212 | |
213 /* read the pixels */ | |
214 for(i=0;i<8;i++) { | |
516 | 215 block[0] = pixels[0]; |
216 block[1] = pixels[1]; | |
217 block[2] = pixels[2]; | |
218 block[3] = pixels[3]; | |
219 block[4] = pixels[4]; | |
220 block[5] = pixels[5]; | |
221 block[6] = pixels[6]; | |
222 block[7] = pixels[7]; | |
223 pixels += line_size; | |
224 block += 8; | |
0 | 225 } |
226 } | |
227 | |
516 | 228 void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, |
229 int stride){ | |
324 | 230 int i; |
231 | |
232 /* read the pixels */ | |
233 for(i=0;i<8;i++) { | |
516 | 234 block[0] = s1[0] - s2[0]; |
235 block[1] = s1[1] - s2[1]; | |
236 block[2] = s1[2] - s2[2]; | |
237 block[3] = s1[3] - s2[3]; | |
238 block[4] = s1[4] - s2[4]; | |
239 block[5] = s1[5] - s2[5]; | |
240 block[6] = s1[6] - s2[6]; | |
241 block[7] = s1[7] - s2[7]; | |
324 | 242 s1 += stride; |
243 s2 += stride; | |
516 | 244 block += 8; |
324 | 245 } |
246 } | |
247 | |
248 | |
516 | 249 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
250 int line_size) | |
0 | 251 { |
252 int i; | |
253 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
254 | |
255 /* read the pixels */ | |
256 for(i=0;i<8;i++) { | |
516 | 257 pixels[0] = cm[block[0]]; |
258 pixels[1] = cm[block[1]]; | |
259 pixels[2] = cm[block[2]]; | |
260 pixels[3] = cm[block[3]]; | |
261 pixels[4] = cm[block[4]]; | |
262 pixels[5] = cm[block[5]]; | |
263 pixels[6] = cm[block[6]]; | |
264 pixels[7] = cm[block[7]]; | |
265 | |
266 pixels += line_size; | |
267 block += 8; | |
0 | 268 } |
269 } | |
270 | |
516 | 271 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, |
272 int line_size) | |
0 | 273 { |
274 int i; | |
275 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
276 | |
277 /* read the pixels */ | |
278 for(i=0;i<8;i++) { | |
516 | 279 pixels[0] = cm[pixels[0] + block[0]]; |
280 pixels[1] = cm[pixels[1] + block[1]]; | |
281 pixels[2] = cm[pixels[2] + block[2]]; | |
282 pixels[3] = cm[pixels[3] + block[3]]; | |
283 pixels[4] = cm[pixels[4] + block[4]]; | |
284 pixels[5] = cm[pixels[5] + block[5]]; | |
285 pixels[6] = cm[pixels[6] + block[6]]; | |
286 pixels[7] = cm[pixels[7] + block[7]]; | |
287 pixels += line_size; | |
288 block += 8; | |
0 | 289 } |
290 } | |
385 | 291 #if 0 |
292 | |
/*
 * PIXOP2(OPNAME, OP) -- 64-bit implementation (currently compiled out by the
 * enclosing "#if 0").
 *
 * Expands to seven functions and two 4-entry dispatch tables. Every function
 * handles h rows of 8 pixels, loading a whole row as one 64-bit word with
 * LD64 and storing the result with OP(destination, value):
 *   OPNAME_pixels      plain copy/average of the source row
 *   OPNAME_pixels_x2   mean of each pixel and its right neighbour (pixels+1)
 *   OPNAME_pixels_y2   mean of each pixel and the one a line below
 *   OPNAME_pixels_xy2  mean of the four pixels of each 2x2 neighbourhood
 *   OPNAME_no_rnd_*    same, but rounding down instead of up
 *
 * Byte-parallel arithmetic used:
 *   (a|b) - (((a^b) & 0xFE..FE) >> 1)  is (a+b+1)>>1 in every byte lane
 *   (a&b) + (((a^b) & 0xFE..FE) >> 1)  is (a+b)>>1   in every byte lane
 * The xy2 variants split each byte into its two low bits (l0/l1, including
 * the rounding constant 0x02 or 0x01 per byte) and its six high bits
 * pre-shifted by 2 (h0/h1) so that the four-way sum cannot overflow a lane.
 * The xy2 loops emit two rows per iteration (i += 2).
 */
293 #define PIXOP2(OPNAME, OP) \ | |
294 void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
295 {\ | |
296 int i;\ | |
297 for(i=0; i<h; i++){\ | |
298 OP(*((uint64_t*)block), LD64(pixels));\ | |
299 pixels+=line_size;\ | |
300 block +=line_size;\ | |
301 }\ | |
302 }\ | |
303 \ | |
304 void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
305 {\ | |
306 int i;\ | |
307 for(i=0; i<h; i++){\ | |
308 const uint64_t a= LD64(pixels );\ | |
309 const uint64_t b= LD64(pixels+1);\ | |
310 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
311 pixels+=line_size;\ | |
312 block +=line_size;\ | |
313 }\ | |
314 }\ | |
315 \ | |
316 void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
317 {\ | |
318 int i;\ | |
319 for(i=0; i<h; i++){\ | |
320 const uint64_t a= LD64(pixels );\ | |
321 const uint64_t b= LD64(pixels+1);\ | |
322 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
323 pixels+=line_size;\ | |
324 block +=line_size;\ | |
325 }\ | |
326 }\ | |
327 \ | |
328 void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
329 {\ | |
330 int i;\ | |
331 for(i=0; i<h; i++){\ | |
332 const uint64_t a= LD64(pixels );\ | |
333 const uint64_t b= LD64(pixels+line_size);\ | |
334 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
335 pixels+=line_size;\ | |
336 block +=line_size;\ | |
337 }\ | |
338 }\ | |
339 \ | |
340 void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
341 {\ | |
342 int i;\ | |
343 for(i=0; i<h; i++){\ | |
344 const uint64_t a= LD64(pixels );\ | |
345 const uint64_t b= LD64(pixels+line_size);\ | |
346 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
347 pixels+=line_size;\ | |
348 block +=line_size;\ | |
349 }\ | |
350 }\ | |
351 \ | |
352 void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
353 {\ | |
354 int i;\ | |
355 const uint64_t a= LD64(pixels );\ | |
356 const uint64_t b= LD64(pixels+1);\ | |
357 uint64_t l0= (a&0x0303030303030303ULL)\ | |
358 + (b&0x0303030303030303ULL)\ | |
359 + 0x0202020202020202ULL;\ | |
360 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
361 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
362 uint64_t l1,h1;\ | |
363 \ | |
364 pixels+=line_size;\ | |
365 for(i=0; i<h; i+=2){\ | |
366 uint64_t a= LD64(pixels );\ | |
367 uint64_t b= LD64(pixels+1);\ | |
368 l1= (a&0x0303030303030303ULL)\ | |
369 + (b&0x0303030303030303ULL);\ | |
370 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
371 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
372 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
373 pixels+=line_size;\ | |
374 block +=line_size;\ | |
375 a= LD64(pixels );\ | |
376 b= LD64(pixels+1);\ | |
377 l0= (a&0x0303030303030303ULL)\ | |
378 + (b&0x0303030303030303ULL)\ | |
379 + 0x0202020202020202ULL;\ | |
380 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
381 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
382 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
383 pixels+=line_size;\ | |
384 block +=line_size;\ | |
385 }\ | |
386 }\ | |
387 \ | |
388 void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
389 {\ | |
390 int i;\ | |
391 const uint64_t a= LD64(pixels );\ | |
392 const uint64_t b= LD64(pixels+1);\ | |
393 uint64_t l0= (a&0x0303030303030303ULL)\ | |
394 + (b&0x0303030303030303ULL)\ | |
395 + 0x0101010101010101ULL;\ | |
396 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
397 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
398 uint64_t l1,h1;\ | |
399 \ | |
400 pixels+=line_size;\ | |
401 for(i=0; i<h; i+=2){\ | |
402 uint64_t a= LD64(pixels );\ | |
403 uint64_t b= LD64(pixels+1);\ | |
404 l1= (a&0x0303030303030303ULL)\ | |
405 + (b&0x0303030303030303ULL);\ | |
406 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
407 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
408 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
409 pixels+=line_size;\ | |
410 block +=line_size;\ | |
411 a= LD64(pixels );\ | |
412 b= LD64(pixels+1);\ | |
413 l0= (a&0x0303030303030303ULL)\ | |
414 + (b&0x0303030303030303ULL)\ | |
415 + 0x0101010101010101ULL;\ | |
416 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
417 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
418 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
419 pixels+=line_size;\ | |
420 block +=line_size;\ | |
421 }\ | |
422 }\ | |
423 \ | |
424 void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ | |
425 OPNAME ## _pixels,\ | |
426 OPNAME ## _pixels_x2,\ | |
427 OPNAME ## _pixels_y2,\ | |
428 OPNAME ## _pixels_xy2,\ | |
429 };\ | |
430 \ | |
431 void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ | |
432 OPNAME ## _pixels,\ | |
433 OPNAME ## _no_rnd_pixels_x2,\ | |
434 OPNAME ## _no_rnd_pixels_y2,\ | |
435 OPNAME ## _no_rnd_pixels_xy2,\ | |
436 }; | |
437 | |
438 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | |
439 #else // 32 bit variant (the disabled #if 0 branch above is the 64 bit variant) | |
440 | |
/*
 * PIXOP2(OPNAME, OP) -- 32-bit implementation (the one that is compiled).
 *
 * Expands to seven functions and two 4-entry dispatch tables. Every function
 * handles h rows of 8 pixels as two 32-bit words loaded with LD32, and
 * stores the result with OP(destination, value):
 *   OPNAME_pixels      plain copy/average of the source row
 *   OPNAME_pixels_x2   mean of each pixel and its right neighbour (pixels+1)
 *   OPNAME_pixels_y2   mean of each pixel and the one a line below
 *   OPNAME_pixels_xy2  mean of the four pixels of each 2x2 neighbourhood
 *   OPNAME_no_rnd_*    same, but rounding down instead of up
 *
 * Byte-parallel arithmetic used:
 *   (a|b) - (((a^b) & 0xFEFEFEFE) >> 1)  is (a+b+1)>>1 in every byte lane
 *   (a&b) + (((a^b) & 0xFEFEFEFE) >> 1)  is (a+b)>>1   in every byte lane
 * The xy2 variants split each byte into its two low bits (l0/l1, including
 * the rounding constant 0x02 or 0x01 per byte) and its six high bits
 * pre-shifted by 2 (h0/h1) so that the four-way sum cannot overflow a lane.
 *
 * Loop structure: x2/y2 walk each row in two 4-byte steps (j loop) and then
 * move to the next line; xy2 processes the left 4-byte column for all rows,
 * two rows per iteration (i += 2), then rewinds both pointers and repeats
 * for the right 4-byte column.
 */
441 #define PIXOP2(OPNAME, OP) \ | |
442 void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
443 {\ | |
444 int i;\ | |
445 for(i=0; i<h; i++){\ | |
446 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
447 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ | |
448 pixels+=line_size;\ | |
449 block +=line_size;\ | |
450 }\ | |
451 }\ | |
452 \ | |
453 void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
454 {\ | |
455 int i;\ | |
456 for(i=0; i<h; i++){\ | |
457 int j;\ | |
458 for(j=0; j<2; j++){\ | |
459 const uint32_t a= LD32(pixels );\ | |
460 const uint32_t b= LD32(pixels+1);\ | |
461 OP(*((uint32_t*)block), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ | |
462 pixels+=4;\ | |
463 block +=4;\ | |
464 }\ | |
465 pixels+=line_size-8;\ | |
466 block +=line_size-8;\ | |
467 }\ | |
468 }\ | |
469 \ | |
470 void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
471 {\ | |
472 int i;\ | |
473 for(i=0; i<h; i++){\ | |
474 int j;\ | |
475 for(j=0; j<2; j++){\ | |
476 const uint32_t a= LD32(pixels );\ | |
477 const uint32_t b= LD32(pixels+1);\ | |
478 OP(*((uint32_t*)block), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ | |
479 pixels+=4;\ | |
480 block +=4;\ | |
481 }\ | |
482 pixels+=line_size-8;\ | |
483 block +=line_size-8;\ | |
484 }\ | |
485 }\ | |
486 \ | |
487 void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
488 {\ | |
489 int i;\ | |
490 for(i=0; i<h; i++){\ | |
491 int j;\ | |
492 for(j=0; j<2; j++){\ | |
493 const uint32_t a= LD32(pixels );\ | |
494 const uint32_t b= LD32(pixels+line_size);\ | |
495 OP(*((uint32_t*)block), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\ | |
496 pixels+=4;\ | |
497 block +=4;\ | |
498 }\ | |
499 pixels+=line_size-8;\ | |
500 block +=line_size-8;\ | |
501 }\ | |
502 }\ | |
503 \ | |
504 void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
505 {\ | |
506 int i;\ | |
507 for(i=0; i<h; i++){\ | |
508 int j;\ | |
509 for(j=0; j<2; j++){\ | |
510 const uint32_t a= LD32(pixels );\ | |
511 const uint32_t b= LD32(pixels+line_size);\ | |
512 OP(*((uint32_t*)block), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ | |
513 pixels+=4;\ | |
514 block +=4;\ | |
515 }\ | |
516 pixels+=line_size-8;\ | |
517 block +=line_size-8;\ | |
518 }\ | |
519 }\ | |
520 \ | |
521 void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
522 {\ | |
523 int j;\ | |
524 for(j=0; j<2; j++){\ | |
525 int i;\ | |
526 const uint32_t a= LD32(pixels );\ | |
527 const uint32_t b= LD32(pixels+1);\ | |
528 uint32_t l0= (a&0x03030303UL)\ | |
529 + (b&0x03030303UL)\ | |
530 + 0x02020202UL;\ | |
531 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
532 + ((b&0xFCFCFCFCUL)>>2);\ | |
533 uint32_t l1,h1;\ | |
534 \ | |
535 pixels+=line_size;\ | |
536 for(i=0; i<h; i+=2){\ | |
537 uint32_t a= LD32(pixels );\ | |
538 uint32_t b= LD32(pixels+1);\ | |
539 l1= (a&0x03030303UL)\ | |
540 + (b&0x03030303UL);\ | |
541 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
542 + ((b&0xFCFCFCFCUL)>>2);\ | |
543 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
544 pixels+=line_size;\ | |
545 block +=line_size;\ | |
546 a= LD32(pixels );\ | |
547 b= LD32(pixels+1);\ | |
548 l0= (a&0x03030303UL)\ | |
549 + (b&0x03030303UL)\ | |
550 + 0x02020202UL;\ | |
551 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
552 + ((b&0xFCFCFCFCUL)>>2);\ | |
553 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
554 pixels+=line_size;\ | |
555 block +=line_size;\ | |
556 }\ | |
557 pixels+=4-line_size*(h+1);\ | |
558 block +=4-line_size*h;\ | |
559 }\ | |
560 }\ | |
561 \ | |
562 void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
563 {\ | |
564 int j;\ | |
565 for(j=0; j<2; j++){\ | |
566 int i;\ | |
567 const uint32_t a= LD32(pixels );\ | |
568 const uint32_t b= LD32(pixels+1);\ | |
569 uint32_t l0= (a&0x03030303UL)\ | |
570 + (b&0x03030303UL)\ | |
571 + 0x01010101UL;\ | |
572 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
573 + ((b&0xFCFCFCFCUL)>>2);\ | |
574 uint32_t l1,h1;\ | |
575 \ | |
576 pixels+=line_size;\ | |
577 for(i=0; i<h; i+=2){\ | |
578 uint32_t a= LD32(pixels );\ | |
579 uint32_t b= LD32(pixels+1);\ | |
580 l1= (a&0x03030303UL)\ | |
581 + (b&0x03030303UL);\ | |
582 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
583 + ((b&0xFCFCFCFCUL)>>2);\ | |
584 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
585 pixels+=line_size;\ | |
586 block +=line_size;\ | |
587 a= LD32(pixels );\ | |
588 b= LD32(pixels+1);\ | |
589 l0= (a&0x03030303UL)\ | |
590 + (b&0x03030303UL)\ | |
591 + 0x01010101UL;\ | |
592 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
593 + ((b&0xFCFCFCFCUL)>>2);\ | |
594 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
595 pixels+=line_size;\ | |
596 block +=line_size;\ | |
597 }\ | |
598 pixels+=4-line_size*(h+1);\ | |
599 block +=4-line_size*h;\ | |
600 }\ | |
601 }\ | |
602 \ | |
603 void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ | |
604 OPNAME ## _pixels,\ | |
605 OPNAME ## _pixels_x2,\ | |
606 OPNAME ## _pixels_y2,\ | |
607 OPNAME ## _pixels_xy2,\ | |
608 };\ | |
609 \ | |
610 void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\ | |
611 OPNAME ## _pixels,\ | |
612 OPNAME ## _no_rnd_pixels_x2,\ | |
613 OPNAME ## _no_rnd_pixels_y2,\ | |
614 OPNAME ## _no_rnd_pixels_xy2,\ | |
615 }; | |
616 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) | |
617 #endif | |
618 #define op_put(a, b) a = b | |
619 | |
620 PIXOP2(avg, op_avg) | |
621 PIXOP2(put, op_put) | |
622 #undef op_avg | |
623 #undef op_put | |
624 | |
403
2c3e25f4c496
removed unused stuff - added dsputil_set_bit_exact() support for easier testing
glantau
parents:
398
diff
changeset
|
625 #if 0 |
385 | 626 /* FIXME this stuff could be removed as it is not really used anymore */ |
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) -- legacy byte-at-a-time pixel operations
 * (currently compiled out by the enclosing "#if 0").
 *
 * Expands to four static functions and one 4-entry dispatch table
 * OPNAME_pixels_tab[] = { plain, x2, y2, xy2 }:
 *   OPNAME_pixels      applies OP(dst, src) to 8 pixels per row
 *   OPNAME_pixels_x2   uses avg2() of each pixel and its right neighbour
 *   OPNAME_pixels_y2   uses avg2() of each pixel and the one a line below
 *   OPNAME_pixels_xy2  uses avg4() of each 2x2 neighbourhood
 * BTYPE is the destination element type and INCR the destination step per
 * row; the source always advances by line_size. avg2/avg4 are whatever
 * macros are defined at the point of expansion, which is how the rounding
 * and non-rounding families are produced.
 * Every loop is "do { ... } while (--h)", so h must be at least 1.
 */
0 | 627 #define PIXOP(BTYPE, OPNAME, OP, INCR) \ |
628 \ | |
629 static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ | |
630 { \ | |
631 BTYPE *p; \ | |
632 const UINT8 *pix; \ | |
633 \ | |
634 p = block; \ | |
635 pix = pixels; \ | |
636 do { \ | |
637 OP(p[0], pix[0]); \ | |
638 OP(p[1], pix[1]); \ | |
639 OP(p[2], pix[2]); \ | |
640 OP(p[3], pix[3]); \ | |
641 OP(p[4], pix[4]); \ | |
642 OP(p[5], pix[5]); \ | |
643 OP(p[6], pix[6]); \ | |
644 OP(p[7], pix[7]); \ | |
645 pix += line_size; \ | |
646 p += INCR; \ | |
647 } while (--h);; \ | |
648 } \ | |
649 \ | |
650 static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ | |
651 { \ | |
652 BTYPE *p; \ | |
653 const UINT8 *pix; \ | |
654 \ | |
655 p = block; \ | |
656 pix = pixels; \ | |
657 do { \ | |
658 OP(p[0], avg2(pix[0], pix[1])); \ | |
659 OP(p[1], avg2(pix[1], pix[2])); \ | |
660 OP(p[2], avg2(pix[2], pix[3])); \ | |
661 OP(p[3], avg2(pix[3], pix[4])); \ | |
662 OP(p[4], avg2(pix[4], pix[5])); \ | |
663 OP(p[5], avg2(pix[5], pix[6])); \ | |
664 OP(p[6], avg2(pix[6], pix[7])); \ | |
665 OP(p[7], avg2(pix[7], pix[8])); \ | |
666 pix += line_size; \ | |
667 p += INCR; \ | |
668 } while (--h); \ | |
669 } \ | |
670 \ | |
671 static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ | |
672 { \ | |
673 BTYPE *p; \ | |
674 const UINT8 *pix; \ | |
675 const UINT8 *pix1; \ | |
676 \ | |
677 p = block; \ | |
678 pix = pixels; \ | |
679 pix1 = pixels + line_size; \ | |
680 do { \ | |
681 OP(p[0], avg2(pix[0], pix1[0])); \ | |
682 OP(p[1], avg2(pix[1], pix1[1])); \ | |
683 OP(p[2], avg2(pix[2], pix1[2])); \ | |
684 OP(p[3], avg2(pix[3], pix1[3])); \ | |
685 OP(p[4], avg2(pix[4], pix1[4])); \ | |
686 OP(p[5], avg2(pix[5], pix1[5])); \ | |
687 OP(p[6], avg2(pix[6], pix1[6])); \ | |
688 OP(p[7], avg2(pix[7], pix1[7])); \ | |
689 pix += line_size; \ | |
690 pix1 += line_size; \ | |
691 p += INCR; \ | |
692 } while(--h); \ | |
693 } \ | |
694 \ | |
695 static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ | |
696 { \ | |
697 BTYPE *p; \ | |
698 const UINT8 *pix; \ | |
699 const UINT8 *pix1; \ | |
700 \ | |
701 p = block; \ | |
702 pix = pixels; \ | |
703 pix1 = pixels + line_size; \ | |
704 do { \ | |
705 OP(p[0], avg4(pix[0], pix[1], pix1[0], pix1[1])); \ | |
706 OP(p[1], avg4(pix[1], pix[2], pix1[1], pix1[2])); \ | |
707 OP(p[2], avg4(pix[2], pix[3], pix1[2], pix1[3])); \ | |
708 OP(p[3], avg4(pix[3], pix[4], pix1[3], pix1[4])); \ | |
709 OP(p[4], avg4(pix[4], pix[5], pix1[4], pix1[5])); \ | |
710 OP(p[5], avg4(pix[5], pix[6], pix1[5], pix1[6])); \ | |
711 OP(p[6], avg4(pix[6], pix[7], pix1[6], pix1[7])); \ | |
712 OP(p[7], avg4(pix[7], pix[8], pix1[7], pix1[8])); \ | |
713 pix += line_size; \ | |
714 pix1 += line_size; \ | |
715 p += INCR; \ | |
716 } while(--h); \ | |
717 } \ | |
718 \ | |
719 void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \ | |
720 OPNAME ## _pixels, \ | |
721 OPNAME ## _pixels_x2, \ | |
722 OPNAME ## _pixels_y2, \ | |
723 OPNAME ## _pixels_xy2, \ | |
724 }; | |
725 | |
726 /* rounding primitives */ | |
727 #define avg2(a,b) ((a+b+1)>>1) | |
728 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) | |
729 | |
730 #define op_avg(a, b) a = avg2(a, b) | |
731 #define op_sub(a, b) a -= b | |
612 | 732 #define op_put(a, b) a = b |
0 | 733 |
734 PIXOP(DCTELEM, sub, op_sub, 8) | |
612 | 735 PIXOP(uint8_t, avg, op_avg, line_size) |
736 PIXOP(uint8_t, put, op_put, line_size) | |
0 | 737 |
738 /* not rounding primitives */ | |
739 #undef avg2 | |
740 #undef avg4 | |
741 #define avg2(a,b) ((a+b)>>1) | |
742 #define avg4(a,b,c,d) ((a+b+c+d+1)>>2) | |
743 | |
612 | 744 PIXOP(uint8_t, avg_no_rnd, op_avg, line_size) |
745 PIXOP(uint8_t, put_no_rnd, op_put, line_size) | |
0 | 746 /* motion estimation */ |
747 | |
748 #undef avg2 | |
749 #undef avg4 | |
403
2c3e25f4c496
removed unused stuff - added dsputil_set_bit_exact() support for easier testing
glantau
parents:
398
diff
changeset
|
750 #endif |
2c3e25f4c496
removed unused stuff - added dsputil_set_bit_exact() support for easier testing
glantau
parents:
398
diff
changeset
|
751 |
0 | 752 #define avg2(a,b) ((a+b+1)>>1) |
753 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) | |
754 | |
255 | 755 static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder) |
756 { | |
757 const int A=(16-x16)*(16-y16); | |
758 const int B=( x16)*(16-y16); | |
759 const int C=(16-x16)*( y16); | |
760 const int D=( x16)*( y16); | |
761 int i; | |
762 rounder= 128 - rounder; | |
763 | |
764 for(i=0; i<h; i++) | |
765 { | |
766 dst[0]= (A*src[0] + B*src[1] + C*src[srcStride+0] + D*src[srcStride+1] + rounder)>>8; | |
767 dst[1]= (A*src[1] + B*src[2] + C*src[srcStride+1] + D*src[srcStride+2] + rounder)>>8; | |
768 dst[2]= (A*src[2] + B*src[3] + C*src[srcStride+2] + D*src[srcStride+3] + rounder)>>8; | |
769 dst[3]= (A*src[3] + B*src[4] + C*src[srcStride+3] + D*src[srcStride+4] + rounder)>>8; | |
770 dst[4]= (A*src[4] + B*src[5] + C*src[srcStride+4] + D*src[srcStride+5] + rounder)>>8; | |
771 dst[5]= (A*src[5] + B*src[6] + C*src[srcStride+5] + D*src[srcStride+6] + rounder)>>8; | |
772 dst[6]= (A*src[6] + B*src[7] + C*src[srcStride+6] + D*src[srcStride+7] + rounder)>>8; | |
773 dst[7]= (A*src[7] + B*src[8] + C*src[srcStride+7] + D*src[srcStride+8] + rounder)>>8; | |
774 dst+= srcStride; | |
775 src+= srcStride; | |
776 } | |
777 } | |
778 | |
779 static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r) | |
780 { | |
781 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
782 int i; | |
783 for(i=0; i<h; i++) | |
784 { | |
294 | 785 dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)]; |
786 dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)]; | |
787 dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)]; | |
788 dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)]; | |
789 dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)]; | |
790 dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)]; | |
791 dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)]; | |
792 dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)]; | |
255 | 793 dst+=dstStride; |
794 src+=srcStride; | |
795 } | |
796 } | |
797 | |
798 static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r) | |
799 { | |
800 UINT8 *cm = cropTbl + MAX_NEG_CROP; | |
801 int i; | |
802 for(i=0; i<w; i++) | |
803 { | |
804 const int src0= src[0*srcStride]; | |
805 const int src1= src[1*srcStride]; | |
806 const int src2= src[2*srcStride]; | |
807 const int src3= src[3*srcStride]; | |
808 const int src4= src[4*srcStride]; | |
809 const int src5= src[5*srcStride]; | |
810 const int src6= src[6*srcStride]; | |
811 const int src7= src[7*srcStride]; | |
812 const int src8= src[8*srcStride]; | |
294 | 813 dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)]; |
814 dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)]; | |
815 dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)]; | |
816 dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)]; | |
817 dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)]; | |
818 dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)]; | |
819 dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)]; | |
820 dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)]; | |
255 | 821 dst++; |
822 src++; | |
823 } | |
824 } | |
825 | |
826 static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride) | |
827 { | |
828 int i; | |
829 for(i=0; i<8; i++) | |
830 { | |
831 dst[0]= src[0]; | |
832 dst[1]= src[1]; | |
833 dst[2]= src[2]; | |
834 dst[3]= src[3]; | |
835 dst[4]= src[4]; | |
836 dst[5]= src[5]; | |
837 dst[6]= src[6]; | |
838 dst[7]= src[7]; | |
839 dst+=dstStride; | |
840 src+=srcStride; | |
841 } | |
842 } | |
843 | |
844 static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r) | |
845 { | |
846 int i; | |
847 for(i=0; i<8; i++) | |
848 { | |
849 dst[0]= (src1[0] + src2[0] + r)>>1; | |
850 dst[1]= (src1[1] + src2[1] + r)>>1; | |
851 dst[2]= (src1[2] + src2[2] + r)>>1; | |
852 dst[3]= (src1[3] + src2[3] + r)>>1; | |
853 dst[4]= (src1[4] + src2[4] + r)>>1; | |
854 dst[5]= (src1[5] + src2[5] + r)>>1; | |
855 dst[6]= (src1[6] + src2[6] + r)>>1; | |
856 dst[7]= (src1[7] + src2[7] + r)>>1; | |
857 dst+=dstStride; | |
858 src1+=srcStride; | |
859 src2+=8; | |
860 } | |
861 } | |
862 | |
863 static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r) | |
864 { | |
865 int i; | |
866 for(i=0; i<8; i++) | |
867 { | |
868 dst[0]= (src1[0] + src2[0] + src3[0] + src4[0] + r)>>2; | |
869 dst[1]= (src1[1] + src2[1] + src3[1] + src4[1] + r)>>2; | |
870 dst[2]= (src1[2] + src2[2] + src3[2] + src4[2] + r)>>2; | |
871 dst[3]= (src1[3] + src2[3] + src3[3] + src4[3] + r)>>2; | |
872 dst[4]= (src1[4] + src2[4] + src3[4] + src4[4] + r)>>2; | |
873 dst[5]= (src1[5] + src2[5] + src3[5] + src4[5] + r)>>2; | |
874 dst[6]= (src1[6] + src2[6] + src3[6] + src4[6] + r)>>2; | |
875 dst[7]= (src1[7] + src2[7] + src3[7] + src4[7] + r)>>2; | |
876 dst+=dstStride; | |
877 src1+=srcStride; | |
878 src2+=8; | |
256 | 879 src3+=8; |
255 | 880 src4+=8; |
881 } | |
882 } | |
883 | |
884 #define QPEL_MC(r, name) \ | |
885 static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
886 {\ | |
887 put_block(dst, src, dstStride, srcStride);\ | |
888 }\ | |
889 \ | |
890 static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
891 {\ | |
892 UINT8 half[64];\ | |
294 | 893 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
255 | 894 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ |
895 }\ | |
896 \ | |
897 static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
898 {\ | |
294 | 899 qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ |
255 | 900 }\ |
901 \ | |
902 static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
903 {\ | |
904 UINT8 half[64];\ | |
294 | 905 qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
255 | 906 avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\ |
907 }\ | |
908 \ | |
909 static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
910 {\ | |
911 UINT8 half[64];\ | |
294 | 912 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
255 | 913 avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ |
914 }\ | |
915 \ | |
916 static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
917 {\ | |
294 | 918 qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ |
255 | 919 }\ |
920 \ | |
921 static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
922 {\ | |
923 UINT8 half[64];\ | |
294 | 924 qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ |
255 | 925 avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\ |
926 }\ | |
927 static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
928 {\ | |
929 UINT8 halfH[72];\ | |
256 | 930 UINT8 halfV[64];\ |
255 | 931 UINT8 halfHV[64];\ |
294 | 932 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
933 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ | |
934 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
255 | 935 avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ |
936 }\ | |
937 static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
938 {\ | |
939 UINT8 halfH[72];\ | |
256 | 940 UINT8 halfV[64];\ |
255 | 941 UINT8 halfHV[64];\ |
294 | 942 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
943 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ | |
944 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
255 | 945 avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ |
946 }\ | |
947 static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
948 {\ | |
949 UINT8 halfH[72];\ | |
256 | 950 UINT8 halfV[64];\ |
255 | 951 UINT8 halfHV[64];\ |
294 | 952 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
953 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ | |
954 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
256 | 955 avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ |
255 | 956 }\ |
957 static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
958 {\ | |
959 UINT8 halfH[72];\ | |
256 | 960 UINT8 halfV[64];\ |
255 | 961 UINT8 halfHV[64];\ |
294 | 962 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
963 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ | |
964 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
256 | 965 avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ |
255 | 966 }\ |
967 static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
968 {\ | |
969 UINT8 halfH[72];\ | |
970 UINT8 halfHV[64];\ | |
294 | 971 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
972 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
255 | 973 avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\ |
974 }\ | |
975 static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
976 {\ | |
977 UINT8 halfH[72];\ | |
978 UINT8 halfHV[64];\ | |
294 | 979 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
980 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
255 | 981 avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\ |
982 }\ | |
983 static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
984 {\ | |
985 UINT8 halfH[72];\ | |
256 | 986 UINT8 halfV[64];\ |
255 | 987 UINT8 halfHV[64];\ |
294 | 988 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
989 qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ | |
990 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
256 | 991 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ |
255 | 992 }\ |
993 static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
994 {\ | |
995 UINT8 halfH[72];\ | |
256 | 996 UINT8 halfV[64];\ |
255 | 997 UINT8 halfHV[64];\ |
294 | 998 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
999 qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ | |
1000 qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ | |
256 | 1001 avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ |
255 | 1002 }\ |
1003 static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ | |
1004 {\ | |
1005 UINT8 halfH[72];\ | |
294 | 1006 qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ |
1007 qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\ | |
255 | 1008 }\ |
1009 qpel_mc_func qpel_mc ## name ## _tab[16]={ \ | |
1010 qpel_mc00_c ## name, \ | |
1011 qpel_mc10_c ## name, \ | |
1012 qpel_mc20_c ## name, \ | |
1013 qpel_mc30_c ## name, \ | |
1014 qpel_mc01_c ## name, \ | |
1015 qpel_mc11_c ## name, \ | |
1016 qpel_mc21_c ## name, \ | |
1017 qpel_mc31_c ## name, \ | |
1018 qpel_mc02_c ## name, \ | |
1019 qpel_mc12_c ## name, \ | |
1020 qpel_mc22_c ## name, \ | |
1021 qpel_mc32_c ## name, \ | |
1022 qpel_mc03_c ## name, \ | |
1023 qpel_mc13_c ## name, \ | |
1024 qpel_mc23_c ## name, \ | |
1025 qpel_mc33_c ## name, \ | |
1026 }; | |
1027 | |
1028 QPEL_MC(0, _rnd) | |
1029 QPEL_MC(1, _no_rnd) | |
1030 | |
294 | 1031 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
0 | 1032 { |
1033 int s, i; | |
1034 | |
1035 s = 0; | |
294 | 1036 for(i=0;i<16;i++) { |
0 | 1037 s += abs(pix1[0] - pix2[0]); |
1038 s += abs(pix1[1] - pix2[1]); | |
1039 s += abs(pix1[2] - pix2[2]); | |
1040 s += abs(pix1[3] - pix2[3]); | |
1041 s += abs(pix1[4] - pix2[4]); | |
1042 s += abs(pix1[5] - pix2[5]); | |
1043 s += abs(pix1[6] - pix2[6]); | |
1044 s += abs(pix1[7] - pix2[7]); | |
1045 s += abs(pix1[8] - pix2[8]); | |
1046 s += abs(pix1[9] - pix2[9]); | |
1047 s += abs(pix1[10] - pix2[10]); | |
1048 s += abs(pix1[11] - pix2[11]); | |
1049 s += abs(pix1[12] - pix2[12]); | |
1050 s += abs(pix1[13] - pix2[13]); | |
1051 s += abs(pix1[14] - pix2[14]); | |
1052 s += abs(pix1[15] - pix2[15]); | |
1053 pix1 += line_size; | |
1054 pix2 += line_size; | |
1055 } | |
1056 return s; | |
1057 } | |
1058 | |
294 | 1059 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
0 | 1060 { |
1061 int s, i; | |
1062 | |
1063 s = 0; | |
294 | 1064 for(i=0;i<16;i++) { |
0 | 1065 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); |
1066 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | |
1067 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | |
1068 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | |
1069 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | |
1070 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); | |
1071 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); | |
1072 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); | |
1073 s += abs(pix1[8] - avg2(pix2[8], pix2[9])); | |
1074 s += abs(pix1[9] - avg2(pix2[9], pix2[10])); | |
1075 s += abs(pix1[10] - avg2(pix2[10], pix2[11])); | |
1076 s += abs(pix1[11] - avg2(pix2[11], pix2[12])); | |
1077 s += abs(pix1[12] - avg2(pix2[12], pix2[13])); | |
1078 s += abs(pix1[13] - avg2(pix2[13], pix2[14])); | |
1079 s += abs(pix1[14] - avg2(pix2[14], pix2[15])); | |
1080 s += abs(pix1[15] - avg2(pix2[15], pix2[16])); | |
1081 pix1 += line_size; | |
1082 pix2 += line_size; | |
1083 } | |
1084 return s; | |
1085 } | |
1086 | |
294 | 1087 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
0 | 1088 { |
1089 int s, i; | |
1090 UINT8 *pix3 = pix2 + line_size; | |
1091 | |
1092 s = 0; | |
294 | 1093 for(i=0;i<16;i++) { |
0 | 1094 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); |
1095 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | |
1096 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | |
1097 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | |
1098 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | |
1099 s += abs(pix1[5] - avg2(pix2[5], pix3[5])); | |
1100 s += abs(pix1[6] - avg2(pix2[6], pix3[6])); | |
1101 s += abs(pix1[7] - avg2(pix2[7], pix3[7])); | |
1102 s += abs(pix1[8] - avg2(pix2[8], pix3[8])); | |
1103 s += abs(pix1[9] - avg2(pix2[9], pix3[9])); | |
1104 s += abs(pix1[10] - avg2(pix2[10], pix3[10])); | |
1105 s += abs(pix1[11] - avg2(pix2[11], pix3[11])); | |
1106 s += abs(pix1[12] - avg2(pix2[12], pix3[12])); | |
1107 s += abs(pix1[13] - avg2(pix2[13], pix3[13])); | |
1108 s += abs(pix1[14] - avg2(pix2[14], pix3[14])); | |
1109 s += abs(pix1[15] - avg2(pix2[15], pix3[15])); | |
1110 pix1 += line_size; | |
1111 pix2 += line_size; | |
1112 pix3 += line_size; | |
1113 } | |
1114 return s; | |
1115 } | |
1116 | |
294 | 1117 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
0 | 1118 { |
1119 int s, i; | |
1120 UINT8 *pix3 = pix2 + line_size; | |
1121 | |
1122 s = 0; | |
294 | 1123 for(i=0;i<16;i++) { |
0 | 1124 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); |
1125 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | |
1126 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | |
1127 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | |
1128 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | |
1129 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); | |
1130 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); | |
1131 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); | |
1132 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9])); | |
1133 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10])); | |
1134 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11])); | |
1135 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12])); | |
1136 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13])); | |
1137 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14])); | |
1138 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15])); | |
1139 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16])); | |
1140 pix1 += line_size; | |
1141 pix2 += line_size; | |
1142 pix3 += line_size; | |
1143 } | |
1144 return s; | |
1145 } | |
1146 | |
294 | 1147 int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) |
1148 { | |
1149 int s, i; | |
1150 | |
1151 s = 0; | |
1152 for(i=0;i<8;i++) { | |
1153 s += abs(pix1[0] - pix2[0]); | |
1154 s += abs(pix1[1] - pix2[1]); | |
1155 s += abs(pix1[2] - pix2[2]); | |
1156 s += abs(pix1[3] - pix2[3]); | |
1157 s += abs(pix1[4] - pix2[4]); | |
1158 s += abs(pix1[5] - pix2[5]); | |
1159 s += abs(pix1[6] - pix2[6]); | |
1160 s += abs(pix1[7] - pix2[7]); | |
1161 pix1 += line_size; | |
1162 pix2 += line_size; | |
1163 } | |
1164 return s; | |
1165 } | |
1166 | |
1167 int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |
1168 { | |
1169 int s, i; | |
1170 | |
1171 s = 0; | |
1172 for(i=0;i<8;i++) { | |
1173 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); | |
1174 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | |
1175 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | |
1176 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | |
1177 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | |
1178 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); | |
1179 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); | |
1180 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); | |
1181 pix1 += line_size; | |
1182 pix2 += line_size; | |
1183 } | |
1184 return s; | |
1185 } | |
1186 | |
1187 int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |
1188 { | |
1189 int s, i; | |
1190 UINT8 *pix3 = pix2 + line_size; | |
1191 | |
1192 s = 0; | |
1193 for(i=0;i<8;i++) { | |
1194 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); | |
1195 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); | |
1196 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); | |
1197 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); | |
1198 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); | |
1199 s += abs(pix1[5] - avg2(pix2[5], pix3[5])); | |
1200 s += abs(pix1[6] - avg2(pix2[6], pix3[6])); | |
1201 s += abs(pix1[7] - avg2(pix2[7], pix3[7])); | |
1202 pix1 += line_size; | |
1203 pix2 += line_size; | |
1204 pix3 += line_size; | |
1205 } | |
1206 return s; | |
1207 } | |
1208 | |
1209 int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) | |
1210 { | |
1211 int s, i; | |
1212 UINT8 *pix3 = pix2 + line_size; | |
1213 | |
1214 s = 0; | |
1215 for(i=0;i<8;i++) { | |
1216 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); | |
1217 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); | |
1218 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); | |
1219 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); | |
1220 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); | |
1221 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); | |
1222 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); | |
1223 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); | |
1224 pix1 += line_size; | |
1225 pix2 += line_size; | |
1226 pix3 += line_size; | |
1227 } | |
1228 return s; | |
1229 } | |
1230 | |
34 | 1231 /* permute block according so that it corresponds to the MMX idct |
1232 order */ | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1233 #ifdef SIMPLE_IDCT |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1234 /* general permutation, but perhaps slightly slower */ |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1235 void block_permute(INT16 *block) |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1236 { |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1237 int i; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1238 INT16 temp[64]; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1239 |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1240 for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1241 |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1242 for(i=0; i<64; i++) block[i] = temp[i]; |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1243 } |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1244 #else |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1245 |
34 | 1246 void block_permute(INT16 *block) |
1247 { | |
1248 int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | |
1249 int i; | |
1250 | |
1251 for(i=0;i<8;i++) { | |
1252 tmp1 = block[1]; | |
1253 tmp2 = block[2]; | |
1254 tmp3 = block[3]; | |
1255 tmp4 = block[4]; | |
1256 tmp5 = block[5]; | |
1257 tmp6 = block[6]; | |
1258 block[1] = tmp2; | |
1259 block[2] = tmp4; | |
1260 block[3] = tmp6; | |
1261 block[4] = tmp1; | |
1262 block[5] = tmp3; | |
1263 block[6] = tmp5; | |
1264 block += 8; | |
1265 } | |
1266 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1267 #endif |
34 | 1268 |
296 | 1269 void clear_blocks_c(DCTELEM *blocks) |
1270 { | |
1271 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
1272 } | |
1273 | |
480 | 1274 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
1275 converted */ | |
1276 void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) | |
1277 { | |
1278 ff_idct (block); | |
1279 put_pixels_clamped(block, dest, line_size); | |
1280 } | |
1281 | |
1282 void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) | |
1283 { | |
1284 ff_idct (block); | |
1285 add_pixels_clamped(block, dest, line_size); | |
1286 } | |
1287 | |
0 | 1288 void dsputil_init(void) |
1289 { | |
34 | 1290 int i, j; |
88 | 1291 int use_permuted_idct; |
0 | 1292 |
1293 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; | |
1294 for(i=0;i<MAX_NEG_CROP;i++) { | |
1295 cropTbl[i] = 0; | |
1296 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
1297 } | |
1298 | |
1299 for(i=0;i<512;i++) { | |
1300 squareTbl[i] = (i - 256) * (i - 256); | |
1301 } | |
1302 | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1303 #ifdef SIMPLE_IDCT |
480 | 1304 ff_idct = NULL; |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1305 #else |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
6
diff
changeset
|
1306 ff_idct = j_rev_dct; |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1307 #endif |
0 | 1308 get_pixels = get_pixels_c; |
324 | 1309 diff_pixels = diff_pixels_c; |
0 | 1310 put_pixels_clamped = put_pixels_clamped_c; |
1311 add_pixels_clamped = add_pixels_clamped_c; | |
255 | 1312 gmc1= gmc1_c; |
296 | 1313 clear_blocks= clear_blocks_c; |
612 | 1314 pix_sum= pix_sum_c; |
1315 pix_norm1= pix_norm1_c; | |
0 | 1316 |
294 | 1317 pix_abs16x16 = pix_abs16x16_c; |
1318 pix_abs16x16_x2 = pix_abs16x16_x2_c; | |
1319 pix_abs16x16_y2 = pix_abs16x16_y2_c; | |
0 | 1320 pix_abs16x16_xy2 = pix_abs16x16_xy2_c; |
294 | 1321 pix_abs8x8 = pix_abs8x8_c; |
1322 pix_abs8x8_x2 = pix_abs8x8_x2_c; | |
1323 pix_abs8x8_y2 = pix_abs8x8_y2_c; | |
1324 pix_abs8x8_xy2 = pix_abs8x8_xy2_c; | |
0 | 1325 |
88 | 1326 use_permuted_idct = 1; |
34 | 1327 |
2 | 1328 #ifdef HAVE_MMX |
0 | 1329 dsputil_init_mmx(); |
1330 #endif | |
62 | 1331 #ifdef ARCH_ARMV4L |
1332 dsputil_init_armv4l(); | |
1333 #endif | |
88 | 1334 #ifdef HAVE_MLIB |
1335 dsputil_init_mlib(); | |
1336 use_permuted_idct = 0; | |
1337 #endif | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
209
diff
changeset
|
1338 #ifdef ARCH_ALPHA |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
209
diff
changeset
|
1339 dsputil_init_alpha(); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
209
diff
changeset
|
1340 use_permuted_idct = 0; |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
209
diff
changeset
|
1341 #endif |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
1342 #ifdef ARCH_POWERPC |
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
1343 dsputil_init_altivec(); |
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
1344 #endif |
88 | 1345 |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1346 #ifdef SIMPLE_IDCT |
480 | 1347 if (ff_idct == NULL) { |
1348 ff_idct_put = simple_idct_put; | |
1349 ff_idct_add = simple_idct_add; | |
1350 use_permuted_idct=0; | |
590 | 1351 } |
1352 #endif | |
1353 if(ff_idct != NULL) { | |
480 | 1354 ff_idct_put = gen_idct_put; |
1355 ff_idct_add = gen_idct_add; | |
1356 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
1357 |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1358 if(use_permuted_idct) |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1359 #ifdef SIMPLE_IDCT |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1360 for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i]; |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1361 #else |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1362 for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1363 #endif |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1364 else |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1365 for(i=0; i<64; i++) permutation[i]=i; |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
1366 |
220 | 1367 for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; |
1368 for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; | |
1369 | |
88 | 1370 if (use_permuted_idct) { |
1371 /* permute for IDCT */ | |
1372 for(i=0;i<64;i++) { | |
1373 j = zigzag_direct[i]; | |
1374 zigzag_direct[i] = block_permute_op(j); | |
1375 j = ff_alternate_horizontal_scan[i]; | |
1376 ff_alternate_horizontal_scan[i] = block_permute_op(j); | |
1377 j = ff_alternate_vertical_scan[i]; | |
1378 ff_alternate_vertical_scan[i] = block_permute_op(j); | |
1379 } | |
533
3c07cf9595de
adding ff prefix to avoid global name conficts with xvid (patch by Marko Kreen <marko at l-t.ee>)
michaelni
parents:
517
diff
changeset
|
1380 block_permute(ff_mpeg1_default_intra_matrix); |
3c07cf9595de
adding ff prefix to avoid global name conficts with xvid (patch by Marko Kreen <marko at l-t.ee>)
michaelni
parents:
517
diff
changeset
|
1381 block_permute(ff_mpeg1_default_non_intra_matrix); |
312 | 1382 block_permute(ff_mpeg4_default_intra_matrix); |
1383 block_permute(ff_mpeg4_default_non_intra_matrix); | |
88 | 1384 } |
200 | 1385 |
1386 build_zigzag_end(); | |
0 | 1387 } |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1388 |
403
2c3e25f4c496
removed unused stuff - added dsputil_set_bit_exact() support for easier testing
glantau
parents:
398
diff
changeset
|
/*
 * Disable any operation that is not bit exact (intended for testing).
 * Only the MMX build has something to switch off; otherwise this is a no-op.
 */
void avcodec_set_bit_exact(void)
{
#ifdef HAVE_MMX
    dsputil_set_bit_exact_mmx();
#endif
}
2c3e25f4c496
removed unused stuff - added dsputil_set_bit_exact() support for easier testing
glantau
parents:
398
diff
changeset
|
1396 |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1397 void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1398 int orig_linesize[3], int coded_linesize, |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1399 AVCodecContext *avctx) |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1400 { |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1401 int quad, diff, x, y; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1402 UINT8 *orig, *coded; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1403 UINT32 *sq = squareTbl + 256; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1404 |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1405 quad = 0; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1406 diff = 0; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1407 |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1408 /* Luminance */ |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1409 orig = orig_image[0]; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1410 coded = coded_image[0]; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1411 |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1412 for (y=0;y<avctx->height;y++) { |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1413 for (x=0;x<avctx->width;x++) { |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1414 diff = *(orig + x) - *(coded + x); |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1415 quad += sq[diff]; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1416 } |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1417 orig += orig_linesize[0]; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1418 coded += coded_linesize; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1419 } |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1420 |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1421 avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height); |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1422 |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1423 if (avctx->psnr_y) { |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1424 avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1425 avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1426 } else |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1427 avctx->psnr_y = 99.99; |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1428 } |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
1429 |