Mercurial > audlegacy-plugins
comparison src/ffmpeg/libffwma/dsputil.c @ 806:74abcb9cafae trunk
[svn] - fork wma plugin
author | nenolod |
---|---|
date | Mon, 12 Mar 2007 10:59:21 -0700 |
parents | src/wma/libffwma/dsputil.c@3da1b8942b8b |
children |
comparison
equal
deleted
inserted
replaced
805:1ba5f86aeac9 | 806:74abcb9cafae |
---|---|
1 /* | |
2 * DSP utils | |
3 * Copyright (c) 2000, 2001 Fabrice Bellard. | |
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |
5 * | |
6 * This library is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
10 * | |
11 * This library is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with this library; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 * | |
20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> | |
21 */ | |
22 | |
23 /** | |
24 * @file dsputil.c | |
25 * DSP utils | |
26 */ | |
27 | |
28 #include "avcodec.h" | |
29 #include "dsputil.h" | |
30 #include "simple_idct.h" | |
31 | |
/* Clamping lookup table, filled by dsputil_static_init(): the first
   MAX_NEG_CROP entries are 0, the next 256 entries are 0..255 and the
   last MAX_NEG_CROP entries are 255. */
32 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; | |
/* Table of squares, filled by dsputil_static_init():
   squareTbl[i] == (i - 256) * (i - 256) for i in 0..511. */
33 uint32_t squareTbl[512]; | |
34 | |
/*
 * Zigzag scan order for an 8x8 block: entry k is the row-major index
 * (8 * row + column) of the k-th position visited.  The initializer is
 * written one anti-diagonal per line so that the traversal can be followed
 * by eye; the 64 values and their order are unchanged.
 */
const uint8_t ff_zigzag_direct[64] = {
     0,
     1,  8,
    16,  9,  2,
     3, 10, 17, 24,
    32, 25, 18, 11,  4,
     5, 12, 19, 26, 33, 40,
    48, 41, 34, 27, 20, 13,  6,
     7, 14, 21, 28, 35, 42, 49, 56,
    57, 50, 43, 36, 29, 22, 15,
    23, 30, 37, 44, 51, 58,
    59, 52, 45, 38, 31,
    39, 46, 53, 60,
    61, 54, 47,
    55, 62,
    63
};
45 | |
/* 64-entry scan table; every value 0..63 appears exactly once.
   Presumably each entry is an index into an 8x8 coefficient block --
   TODO confirm against the users of this table. */
46 /* Specific zigzag scan for 248 idct. NOTE that unlike the | |
47 specification, we interleave the fields */ | |
48 const uint8_t ff_zigzag248_direct[64] = { | |
49 0, 8, 1, 9, 16, 24, 2, 10, | |
50 17, 25, 32, 40, 48, 56, 33, 41, | |
51 18, 26, 3, 11, 4, 12, 19, 27, | |
52 34, 42, 49, 57, 50, 58, 35, 43, | |
53 20, 28, 5, 13, 6, 14, 21, 29, | |
54 36, 44, 51, 59, 52, 60, 37, 45, | |
55 22, 30, 7, 15, 23, 31, 38, 46, | |
56 53, 61, 54, 62, 39, 47, 55, 63, | |
57 }; | |
58 | |
59 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ | |
/* Filled by dsputil_static_init() so that
   inv_zigzag_direct16[ff_zigzag_direct[i]] == i + 1 for i in 0..63. */
60 uint16_t __align8 inv_zigzag_direct16[64]; | |
61 | |
/* 64-entry scan table; every value 0..63 appears exactly once.
   Presumably an alternative coefficient scan order for 8x8 blocks
   (entries being indices into the block) -- TODO confirm with callers. */
62 const uint8_t ff_alternate_horizontal_scan[64] = { | |
63 0, 1, 2, 3, 8, 9, 16, 17, | |
64 10, 11, 4, 5, 6, 7, 15, 14, | |
65 13, 12, 19, 18, 24, 25, 32, 33, | |
66 26, 27, 20, 21, 22, 23, 28, 29, | |
67 30, 31, 34, 35, 40, 41, 48, 49, | |
68 42, 43, 36, 37, 38, 39, 44, 45, | |
69 46, 47, 50, 51, 56, 57, 58, 59, | |
70 52, 53, 54, 55, 60, 61, 62, 63, | |
71 }; | |
72 | |
/* 64-entry scan table; every value 0..63 appears exactly once.
   Presumably an alternative coefficient scan order for 8x8 blocks
   (entries being indices into the block) -- TODO confirm with callers. */
73 const uint8_t ff_alternate_vertical_scan[64] = { | |
74 0, 8, 16, 24, 1, 9, 2, 10, | |
75 17, 25, 32, 40, 48, 56, 57, 49, | |
76 41, 33, 26, 18, 3, 11, 4, 12, | |
77 19, 27, 34, 42, 50, 58, 35, 43, | |
78 51, 59, 20, 28, 5, 13, 6, 14, | |
79 21, 29, 36, 44, 52, 60, 37, 45, | |
80 53, 61, 22, 30, 7, 15, 23, 31, | |
81 38, 46, 54, 62, 39, 47, 55, 63, | |
82 }; | |
83 | |
/* Reciprocal table used to replace a division by a multiply and shift
   (see the formula in the comment below).  Entry 0 is a placeholder 0
   and entry 1 is 2^32 - 1.  NOTE(review): the remaining entries look
   like ceil(2^32 / b); only spot-checked for a few b, confirm before
   relying on it. */
84 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ | |
85 const uint32_t inverse[256]={ | |
86 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, | |
87 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |
88 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |
89 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | |
90 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | |
91 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | |
92 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | |
93 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | |
94 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | |
95 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | |
96 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | |
97 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | |
98 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | |
99 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | |
100 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | |
101 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | |
102 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | |
103 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | |
104 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | |
105 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | |
106 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | |
107 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | |
108 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | |
109 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | |
110 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | |
111 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | |
112 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | |
113 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | |
114 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | |
115 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | |
116 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | |
117 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | |
118 }; | |
119 | |
/* 64-entry table with file scope (static).  NOTE(review): no code in the
   visible part of this file reads it -- confirm whether it is still
   needed. */
120 /* Input permutation for the simple_idct_mmx */ | |
121 static const uint8_t simple_mmx_permutation[64]={ | |
122 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |
123 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
124 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
125 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
126 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
127 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
128 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
129 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
130 }; | |
131 #if 0 | |
/**
 * Sum the pixel values of a 16x16 block.
 *
 * @param pix       first (top-left) pixel of the block
 * @param line_size distance in bytes between the starts of two rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
153 | |
154 static int pix_norm1_c(uint8_t * pix, int line_size) | |
155 { | |
156 int s, i, j; | |
157 uint32_t *sq = squareTbl + 256; | |
158 | |
159 s = 0; | |
160 for (i = 0; i < 16; i++) { | |
161 for (j = 0; j < 16; j += 8) { | |
162 #if 0 | |
163 s += sq[pix[0]]; | |
164 s += sq[pix[1]]; | |
165 s += sq[pix[2]]; | |
166 s += sq[pix[3]]; | |
167 s += sq[pix[4]]; | |
168 s += sq[pix[5]]; | |
169 s += sq[pix[6]]; | |
170 s += sq[pix[7]]; | |
171 #else | |
172 #if LONG_MAX > 2147483647 | |
173 register uint64_t x=*(uint64_t*)pix; | |
174 s += sq[x&0xff]; | |
175 s += sq[(x>>8)&0xff]; | |
176 s += sq[(x>>16)&0xff]; | |
177 s += sq[(x>>24)&0xff]; | |
178 s += sq[(x>>32)&0xff]; | |
179 s += sq[(x>>40)&0xff]; | |
180 s += sq[(x>>48)&0xff]; | |
181 s += sq[(x>>56)&0xff]; | |
182 #else | |
183 register uint32_t x=*(uint32_t*)pix; | |
184 s += sq[x&0xff]; | |
185 s += sq[(x>>8)&0xff]; | |
186 s += sq[(x>>16)&0xff]; | |
187 s += sq[(x>>24)&0xff]; | |
188 x=*(uint32_t*)(pix+4); | |
189 s += sq[x&0xff]; | |
190 s += sq[(x>>8)&0xff]; | |
191 s += sq[(x>>16)&0xff]; | |
192 s += sq[(x>>24)&0xff]; | |
193 #endif | |
194 #endif | |
195 pix += 8; | |
196 } | |
197 pix += line_size - 16; | |
198 } | |
199 return s; | |
200 } | |
201 | |
/**
 * Byte-swap w 32-bit words from src into dst using bswap_32().
 *
 * Words are processed in ascending index order.
 *
 * @param dst destination array, at least w words
 * @param src source array, at least w words
 * @param w   number of 32-bit words to convert; nothing is done for w <= 0
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n = 0;

    while (n < w) {
        dst[n] = bswap_32(src[n]);
        n++;
    }
}
219 | |
220 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) | |
221 { | |
222 int s, i; | |
223 uint32_t *sq = squareTbl + 256; | |
224 | |
225 s = 0; | |
226 for (i = 0; i < h; i++) { | |
227 s += sq[pix1[0] - pix2[0]]; | |
228 s += sq[pix1[1] - pix2[1]]; | |
229 s += sq[pix1[2] - pix2[2]]; | |
230 s += sq[pix1[3] - pix2[3]]; | |
231 s += sq[pix1[4] - pix2[4]]; | |
232 s += sq[pix1[5] - pix2[5]]; | |
233 s += sq[pix1[6] - pix2[6]]; | |
234 s += sq[pix1[7] - pix2[7]]; | |
235 pix1 += line_size; | |
236 pix2 += line_size; | |
237 } | |
238 return s; | |
239 } | |
240 | |
241 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) | |
242 { | |
243 int s, i; | |
244 uint32_t *sq = squareTbl + 256; | |
245 | |
246 s = 0; | |
247 for (i = 0; i < h; i++) { | |
248 s += sq[pix1[ 0] - pix2[ 0]]; | |
249 s += sq[pix1[ 1] - pix2[ 1]]; | |
250 s += sq[pix1[ 2] - pix2[ 2]]; | |
251 s += sq[pix1[ 3] - pix2[ 3]]; | |
252 s += sq[pix1[ 4] - pix2[ 4]]; | |
253 s += sq[pix1[ 5] - pix2[ 5]]; | |
254 s += sq[pix1[ 6] - pix2[ 6]]; | |
255 s += sq[pix1[ 7] - pix2[ 7]]; | |
256 s += sq[pix1[ 8] - pix2[ 8]]; | |
257 s += sq[pix1[ 9] - pix2[ 9]]; | |
258 s += sq[pix1[10] - pix2[10]]; | |
259 s += sq[pix1[11] - pix2[11]]; | |
260 s += sq[pix1[12] - pix2[12]]; | |
261 s += sq[pix1[13] - pix2[13]]; | |
262 s += sq[pix1[14] - pix2[14]]; | |
263 s += sq[pix1[15] - pix2[15]]; | |
264 | |
265 pix1 += line_size; | |
266 pix2 += line_size; | |
267 } | |
268 return s; | |
269 } | |
270 | |
271 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) | |
272 { | |
273 int i; | |
274 | |
275 /* read the pixels */ | |
276 for(i=0;i<8;i++) { | |
277 block[0] = pixels[0]; | |
278 block[1] = pixels[1]; | |
279 block[2] = pixels[2]; | |
280 block[3] = pixels[3]; | |
281 block[4] = pixels[4]; | |
282 block[5] = pixels[5]; | |
283 block[6] = pixels[6]; | |
284 block[7] = pixels[7]; | |
285 pixels += line_size; | |
286 block += 8; | |
287 } | |
288 } | |
289 | |
290 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, | |
291 const uint8_t *s2, int stride){ | |
292 int i; | |
293 | |
294 /* read the pixels */ | |
295 for(i=0;i<8;i++) { | |
296 block[0] = s1[0] - s2[0]; | |
297 block[1] = s1[1] - s2[1]; | |
298 block[2] = s1[2] - s2[2]; | |
299 block[3] = s1[3] - s2[3]; | |
300 block[4] = s1[4] - s2[4]; | |
301 block[5] = s1[5] - s2[5]; | |
302 block[6] = s1[6] - s2[6]; | |
303 block[7] = s1[7] - s2[7]; | |
304 s1 += stride; | |
305 s2 += stride; | |
306 block += 8; | |
307 } | |
308 } | |
309 | |
310 | |
311 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |
312 int line_size) | |
313 { | |
314 int i; | |
315 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
316 | |
317 /* read the pixels */ | |
318 for(i=0;i<8;i++) { | |
319 pixels[0] = cm[block[0]]; | |
320 pixels[1] = cm[block[1]]; | |
321 pixels[2] = cm[block[2]]; | |
322 pixels[3] = cm[block[3]]; | |
323 pixels[4] = cm[block[4]]; | |
324 pixels[5] = cm[block[5]]; | |
325 pixels[6] = cm[block[6]]; | |
326 pixels[7] = cm[block[7]]; | |
327 | |
328 pixels += line_size; | |
329 block += 8; | |
330 } | |
331 } | |
332 | |
333 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, | |
334 int line_size) | |
335 { | |
336 int i; | |
337 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
338 | |
339 /* read the pixels */ | |
340 for(i=0;i<8;i++) { | |
341 pixels[0] = cm[pixels[0] + block[0]]; | |
342 pixels[1] = cm[pixels[1] + block[1]]; | |
343 pixels[2] = cm[pixels[2] + block[2]]; | |
344 pixels[3] = cm[pixels[3] + block[3]]; | |
345 pixels[4] = cm[pixels[4] + block[4]]; | |
346 pixels[5] = cm[pixels[5] + block[5]]; | |
347 pixels[6] = cm[pixels[6] + block[6]]; | |
348 pixels[7] = cm[pixels[7] + block[7]]; | |
349 pixels += line_size; | |
350 block += 8; | |
351 } | |
352 } | |
353 #endif | |
354 #if 0 | |
355 | |
/*
 * PIXOP2(OPNAME, OP) -- 64-bit word implementation, compiled out by the
 * enclosing "#if 0".
 *
 * Expands to static helpers whose names start with OPNAME.  Each helper
 * handles one 8-byte word per row, loaded with LD64 and stored with
 * OP(dst, value).  The x2 helpers combine a word with the one starting one
 * pixel to the right, the y2 helpers with the one a line below, and the
 * xy2 helpers combine both; CALL_2X_PIXELS produces the 16 pixel wide
 * variants.
 *
 * NOTE(review): the first helper is named OPNAME ## _pixels while the
 * first CALL_2X_PIXELS line refers to OPNAME ## _pixels_c -- the names
 * would have to agree if this branch were ever enabled.
 */
356 #define PIXOP2(OPNAME, OP) \ | |
357 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
358 {\ | |
359 int i;\ | |
360 for(i=0; i<h; i++){\ | |
361 OP(*((uint64_t*)block), LD64(pixels));\ | |
362 pixels+=line_size;\ | |
363 block +=line_size;\ | |
364 }\ | |
365 }\ | |
366 \ | |
367 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
368 {\ | |
369 int i;\ | |
370 for(i=0; i<h; i++){\ | |
371 const uint64_t a= LD64(pixels );\ | |
372 const uint64_t b= LD64(pixels+1);\ | |
373 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
374 pixels+=line_size;\ | |
375 block +=line_size;\ | |
376 }\ | |
377 }\ | |
378 \ | |
379 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
380 {\ | |
381 int i;\ | |
382 for(i=0; i<h; i++){\ | |
383 const uint64_t a= LD64(pixels );\ | |
384 const uint64_t b= LD64(pixels+1);\ | |
385 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
386 pixels+=line_size;\ | |
387 block +=line_size;\ | |
388 }\ | |
389 }\ | |
390 \ | |
391 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
392 {\ | |
393 int i;\ | |
394 for(i=0; i<h; i++){\ | |
395 const uint64_t a= LD64(pixels );\ | |
396 const uint64_t b= LD64(pixels+line_size);\ | |
397 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
398 pixels+=line_size;\ | |
399 block +=line_size;\ | |
400 }\ | |
401 }\ | |
402 \ | |
403 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
404 {\ | |
405 int i;\ | |
406 for(i=0; i<h; i++){\ | |
407 const uint64_t a= LD64(pixels );\ | |
408 const uint64_t b= LD64(pixels+line_size);\ | |
409 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
410 pixels+=line_size;\ | |
411 block +=line_size;\ | |
412 }\ | |
413 }\ | |
414 \ | |
415 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
416 {\ | |
417 int i;\ | |
418 const uint64_t a= LD64(pixels );\ | |
419 const uint64_t b= LD64(pixels+1);\ | |
420 uint64_t l0= (a&0x0303030303030303ULL)\ | |
421 + (b&0x0303030303030303ULL)\ | |
422 + 0x0202020202020202ULL;\ | |
423 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
424 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
425 uint64_t l1,h1;\ | |
426 \ | |
427 pixels+=line_size;\ | |
428 for(i=0; i<h; i+=2){\ | |
429 uint64_t a= LD64(pixels );\ | |
430 uint64_t b= LD64(pixels+1);\ | |
431 l1= (a&0x0303030303030303ULL)\ | |
432 + (b&0x0303030303030303ULL);\ | |
433 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
434 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
435 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
436 pixels+=line_size;\ | |
437 block +=line_size;\ | |
438 a= LD64(pixels );\ | |
439 b= LD64(pixels+1);\ | |
440 l0= (a&0x0303030303030303ULL)\ | |
441 + (b&0x0303030303030303ULL)\ | |
442 + 0x0202020202020202ULL;\ | |
443 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
444 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
445 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
446 pixels+=line_size;\ | |
447 block +=line_size;\ | |
448 }\ | |
449 }\ | |
450 \ | |
451 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
452 {\ | |
453 int i;\ | |
454 const uint64_t a= LD64(pixels );\ | |
455 const uint64_t b= LD64(pixels+1);\ | |
456 uint64_t l0= (a&0x0303030303030303ULL)\ | |
457 + (b&0x0303030303030303ULL)\ | |
458 + 0x0101010101010101ULL;\ | |
459 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
460 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
461 uint64_t l1,h1;\ | |
462 \ | |
463 pixels+=line_size;\ | |
464 for(i=0; i<h; i+=2){\ | |
465 uint64_t a= LD64(pixels );\ | |
466 uint64_t b= LD64(pixels+1);\ | |
467 l1= (a&0x0303030303030303ULL)\ | |
468 + (b&0x0303030303030303ULL);\ | |
469 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
470 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
471 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
472 pixels+=line_size;\ | |
473 block +=line_size;\ | |
474 a= LD64(pixels );\ | |
475 b= LD64(pixels+1);\ | |
476 l0= (a&0x0303030303030303ULL)\ | |
477 + (b&0x0303030303030303ULL)\ | |
478 + 0x0101010101010101ULL;\ | |
479 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
480 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
481 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
482 pixels+=line_size;\ | |
483 block +=line_size;\ | |
484 }\ | |
485 }\ | |
486 \ | |
487 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ | |
488 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ | |
489 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ | |
490 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ | |
491 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ | |
492 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ | |
493 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) | |
494 | |
495 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | |
496 #else // 64 bit variant | |
497 | |
/*
 * PIXOP2(OPNAME, OP) -- 16/32-bit word implementation (the #else branch
 * of the surrounding "#if 0").
 *
 * Expands to a family of static helpers whose names start with OPNAME:
 *   _pixels{2,4,8}_c              h rows of a 2/4/8 pixel wide block
 *   _pixels{2,4,8,16}_l2          two sources combined with rnd_avg32()
 *   _no_rnd_pixels{8,16}_l2       two sources combined with no_rnd_avg32()
 *   _pixels{8,16}_l4, _no_rnd_... four sources combined
 *   ..._x2_c / ..._y2_c           _l2 of the block and the block one pixel
 *                                 to the right / one line below
 *   ..._xy2_c                     combination of the 2x2 neighbourhood
 * The 16 pixel wide _c variants are produced through CALL_2X_PIXELS.
 *
 * OP(dst, value) is the store operation applied to the result words; the
 * only exception is _pixels2_xy2_c, which assigns bytes directly (see the
 * FIXME in its body).  The four-input code adds the low 2 bits and the
 * high 6 bits of every byte separately; the rounding variants add 0x02
 * per byte and the no_rnd variants 0x01 before the shift by 2.
 *
 * NOTE(review): both PIXOP2(...) instantiations after this macro are
 * commented out, so nothing is generated from it in the visible part of
 * the file.
 */
498 #define PIXOP2(OPNAME, OP) \ | |
499 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
500 int i;\ | |
501 for(i=0; i<h; i++){\ | |
502 OP(*((uint16_t*)(block )), LD16(pixels ));\ | |
503 pixels+=line_size;\ | |
504 block +=line_size;\ | |
505 }\ | |
506 }\ | |
507 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
508 int i;\ | |
509 for(i=0; i<h; i++){\ | |
510 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
511 pixels+=line_size;\ | |
512 block +=line_size;\ | |
513 }\ | |
514 }\ | |
515 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
516 int i;\ | |
517 for(i=0; i<h; i++){\ | |
518 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
519 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ | |
520 pixels+=line_size;\ | |
521 block +=line_size;\ | |
522 }\ | |
523 }\ | |
524 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
525 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
526 }\ | |
527 \ | |
528 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
529 int src_stride1, int src_stride2, int h){\ | |
530 int i;\ | |
531 for(i=0; i<h; i++){\ | |
532 uint32_t a,b;\ | |
533 a= LD32(&src1[i*src_stride1 ]);\ | |
534 b= LD32(&src2[i*src_stride2 ]);\ | |
535 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ | |
536 a= LD32(&src1[i*src_stride1+4]);\ | |
537 b= LD32(&src2[i*src_stride2+4]);\ | |
538 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ | |
539 }\ | |
540 }\ | |
541 \ | |
542 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
543 int src_stride1, int src_stride2, int h){\ | |
544 int i;\ | |
545 for(i=0; i<h; i++){\ | |
546 uint32_t a,b;\ | |
547 a= LD32(&src1[i*src_stride1 ]);\ | |
548 b= LD32(&src2[i*src_stride2 ]);\ | |
549 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | |
550 a= LD32(&src1[i*src_stride1+4]);\ | |
551 b= LD32(&src2[i*src_stride2+4]);\ | |
552 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ | |
553 }\ | |
554 }\ | |
555 \ | |
556 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
557 int src_stride1, int src_stride2, int h){\ | |
558 int i;\ | |
559 for(i=0; i<h; i++){\ | |
560 uint32_t a,b;\ | |
561 a= LD32(&src1[i*src_stride1 ]);\ | |
562 b= LD32(&src2[i*src_stride2 ]);\ | |
563 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | |
564 }\ | |
565 }\ | |
566 \ | |
567 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
568 int src_stride1, int src_stride2, int h){\ | |
569 int i;\ | |
570 for(i=0; i<h; i++){\ | |
571 uint32_t a,b;\ | |
572 a= LD16(&src1[i*src_stride1 ]);\ | |
573 b= LD16(&src2[i*src_stride2 ]);\ | |
574 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ | |
575 }\ | |
576 }\ | |
577 \ | |
578 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
579 int src_stride1, int src_stride2, int h){\ | |
580 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
581 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
582 }\ | |
583 \ | |
584 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
585 int src_stride1, int src_stride2, int h){\ | |
586 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
587 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
588 }\ | |
589 \ | |
590 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
591 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | |
592 }\ | |
593 \ | |
594 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
595 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | |
596 }\ | |
597 \ | |
598 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
599 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | |
600 }\ | |
601 \ | |
602 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
603 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | |
604 }\ | |
605 \ | |
606 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
607 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
608 int i;\ | |
609 for(i=0; i<h; i++){\ | |
610 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
611 a= LD32(&src1[i*src_stride1]);\ | |
612 b= LD32(&src2[i*src_stride2]);\ | |
613 c= LD32(&src3[i*src_stride3]);\ | |
614 d= LD32(&src4[i*src_stride4]);\ | |
615 l0= (a&0x03030303UL)\ | |
616 + (b&0x03030303UL)\ | |
617 + 0x02020202UL;\ | |
618 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
619 + ((b&0xFCFCFCFCUL)>>2);\ | |
620 l1= (c&0x03030303UL)\ | |
621 + (d&0x03030303UL);\ | |
622 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
623 + ((d&0xFCFCFCFCUL)>>2);\ | |
624 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
625 a= LD32(&src1[i*src_stride1+4]);\ | |
626 b= LD32(&src2[i*src_stride2+4]);\ | |
627 c= LD32(&src3[i*src_stride3+4]);\ | |
628 d= LD32(&src4[i*src_stride4+4]);\ | |
629 l0= (a&0x03030303UL)\ | |
630 + (b&0x03030303UL)\ | |
631 + 0x02020202UL;\ | |
632 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
633 + ((b&0xFCFCFCFCUL)>>2);\ | |
634 l1= (c&0x03030303UL)\ | |
635 + (d&0x03030303UL);\ | |
636 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
637 + ((d&0xFCFCFCFCUL)>>2);\ | |
638 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
639 }\ | |
640 }\ | |
641 \ | |
642 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
643 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | |
644 }\ | |
645 \ | |
646 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
647 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | |
648 }\ | |
649 \ | |
650 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
651 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ | |
652 }\ | |
653 \ | |
654 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | |
655 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ | |
656 }\ | |
657 \ | |
658 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
659 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
660 int i;\ | |
661 for(i=0; i<h; i++){\ | |
662 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
663 a= LD32(&src1[i*src_stride1]);\ | |
664 b= LD32(&src2[i*src_stride2]);\ | |
665 c= LD32(&src3[i*src_stride3]);\ | |
666 d= LD32(&src4[i*src_stride4]);\ | |
667 l0= (a&0x03030303UL)\ | |
668 + (b&0x03030303UL)\ | |
669 + 0x01010101UL;\ | |
670 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
671 + ((b&0xFCFCFCFCUL)>>2);\ | |
672 l1= (c&0x03030303UL)\ | |
673 + (d&0x03030303UL);\ | |
674 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
675 + ((d&0xFCFCFCFCUL)>>2);\ | |
676 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
677 a= LD32(&src1[i*src_stride1+4]);\ | |
678 b= LD32(&src2[i*src_stride2+4]);\ | |
679 c= LD32(&src3[i*src_stride3+4]);\ | |
680 d= LD32(&src4[i*src_stride4+4]);\ | |
681 l0= (a&0x03030303UL)\ | |
682 + (b&0x03030303UL)\ | |
683 + 0x01010101UL;\ | |
684 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
685 + ((b&0xFCFCFCFCUL)>>2);\ | |
686 l1= (c&0x03030303UL)\ | |
687 + (d&0x03030303UL);\ | |
688 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
689 + ((d&0xFCFCFCFCUL)>>2);\ | |
690 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
691 }\ | |
692 }\ | |
693 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
694 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
695 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
696 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
697 }\ | |
698 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
699 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
700 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
701 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
702 }\ | |
703 \ | |
704 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
705 {\ | |
706 int i, a0, b0, a1, b1;\ | |
707 a0= pixels[0];\ | |
708 b0= pixels[1] + 2;\ | |
709 a0 += b0;\ | |
710 b0 += pixels[2];\ | |
711 \ | |
712 pixels+=line_size;\ | |
713 for(i=0; i<h; i+=2){\ | |
714 a1= pixels[0];\ | |
715 b1= pixels[1];\ | |
716 a1 += b1;\ | |
717 b1 += pixels[2];\ | |
718 \ | |
719 block[0]= (a1+a0)>>2; /* FIXME non put */\ | |
720 block[1]= (b1+b0)>>2;\ | |
721 \ | |
722 pixels+=line_size;\ | |
723 block +=line_size;\ | |
724 \ | |
725 a0= pixels[0];\ | |
726 b0= pixels[1] + 2;\ | |
727 a0 += b0;\ | |
728 b0 += pixels[2];\ | |
729 \ | |
730 block[0]= (a1+a0)>>2;\ | |
731 block[1]= (b1+b0)>>2;\ | |
732 pixels+=line_size;\ | |
733 block +=line_size;\ | |
734 }\ | |
735 }\ | |
736 \ | |
737 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
738 {\ | |
739 int i;\ | |
740 const uint32_t a= LD32(pixels );\ | |
741 const uint32_t b= LD32(pixels+1);\ | |
742 uint32_t l0= (a&0x03030303UL)\ | |
743 + (b&0x03030303UL)\ | |
744 + 0x02020202UL;\ | |
745 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
746 + ((b&0xFCFCFCFCUL)>>2);\ | |
747 uint32_t l1,h1;\ | |
748 \ | |
749 pixels+=line_size;\ | |
750 for(i=0; i<h; i+=2){\ | |
751 uint32_t a= LD32(pixels );\ | |
752 uint32_t b= LD32(pixels+1);\ | |
753 l1= (a&0x03030303UL)\ | |
754 + (b&0x03030303UL);\ | |
755 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
756 + ((b&0xFCFCFCFCUL)>>2);\ | |
757 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
758 pixels+=line_size;\ | |
759 block +=line_size;\ | |
760 a= LD32(pixels );\ | |
761 b= LD32(pixels+1);\ | |
762 l0= (a&0x03030303UL)\ | |
763 + (b&0x03030303UL)\ | |
764 + 0x02020202UL;\ | |
765 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
766 + ((b&0xFCFCFCFCUL)>>2);\ | |
767 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
768 pixels+=line_size;\ | |
769 block +=line_size;\ | |
770 }\ | |
771 }\ | |
772 \ | |
773 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
774 {\ | |
775 int j;\ | |
776 for(j=0; j<2; j++){\ | |
777 int i;\ | |
778 const uint32_t a= LD32(pixels );\ | |
779 const uint32_t b= LD32(pixels+1);\ | |
780 uint32_t l0= (a&0x03030303UL)\ | |
781 + (b&0x03030303UL)\ | |
782 + 0x02020202UL;\ | |
783 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
784 + ((b&0xFCFCFCFCUL)>>2);\ | |
785 uint32_t l1,h1;\ | |
786 \ | |
787 pixels+=line_size;\ | |
788 for(i=0; i<h; i+=2){\ | |
789 uint32_t a= LD32(pixels );\ | |
790 uint32_t b= LD32(pixels+1);\ | |
791 l1= (a&0x03030303UL)\ | |
792 + (b&0x03030303UL);\ | |
793 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
794 + ((b&0xFCFCFCFCUL)>>2);\ | |
795 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
796 pixels+=line_size;\ | |
797 block +=line_size;\ | |
798 a= LD32(pixels );\ | |
799 b= LD32(pixels+1);\ | |
800 l0= (a&0x03030303UL)\ | |
801 + (b&0x03030303UL)\ | |
802 + 0x02020202UL;\ | |
803 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
804 + ((b&0xFCFCFCFCUL)>>2);\ | |
805 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
806 pixels+=line_size;\ | |
807 block +=line_size;\ | |
808 }\ | |
809 pixels+=4-line_size*(h+1);\ | |
810 block +=4-line_size*h;\ | |
811 }\ | |
812 }\ | |
813 \ | |
814 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ | |
815 {\ | |
816 int j;\ | |
817 for(j=0; j<2; j++){\ | |
818 int i;\ | |
819 const uint32_t a= LD32(pixels );\ | |
820 const uint32_t b= LD32(pixels+1);\ | |
821 uint32_t l0= (a&0x03030303UL)\ | |
822 + (b&0x03030303UL)\ | |
823 + 0x01010101UL;\ | |
824 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
825 + ((b&0xFCFCFCFCUL)>>2);\ | |
826 uint32_t l1,h1;\ | |
827 \ | |
828 pixels+=line_size;\ | |
829 for(i=0; i<h; i+=2){\ | |
830 uint32_t a= LD32(pixels );\ | |
831 uint32_t b= LD32(pixels+1);\ | |
832 l1= (a&0x03030303UL)\ | |
833 + (b&0x03030303UL);\ | |
834 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
835 + ((b&0xFCFCFCFCUL)>>2);\ | |
836 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
837 pixels+=line_size;\ | |
838 block +=line_size;\ | |
839 a= LD32(pixels );\ | |
840 b= LD32(pixels+1);\ | |
841 l0= (a&0x03030303UL)\ | |
842 + (b&0x03030303UL)\ | |
843 + 0x01010101UL;\ | |
844 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
845 + ((b&0xFCFCFCFCUL)>>2);\ | |
846 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
847 pixels+=line_size;\ | |
848 block +=line_size;\ | |
849 }\ | |
850 pixels+=4-line_size*(h+1);\ | |
851 block +=4-line_size*h;\ | |
852 }\ | |
853 }\ | |
854 \ | |
855 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
856 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
857 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
858 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
859 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
860 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
861 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
862 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
863 | |
864 #define op_avg(a, b) a = rnd_avg32(a, b) | |
865 #endif | |
866 #define op_put(a, b) a = b | |
867 | |
868 //PIXOP2(avg, op_avg) | |
869 //PIXOP2(put, op_put) | |
870 #undef op_avg | |
871 #undef op_put | |
872 | |
/*
 * Rounded integer averages of two and of four values.
 *
 * Every argument is parenthesized so that an argument containing an
 * operator of lower precedence than '+' (for example a | b or a << n) is
 * taken as one operand.  Each argument is evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
875 | |
876 /* init static data */ | |
877 void dsputil_static_init(void) | |
878 { | |
879 int i; | |
880 | |
881 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; | |
882 for(i=0;i<MAX_NEG_CROP;i++) { | |
883 cropTbl[i] = 0; | |
884 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
885 } | |
886 | |
887 for(i=0;i<512;i++) { | |
888 squareTbl[i] = (i - 256) * (i - 256); | |
889 } | |
890 | |
891 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | |
892 } |