137
|
1 /*
|
|
2 * DSP utils
|
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard.
|
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
|
5 *
|
|
6 * This library is free software; you can redistribute it and/or
|
|
7 * modify it under the terms of the GNU Lesser General Public
|
|
8 * License as published by the Free Software Foundation; either
|
|
9 * version 2 of the License, or (at your option) any later version.
|
|
10 *
|
|
11 * This library is distributed in the hope that it will be useful,
|
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14 * Lesser General Public License for more details.
|
|
15 *
|
|
16 * You should have received a copy of the GNU Lesser General Public
|
|
17 * License along with this library; if not, write to the Free Software
|
|
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19 *
|
|
20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
|
|
21 */
|
|
22
|
|
23 /**
|
|
24 * @file dsputil.c
|
|
25 * DSP utils
|
|
26 */
|
|
27
|
|
28 #include "avcodec.h"
|
|
29 #include "dsputil.h"
|
|
30 #include "simple_idct.h"
|
|
31
|
|
/* Clamping lookup table. dsputil_static_init() fills the first MAX_NEG_CROP
 * entries with 0, the next 256 entries with the identity 0..255 and the last
 * MAX_NEG_CROP entries with 255. */
32 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
|
|
/* Table of squares. dsputil_static_init() stores (i - 256) * (i - 256) at
 * index i, so (squareTbl + 256)[d] is d*d for -256 <= d <= 255. */
33 uint32_t squareTbl[512];
|
|
34
|
|
/* Zigzag scan order for an 8x8 block: entry k is the block position that is
 * visited k-th. The table is a permutation of 0..63. */
const uint8_t ff_zigzag_direct[64] = {
    /*  0 */  0,  1,  8, 16,  9,  2,  3, 10,
    /*  8 */ 17, 24, 32, 25, 18, 11,  4,  5,
    /* 16 */ 12, 19, 26, 33, 40, 48, 41, 34,
    /* 24 */ 27, 20, 13,  6,  7, 14, 21, 28,
    /* 32 */ 35, 42, 49, 56, 57, 50, 43, 36,
    /* 40 */ 29, 22, 15, 23, 30, 37, 44, 51,
    /* 48 */ 58, 59, 52, 45, 38, 31, 39, 46,
    /* 56 */ 53, 60, 61, 54, 47, 55, 62, 63,
};
|
|
45
|
|
/* Zigzag scan used with the 2-4-8 IDCT. Note that, unlike the specification,
 * the two fields are interleaved here. The table is a permutation of 0..63. */
const uint8_t ff_zigzag248_direct[64] = {
    /*  0 */  0,  8,  1,  9, 16, 24,  2, 10,
    /*  8 */ 17, 25, 32, 40, 48, 56, 33, 41,
    /* 16 */ 18, 26,  3, 11,  4, 12, 19, 27,
    /* 24 */ 34, 42, 49, 57, 50, 58, 35, 43,
    /* 32 */ 20, 28,  5, 13,  6, 14, 21, 29,
    /* 40 */ 36, 44, 51, 59, 52, 60, 37, 45,
    /* 48 */ 22, 30,  7, 15, 23, 31, 38, 46,
    /* 56 */ 53, 61, 54, 62, 39, 47, 55, 63,
};
|
|
58
|
|
59 /* Inverse of ff_zigzag_direct (not permuted), stored as scan position + 1;
   filled in by dsputil_static_init(). Intended for the MMX quantizer. */
|
|
60 uint16_t __align8 inv_zigzag_direct16[64];
|
|
61
|
|
/* Alternate (horizontal) scan order for an 8x8 block: entry k is the block
 * position visited k-th. The table is a permutation of 0..63. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    /*  0 */  0,  1,  2,  3,  8,  9, 16, 17,
    /*  8 */ 10, 11,  4,  5,  6,  7, 15, 14,
    /* 16 */ 13, 12, 19, 18, 24, 25, 32, 33,
    /* 24 */ 26, 27, 20, 21, 22, 23, 28, 29,
    /* 32 */ 30, 31, 34, 35, 40, 41, 48, 49,
    /* 40 */ 42, 43, 36, 37, 38, 39, 44, 45,
    /* 48 */ 46, 47, 50, 51, 56, 57, 58, 59,
    /* 56 */ 52, 53, 54, 55, 60, 61, 62, 63,
};
|
|
72
|
|
/* Alternate (vertical) scan order for an 8x8 block: entry k is the block
 * position visited k-th. The table is a permutation of 0..63. */
const uint8_t ff_alternate_vertical_scan[64] = {
    /*  0 */  0,  8, 16, 24,  1,  9,  2, 10,
    /*  8 */ 17, 25, 32, 40, 48, 56, 57, 49,
    /* 16 */ 41, 33, 26, 18,  3, 11,  4, 12,
    /* 24 */ 19, 27, 34, 42, 50, 58, 35, 43,
    /* 32 */ 51, 59, 20, 28,  5, 13,  6, 14,
    /* 40 */ 21, 29, 36, 44, 52, 60, 37, 45,
    /* 48 */ 53, 61, 22, 30,  7, 15, 23, 31,
    /* 56 */ 38, 46, 54, 62, 39, 47, 55, 63,
};
|
|
83
|
|
/* Fixed-point reciprocals: (a * inverse[b]) >> 32 == a / b, evaluated in
 * 64-bit arithmetic, for all 0 <= a <= 65536 and 2 <= b <= 255.
 * Entries 0 and 1 are outside that guarantee. */
const uint32_t inverse[256] = {
    /*   0 */          0U, 4294967295U, 2147483648U, 1431655766U, 1073741824U,  858993460U,  715827883U,  613566757U,
    /*   8 */  536870912U,  477218589U,  429496730U,  390451573U,  357913942U,  330382100U,  306783379U,  286331154U,
    /*  16 */  268435456U,  252645136U,  238609295U,  226050911U,  214748365U,  204522253U,  195225787U,  186737709U,
    /*  24 */  178956971U,  171798692U,  165191050U,  159072863U,  153391690U,  148102321U,  143165577U,  138547333U,
    /*  32 */  134217728U,  130150525U,  126322568U,  122713352U,  119304648U,  116080198U,  113025456U,  110127367U,
    /*  40 */  107374183U,  104755300U,  102261127U,   99882961U,   97612894U,   95443718U,   93368855U,   91382283U,
    /*  48 */   89478486U,   87652394U,   85899346U,   84215046U,   82595525U,   81037119U,   79536432U,   78090315U,
    /*  56 */   76695845U,   75350304U,   74051161U,   72796056U,   71582789U,   70409300U,   69273667U,   68174085U,
    /*  64 */   67108864U,   66076420U,   65075263U,   64103990U,   63161284U,   62245903U,   61356676U,   60492498U,
    /*  72 */   59652324U,   58835169U,   58040099U,   57266231U,   56512728U,   55778797U,   55063684U,   54366675U,
    /*  80 */   53687092U,   53024288U,   52377650U,   51746594U,   51130564U,   50529028U,   49941481U,   49367441U,
    /*  88 */   48806447U,   48258060U,   47721859U,   47197443U,   46684428U,   46182445U,   45691142U,   45210183U,
    /*  96 */   44739243U,   44278014U,   43826197U,   43383509U,   42949673U,   42524429U,   42107523U,   41698712U,
    /* 104 */   41297763U,   40904451U,   40518560U,   40139882U,   39768216U,   39403370U,   39045158U,   38693400U,
    /* 112 */   38347923U,   38008561U,   37675152U,   37347542U,   37025581U,   36709123U,   36398028U,   36092163U,
    /* 120 */   35791395U,   35495598U,   35204650U,   34918434U,   34636834U,   34359739U,   34087043U,   33818641U,
    /* 128 */   33554432U,   33294321U,   33038210U,   32786010U,   32537632U,   32292988U,   32051995U,   31814573U,
    /* 136 */   31580642U,   31350127U,   31122952U,   30899046U,   30678338U,   30460761U,   30246249U,   30034737U,
    /* 144 */   29826162U,   29620465U,   29417585U,   29217465U,   29020050U,   28825284U,   28633116U,   28443493U,
    /* 152 */   28256364U,   28071682U,   27889399U,   27709467U,   27531842U,   27356480U,   27183338U,   27012373U,
    /* 160 */   26843546U,   26676816U,   26512144U,   26349493U,   26188825U,   26030105U,   25873297U,   25718368U,
    /* 168 */   25565282U,   25414008U,   25264514U,   25116768U,   24970741U,   24826401U,   24683721U,   24542671U,
    /* 176 */   24403224U,   24265352U,   24129030U,   23994231U,   23860930U,   23729102U,   23598722U,   23469767U,
    /* 184 */   23342214U,   23216040U,   23091223U,   22967740U,   22845571U,   22724695U,   22605092U,   22486740U,
    /* 192 */   22369622U,   22253717U,   22139007U,   22025474U,   21913099U,   21801865U,   21691755U,   21582751U,
    /* 200 */   21474837U,   21367997U,   21262215U,   21157475U,   21053762U,   20951060U,   20849356U,   20748635U,
    /* 208 */   20648882U,   20550083U,   20452226U,   20355296U,   20259280U,   20164166U,   20069941U,   19976593U,
    /* 216 */   19884108U,   19792477U,   19701685U,   19611723U,   19522579U,   19434242U,   19346700U,   19259944U,
    /* 224 */   19173962U,   19088744U,   19004281U,   18920561U,   18837576U,   18755316U,   18673771U,   18592933U,
    /* 232 */   18512791U,   18433337U,   18354562U,   18276457U,   18199014U,   18122225U,   18046082U,   17970575U,
    /* 240 */   17895698U,   17821442U,   17747799U,   17674763U,   17602325U,   17530479U,   17459217U,   17388532U,
    /* 248 */   17318417U,   17248865U,   17179870U,   17111424U,   17043522U,   16976156U,   16909321U,   16843010U,
};
|
|
119
|
|
/* Coefficient order expected on input by simple_idct_mmx.
 * The table is a permutation of 0..63. */
static const uint8_t simple_mmx_permutation[64] = {
    /*  0 */ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
    /*  8 */ 0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
    /* 16 */ 0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
    /* 24 */ 0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
    /* 32 */ 0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
    /* 40 */ 0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
    /* 48 */ 0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
    /* 56 */ 0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};
|
|
131 #if 0
|
|
/**
 * Add up all samples of a 16x16 block.
 *
 * @param pix        first sample of the top row of the block
 * @param line_size  distance in bytes between the starts of consecutive rows
 * @return the sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
|
|
153
|
|
154 static int pix_norm1_c(uint8_t * pix, int line_size)
|
|
155 {
|
|
156 int s, i, j;
|
|
157 uint32_t *sq = squareTbl + 256;
|
|
158
|
|
159 s = 0;
|
|
160 for (i = 0; i < 16; i++) {
|
|
161 for (j = 0; j < 16; j += 8) {
|
|
162 #if 0
|
|
163 s += sq[pix[0]];
|
|
164 s += sq[pix[1]];
|
|
165 s += sq[pix[2]];
|
|
166 s += sq[pix[3]];
|
|
167 s += sq[pix[4]];
|
|
168 s += sq[pix[5]];
|
|
169 s += sq[pix[6]];
|
|
170 s += sq[pix[7]];
|
|
171 #else
|
|
172 #if LONG_MAX > 2147483647
|
|
173 register uint64_t x=*(uint64_t*)pix;
|
|
174 s += sq[x&0xff];
|
|
175 s += sq[(x>>8)&0xff];
|
|
176 s += sq[(x>>16)&0xff];
|
|
177 s += sq[(x>>24)&0xff];
|
|
178 s += sq[(x>>32)&0xff];
|
|
179 s += sq[(x>>40)&0xff];
|
|
180 s += sq[(x>>48)&0xff];
|
|
181 s += sq[(x>>56)&0xff];
|
|
182 #else
|
|
183 register uint32_t x=*(uint32_t*)pix;
|
|
184 s += sq[x&0xff];
|
|
185 s += sq[(x>>8)&0xff];
|
|
186 s += sq[(x>>16)&0xff];
|
|
187 s += sq[(x>>24)&0xff];
|
|
188 x=*(uint32_t*)(pix+4);
|
|
189 s += sq[x&0xff];
|
|
190 s += sq[(x>>8)&0xff];
|
|
191 s += sq[(x>>16)&0xff];
|
|
192 s += sq[(x>>24)&0xff];
|
|
193 #endif
|
|
194 #endif
|
|
195 pix += 8;
|
|
196 }
|
|
197 pix += line_size - 16;
|
|
198 }
|
|
199 return s;
|
|
200 }
|
|
201
|
|
/**
 * Store bswap_32() of each of the first w 32-bit words of src into dst.
 *
 * Words are processed in ascending index order.
 *
 * @param dst  destination array, at least w words
 * @param src  source array, at least w words
 * @param w    number of 32-bit words to convert
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
|
|
219
|
|
220 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
|
|
221 {
|
|
222 int s, i;
|
|
223 uint32_t *sq = squareTbl + 256;
|
|
224
|
|
225 s = 0;
|
|
226 for (i = 0; i < h; i++) {
|
|
227 s += sq[pix1[0] - pix2[0]];
|
|
228 s += sq[pix1[1] - pix2[1]];
|
|
229 s += sq[pix1[2] - pix2[2]];
|
|
230 s += sq[pix1[3] - pix2[3]];
|
|
231 s += sq[pix1[4] - pix2[4]];
|
|
232 s += sq[pix1[5] - pix2[5]];
|
|
233 s += sq[pix1[6] - pix2[6]];
|
|
234 s += sq[pix1[7] - pix2[7]];
|
|
235 pix1 += line_size;
|
|
236 pix2 += line_size;
|
|
237 }
|
|
238 return s;
|
|
239 }
|
|
240
|
|
241 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|
242 {
|
|
243 int s, i;
|
|
244 uint32_t *sq = squareTbl + 256;
|
|
245
|
|
246 s = 0;
|
|
247 for (i = 0; i < h; i++) {
|
|
248 s += sq[pix1[ 0] - pix2[ 0]];
|
|
249 s += sq[pix1[ 1] - pix2[ 1]];
|
|
250 s += sq[pix1[ 2] - pix2[ 2]];
|
|
251 s += sq[pix1[ 3] - pix2[ 3]];
|
|
252 s += sq[pix1[ 4] - pix2[ 4]];
|
|
253 s += sq[pix1[ 5] - pix2[ 5]];
|
|
254 s += sq[pix1[ 6] - pix2[ 6]];
|
|
255 s += sq[pix1[ 7] - pix2[ 7]];
|
|
256 s += sq[pix1[ 8] - pix2[ 8]];
|
|
257 s += sq[pix1[ 9] - pix2[ 9]];
|
|
258 s += sq[pix1[10] - pix2[10]];
|
|
259 s += sq[pix1[11] - pix2[11]];
|
|
260 s += sq[pix1[12] - pix2[12]];
|
|
261 s += sq[pix1[13] - pix2[13]];
|
|
262 s += sq[pix1[14] - pix2[14]];
|
|
263 s += sq[pix1[15] - pix2[15]];
|
|
264
|
|
265 pix1 += line_size;
|
|
266 pix2 += line_size;
|
|
267 }
|
|
268 return s;
|
|
269 }
|
|
270
|
|
271 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
|
|
272 {
|
|
273 int i;
|
|
274
|
|
275 /* read the pixels */
|
|
276 for(i=0;i<8;i++) {
|
|
277 block[0] = pixels[0];
|
|
278 block[1] = pixels[1];
|
|
279 block[2] = pixels[2];
|
|
280 block[3] = pixels[3];
|
|
281 block[4] = pixels[4];
|
|
282 block[5] = pixels[5];
|
|
283 block[6] = pixels[6];
|
|
284 block[7] = pixels[7];
|
|
285 pixels += line_size;
|
|
286 block += 8;
|
|
287 }
|
|
288 }
|
|
289
|
|
290 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
|
291 const uint8_t *s2, int stride){
|
|
292 int i;
|
|
293
|
|
294 /* read the pixels */
|
|
295 for(i=0;i<8;i++) {
|
|
296 block[0] = s1[0] - s2[0];
|
|
297 block[1] = s1[1] - s2[1];
|
|
298 block[2] = s1[2] - s2[2];
|
|
299 block[3] = s1[3] - s2[3];
|
|
300 block[4] = s1[4] - s2[4];
|
|
301 block[5] = s1[5] - s2[5];
|
|
302 block[6] = s1[6] - s2[6];
|
|
303 block[7] = s1[7] - s2[7];
|
|
304 s1 += stride;
|
|
305 s2 += stride;
|
|
306 block += 8;
|
|
307 }
|
|
308 }
|
|
309
|
|
310
|
|
311 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|
312 int line_size)
|
|
313 {
|
|
314 int i;
|
|
315 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
316
|
|
317 /* read the pixels */
|
|
318 for(i=0;i<8;i++) {
|
|
319 pixels[0] = cm[block[0]];
|
|
320 pixels[1] = cm[block[1]];
|
|
321 pixels[2] = cm[block[2]];
|
|
322 pixels[3] = cm[block[3]];
|
|
323 pixels[4] = cm[block[4]];
|
|
324 pixels[5] = cm[block[5]];
|
|
325 pixels[6] = cm[block[6]];
|
|
326 pixels[7] = cm[block[7]];
|
|
327
|
|
328 pixels += line_size;
|
|
329 block += 8;
|
|
330 }
|
|
331 }
|
|
332
|
|
333 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|
334 int line_size)
|
|
335 {
|
|
336 int i;
|
|
337 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
338
|
|
339 /* read the pixels */
|
|
340 for(i=0;i<8;i++) {
|
|
341 pixels[0] = cm[pixels[0] + block[0]];
|
|
342 pixels[1] = cm[pixels[1] + block[1]];
|
|
343 pixels[2] = cm[pixels[2] + block[2]];
|
|
344 pixels[3] = cm[pixels[3] + block[3]];
|
|
345 pixels[4] = cm[pixels[4] + block[4]];
|
|
346 pixels[5] = cm[pixels[5] + block[5]];
|
|
347 pixels[6] = cm[pixels[6] + block[6]];
|
|
348 pixels[7] = cm[pixels[7] + block[7]];
|
|
349 pixels += line_size;
|
|
350 block += 8;
|
|
351 }
|
|
352 }
|
|
353 #endif
|
|
354 #if 0
|
|
355
|
|
/*
 * 64-bit flavour of PIXOP2 (this branch is currently compiled out).
 *
 * PIXOP2(OPNAME, OP) defines a family of static functions that process rows
 * of 8 samples with one 64-bit load per row:
 *   OPNAME_pixels_c          plain copy/combine
 *   OPNAME_[no_rnd_]pixels_x2_c   average of each sample and its right neighbour
 *   OPNAME_[no_rnd_]pixels_y2_c   average of each sample and the one below
 *   OPNAME_[no_rnd_]pixels_xy2_c  average of the 2x2 neighbourhood
 * plus 16-wide wrappers built with CALL_2X_PIXELS.
 * OP(dst, val) decides how val is combined with the destination.
 *
 * The plain function is named OPNAME_pixels_c so that it matches the name
 * passed to CALL_2X_PIXELS below.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), LD64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* "avg" flavour of OP for 8 packed samples: per-byte rounded-up average of
 * the destination and the new value. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
|
|
496 #else // end of the (disabled) 64 bit variant; the 32 bit variant follows
|
|
497
|
|
/*
 * 32-bit flavour of PIXOP2.
 *
 * PIXOP2(OPNAME, OP) defines a family of static functions working on rows of
 * 2, 4, 8 or 16 samples using 16/32-bit loads:
 *   OPNAME_pixelsN_c             plain copy/combine
 *   OPNAME_[no_rnd_]pixelsN_l2   average of two sources
 *   OPNAME_[no_rnd_]pixelsN_l4   average of four sources
 *   OPNAME_[no_rnd_]pixelsN_x2_c / _y2_c / _xy2_c
 *                                average with the right / lower / 2x2 neighbours
 * OP(dst, val) decides how val is combined with the destination.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        OP(*((uint16_t *)(block)), LD16(pixels));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        OP(*((uint32_t *)(block)), LD32(pixels));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        OP(*((uint32_t *)(block    )), LD32(pixels    ));\
        OP(*((uint32_t *)(block + 4)), LD32(pixels + 4));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        uint32_t a, b;\
        a = LD32(&src1[y * src_stride1]);\
        b = LD32(&src2[y * src_stride2]);\
        OP(*((uint32_t *)&dst[y * dst_stride]), no_rnd_avg32(a, b));\
        a = LD32(&src1[y * src_stride1 + 4]);\
        b = LD32(&src2[y * src_stride2 + 4]);\
        OP(*((uint32_t *)&dst[y * dst_stride + 4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        uint32_t a, b;\
        a = LD32(&src1[y * src_stride1]);\
        b = LD32(&src2[y * src_stride2]);\
        OP(*((uint32_t *)&dst[y * dst_stride]), rnd_avg32(a, b));\
        a = LD32(&src1[y * src_stride1 + 4]);\
        b = LD32(&src2[y * src_stride2 + 4]);\
        OP(*((uint32_t *)&dst[y * dst_stride + 4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        uint32_t a, b;\
        a = LD32(&src1[y * src_stride1]);\
        b = LD32(&src2[y * src_stride2]);\
        OP(*((uint32_t *)&dst[y * dst_stride]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        uint32_t a, b;\
        a = LD16(&src1[y * src_stride1]);\
        b = LD16(&src2[y * src_stride2]);\
        OP(*((uint16_t *)&dst[y * dst_stride]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h)\
{\
    OPNAME ## _pixels8_l2(dst    , src1    , src2    , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h)\
{\
    OPNAME ## _no_rnd_pixels8_l2(dst    , src1    , src2    , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels + 1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _pixels8_l2(block, pixels, pixels + 1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels + line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _pixels8_l2(block, pixels, pixels + line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                                         int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        /* left four samples */\
        a = LD32(&src1[y * src_stride1]);\
        b = LD32(&src2[y * src_stride2]);\
        c = LD32(&src3[y * src_stride3]);\
        d = LD32(&src4[y * src_stride4]);\
        l0 = (a & 0x03030303UL)\
           + (b & 0x03030303UL)\
           + 0x02020202UL;\
        h0 = ((a & 0xFCFCFCFCUL) >> 2)\
           + ((b & 0xFCFCFCFCUL) >> 2);\
        l1 = (c & 0x03030303UL)\
           + (d & 0x03030303UL);\
        h1 = ((c & 0xFCFCFCFCUL) >> 2)\
           + ((d & 0xFCFCFCFCUL) >> 2);\
        OP(*((uint32_t *)&dst[y * dst_stride]), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
        /* right four samples */\
        a = LD32(&src1[y * src_stride1 + 4]);\
        b = LD32(&src2[y * src_stride2 + 4]);\
        c = LD32(&src3[y * src_stride3 + 4]);\
        d = LD32(&src4[y * src_stride4 + 4]);\
        l0 = (a & 0x03030303UL)\
           + (b & 0x03030303UL)\
           + 0x02020202UL;\
        h0 = ((a & 0xFCFCFCFCUL) >> 2)\
           + ((b & 0xFCFCFCFCUL) >> 2);\
        l1 = (c & 0x03030303UL)\
           + (d & 0x03030303UL);\
        h1 = ((c & 0xFCFCFCFCUL) >> 2)\
           + ((d & 0xFCFCFCFCUL) >> 2);\
        OP(*((uint32_t *)&dst[y * dst_stride + 4]), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _pixels4_l2(block, pixels, pixels + 1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _pixels4_l2(block, pixels, pixels + line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _pixels2_l2(block, pixels, pixels + 1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    OPNAME ## _pixels2_l2(block, pixels, pixels + line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                                                int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h)\
{\
    int y;\
    for (y = 0; y < h; y++) {\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        /* left four samples */\
        a = LD32(&src1[y * src_stride1]);\
        b = LD32(&src2[y * src_stride2]);\
        c = LD32(&src3[y * src_stride3]);\
        d = LD32(&src4[y * src_stride4]);\
        l0 = (a & 0x03030303UL)\
           + (b & 0x03030303UL)\
           + 0x01010101UL;\
        h0 = ((a & 0xFCFCFCFCUL) >> 2)\
           + ((b & 0xFCFCFCFCUL) >> 2);\
        l1 = (c & 0x03030303UL)\
           + (d & 0x03030303UL);\
        h1 = ((c & 0xFCFCFCFCUL) >> 2)\
           + ((d & 0xFCFCFCFCUL) >> 2);\
        OP(*((uint32_t *)&dst[y * dst_stride]), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
        /* right four samples */\
        a = LD32(&src1[y * src_stride1 + 4]);\
        b = LD32(&src2[y * src_stride2 + 4]);\
        c = LD32(&src3[y * src_stride3 + 4]);\
        d = LD32(&src4[y * src_stride4 + 4]);\
        l0 = (a & 0x03030303UL)\
           + (b & 0x03030303UL)\
           + 0x01010101UL;\
        h0 = ((a & 0xFCFCFCFCUL) >> 2)\
           + ((b & 0xFCFCFCFCUL) >> 2);\
        l1 = (c & 0x03030303UL)\
           + (d & 0x03030303UL);\
        h1 = ((c & 0xFCFCFCFCUL) >> 2)\
           + ((d & 0xFCFCFCFCUL) >> 2);\
        OP(*((uint32_t *)&dst[y * dst_stride + 4]), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                                          int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h)\
{\
    OPNAME ## _pixels8_l4(dst    , src1    , src2    , src3    , src4    , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst + 8, src1 + 8, src2 + 8, src3 + 8, src4 + 8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                                                 int dst_stride, int src_stride1, int src_stride2, int src_stride3, int src_stride4, int h)\
{\
    OPNAME ## _no_rnd_pixels8_l4(dst    , src1    , src2    , src3    , src4    , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst + 8, src1 + 8, src2 + 8, src3 + 8, src4 + 8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int y, a0, b0, a1, b1;\
    a0  = pixels[0];\
    b0  = pixels[1] + 2;\
    a0 += b0;\
    b0 += pixels[2];\
\
    pixels += line_size;\
    for (y = 0; y < h; y += 2) {\
        a1  = pixels[0];\
        b1  = pixels[1];\
        a1 += b1;\
        b1 += pixels[2];\
\
        block[0] = (a1 + a0) >> 2; /* FIXME non put */\
        block[1] = (b1 + b0) >> 2;\
\
        pixels += line_size;\
        block  += line_size;\
\
        a0  = pixels[0];\
        b0  = pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        block[0] = (a1 + a0) >> 2;\
        block[1] = (b1 + b0) >> 2;\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int y;\
    const uint32_t a = LD32(pixels    );\
    const uint32_t b = LD32(pixels + 1);\
    uint32_t l0 = (a & 0x03030303UL)\
                + (b & 0x03030303UL)\
                + 0x02020202UL;\
    uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2)\
                + ((b & 0xFCFCFCFCUL) >> 2);\
    uint32_t l1, h1;\
\
    pixels += line_size;\
    for (y = 0; y < h; y += 2) {\
        uint32_t c = LD32(pixels    );\
        uint32_t d = LD32(pixels + 1);\
        l1 = (c & 0x03030303UL)\
           + (d & 0x03030303UL);\
        h1 = ((c & 0xFCFCFCFCUL) >> 2)\
           + ((d & 0xFCFCFCFCUL) >> 2);\
        OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
        pixels += line_size;\
        block  += line_size;\
        c = LD32(pixels    );\
        d = LD32(pixels + 1);\
        l0 = (c & 0x03030303UL)\
           + (d & 0x03030303UL)\
           + 0x02020202UL;\
        h0 = ((c & 0xFCFCFCFCUL) >> 2)\
           + ((d & 0xFCFCFCFCUL) >> 2);\
        OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
        pixels += line_size;\
        block  += line_size;\
    }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int half;\
    /* the 8-wide block is handled as two 4-wide columns */\
    for (half = 0; half < 2; half++) {\
        int y;\
        const uint32_t a = LD32(pixels    );\
        const uint32_t b = LD32(pixels + 1);\
        uint32_t l0 = (a & 0x03030303UL)\
                    + (b & 0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2)\
                    + ((b & 0xFCFCFCFCUL) >> 2);\
        uint32_t l1, h1;\
\
        pixels += line_size;\
        for (y = 0; y < h; y += 2) {\
            uint32_t c = LD32(pixels    );\
            uint32_t d = LD32(pixels + 1);\
            l1 = (c & 0x03030303UL)\
               + (d & 0x03030303UL);\
            h1 = ((c & 0xFCFCFCFCUL) >> 2)\
               + ((d & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
            c = LD32(pixels    );\
            d = LD32(pixels + 1);\
            l0 = (c & 0x03030303UL)\
               + (d & 0x03030303UL)\
               + 0x02020202UL;\
            h0 = ((c & 0xFCFCFCFCUL) >> 2)\
               + ((d & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
        }\
        /* rewind to the top row and step 4 samples to the right */\
        pixels += 4 - line_size * (h + 1);\
        block  += 4 - line_size * h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int half;\
    /* the 8-wide block is handled as two 4-wide columns */\
    for (half = 0; half < 2; half++) {\
        int y;\
        const uint32_t a = LD32(pixels    );\
        const uint32_t b = LD32(pixels + 1);\
        uint32_t l0 = (a & 0x03030303UL)\
                    + (b & 0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2)\
                    + ((b & 0xFCFCFCFCUL) >> 2);\
        uint32_t l1, h1;\
\
        pixels += line_size;\
        for (y = 0; y < h; y += 2) {\
            uint32_t c = LD32(pixels    );\
            uint32_t d = LD32(pixels + 1);\
            l1 = (c & 0x03030303UL)\
               + (d & 0x03030303UL);\
            h1 = ((c & 0xFCFCFCFCUL) >> 2)\
               + ((d & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
            c = LD32(pixels    );\
            d = LD32(pixels + 1);\
            l0 = (c & 0x03030303UL)\
               + (d & 0x03030303UL)\
               + 0x01010101UL;\
            h0 = ((c & 0xFCFCFCFCUL) >> 2)\
               + ((d & 0xFCFCFCFCUL) >> 2);\
            OP(*((uint32_t *)block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));\
            pixels += line_size;\
            block  += line_size;\
        }\
        /* rewind to the top row and step 4 samples to the right */\
        pixels += 4 - line_size * (h + 1);\
        block  += 4 - line_size * h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels8_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c    , OPNAME ## _pixels8_c           , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)

/* "avg" flavour of OP: combine destination and new value with rnd_avg32(). */
#define op_avg(a, b) a = rnd_avg32(a, b)
|
|
865 #endif
|
|
/* "put" flavour of OP for PIXOP2: simply assign the new value b to a. */
866 #define op_put(a, b) a = b
|
|
867
|
|
868 //PIXOP2(avg, op_avg)
|
|
869 //PIXOP2(put, op_put)
|
|
870 #undef op_avg
|
|
871 #undef op_put
|
|
872
|
|
/* Rounded-up average of two values. Every argument is parenthesized so that
 * expressions with low-precedence operators (|, &, ?:, ...) expand correctly. */
#define avg2(a,b) (((a)+(b)+1)>>1)
/* Rounded average of four values (adds 2 before dividing by 4). */
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
|
|
875
|
|
876 /* init static data */
|
|
877 void dsputil_static_init(void)
|
|
878 {
|
|
879 int i;
|
|
880
|
|
881 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
|
|
882 for(i=0;i<MAX_NEG_CROP;i++) {
|
|
883 cropTbl[i] = 0;
|
|
884 cropTbl[i + MAX_NEG_CROP + 256] = 255;
|
|
885 }
|
|
886
|
|
887 for(i=0;i<512;i++) {
|
|
888 squareTbl[i] = (i - 256) * (i - 256);
|
|
889 }
|
|
890
|
|
891 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
|
|
892 }
|