808
|
1 /*
|
|
2 * DSP utils
|
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard.
|
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
|
5 *
|
832
|
6 * This file is part of FFmpeg.
|
|
7 *
|
|
8 * FFmpeg is free software; you can redistribute it and/or
|
808
|
9 * modify it under the terms of the GNU Lesser General Public
|
|
10 * License as published by the Free Software Foundation; either
|
832
|
11 * version 2.1 of the License, or (at your option) any later version.
|
808
|
12 *
|
832
|
13 * FFmpeg is distributed in the hope that it will be useful,
|
808
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16 * Lesser General Public License for more details.
|
|
17 *
|
|
18 * You should have received a copy of the GNU Lesser General Public
|
832
|
19 * License along with FFmpeg; if not, write to the Free Software
|
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
808
|
21 *
|
|
22 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
|
|
23 */
|
832
|
24
|
808
|
25 /**
|
|
26 * @file dsputil.c
|
|
27 * DSP utils
|
|
28 */
|
832
|
29
|
808
|
30 #include "avcodec.h"
|
|
31 #include "dsputil.h"
|
|
32 #include "simple_idct.h"
|
832
|
33 #include "faandct.h"
|
|
34 #include "snow.h"
|
808
|
35
|
832
|
36 /* snow.c */
|
|
37 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
|
|
38
|
|
39 /* vorbis.c */
|
|
40 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
|
|
41
|
|
42 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
|
|
43 uint32_t squareTbl[512] = {0, };
|
808
|
44
|
|
45 const uint8_t ff_zigzag_direct[64] = {
|
|
46 0, 1, 8, 16, 9, 2, 3, 10,
|
|
47 17, 24, 32, 25, 18, 11, 4, 5,
|
|
48 12, 19, 26, 33, 40, 48, 41, 34,
|
|
49 27, 20, 13, 6, 7, 14, 21, 28,
|
|
50 35, 42, 49, 56, 57, 50, 43, 36,
|
|
51 29, 22, 15, 23, 30, 37, 44, 51,
|
|
52 58, 59, 52, 45, 38, 31, 39, 46,
|
|
53 53, 60, 61, 54, 47, 55, 62, 63
|
|
54 };
|
|
55
|
|
56 /* Specific zigzag scan for 248 idct. NOTE that unlike the
|
|
57 specification, we interleave the fields */
|
|
58 const uint8_t ff_zigzag248_direct[64] = {
|
|
59 0, 8, 1, 9, 16, 24, 2, 10,
|
|
60 17, 25, 32, 40, 48, 56, 33, 41,
|
|
61 18, 26, 3, 11, 4, 12, 19, 27,
|
|
62 34, 42, 49, 57, 50, 58, 35, 43,
|
|
63 20, 28, 5, 13, 6, 14, 21, 29,
|
|
64 36, 44, 51, 59, 52, 60, 37, 45,
|
|
65 22, 30, 7, 15, 23, 31, 38, 46,
|
|
66 53, 61, 54, 62, 39, 47, 55, 63,
|
|
67 };
|
|
68
|
|
69 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
|
832
|
70 DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
|
808
|
71
|
|
72 const uint8_t ff_alternate_horizontal_scan[64] = {
|
832
|
73 0, 1, 2, 3, 8, 9, 16, 17,
|
808
|
74 10, 11, 4, 5, 6, 7, 15, 14,
|
832
|
75 13, 12, 19, 18, 24, 25, 32, 33,
|
808
|
76 26, 27, 20, 21, 22, 23, 28, 29,
|
832
|
77 30, 31, 34, 35, 40, 41, 48, 49,
|
808
|
78 42, 43, 36, 37, 38, 39, 44, 45,
|
832
|
79 46, 47, 50, 51, 56, 57, 58, 59,
|
808
|
80 52, 53, 54, 55, 60, 61, 62, 63,
|
|
81 };
|
|
82
|
|
83 const uint8_t ff_alternate_vertical_scan[64] = {
|
832
|
84 0, 8, 16, 24, 1, 9, 2, 10,
|
808
|
85 17, 25, 32, 40, 48, 56, 57, 49,
|
832
|
86 41, 33, 26, 18, 3, 11, 4, 12,
|
808
|
87 19, 27, 34, 42, 50, 58, 35, 43,
|
832
|
88 51, 59, 20, 28, 5, 13, 6, 14,
|
808
|
89 21, 29, 36, 44, 52, 60, 37, 45,
|
832
|
90 53, 61, 22, 30, 7, 15, 23, 31,
|
808
|
91 38, 46, 54, 62, 39, 47, 55, 63,
|
|
92 };
|
|
93
|
|
94 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
|
|
95 const uint32_t inverse[256]={
|
832
|
96 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
|
|
97 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
|
|
98 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
|
|
99 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
|
|
100 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
|
|
101 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
|
|
102 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
|
|
103 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
|
|
104 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
|
|
105 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
|
|
106 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
|
|
107 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
|
|
108 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
|
|
109 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
|
|
110 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
|
|
111 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
|
|
112 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
|
|
113 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
|
|
114 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
|
|
115 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
|
|
116 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
|
|
117 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
|
|
118 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
|
|
119 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
|
|
120 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
|
|
121 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
|
|
122 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
|
|
123 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
|
|
124 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
|
|
125 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
|
|
126 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
|
808
|
127 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
|
|
128 };
|
|
129
|
|
130 /* Input permutation for the simple_idct_mmx */
|
|
131 static const uint8_t simple_mmx_permutation[64]={
|
832
|
132 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
|
133 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
|
134 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
|
135 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
|
136 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
|
137 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
|
138 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
|
139 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
808
|
140 };
|
832
|
141
|
808
|
142 static int pix_sum_c(uint8_t * pix, int line_size)
|
|
143 {
|
|
144 int s, i, j;
|
|
145
|
|
146 s = 0;
|
|
147 for (i = 0; i < 16; i++) {
|
832
|
148 for (j = 0; j < 16; j += 8) {
|
|
149 s += pix[0];
|
|
150 s += pix[1];
|
|
151 s += pix[2];
|
|
152 s += pix[3];
|
|
153 s += pix[4];
|
|
154 s += pix[5];
|
|
155 s += pix[6];
|
|
156 s += pix[7];
|
|
157 pix += 8;
|
|
158 }
|
|
159 pix += line_size - 16;
|
808
|
160 }
|
|
161 return s;
|
|
162 }
|
|
163
|
|
164 static int pix_norm1_c(uint8_t * pix, int line_size)
|
|
165 {
|
|
166 int s, i, j;
|
|
167 uint32_t *sq = squareTbl + 256;
|
|
168
|
|
169 s = 0;
|
|
170 for (i = 0; i < 16; i++) {
|
832
|
171 for (j = 0; j < 16; j += 8) {
|
808
|
172 #if 0
|
832
|
173 s += sq[pix[0]];
|
|
174 s += sq[pix[1]];
|
|
175 s += sq[pix[2]];
|
|
176 s += sq[pix[3]];
|
|
177 s += sq[pix[4]];
|
|
178 s += sq[pix[5]];
|
|
179 s += sq[pix[6]];
|
|
180 s += sq[pix[7]];
|
808
|
181 #else
|
|
182 #if LONG_MAX > 2147483647
|
832
|
183 register uint64_t x=*(uint64_t*)pix;
|
|
184 s += sq[x&0xff];
|
|
185 s += sq[(x>>8)&0xff];
|
|
186 s += sq[(x>>16)&0xff];
|
|
187 s += sq[(x>>24)&0xff];
|
808
|
188 s += sq[(x>>32)&0xff];
|
|
189 s += sq[(x>>40)&0xff];
|
|
190 s += sq[(x>>48)&0xff];
|
|
191 s += sq[(x>>56)&0xff];
|
|
192 #else
|
832
|
193 register uint32_t x=*(uint32_t*)pix;
|
|
194 s += sq[x&0xff];
|
|
195 s += sq[(x>>8)&0xff];
|
|
196 s += sq[(x>>16)&0xff];
|
|
197 s += sq[(x>>24)&0xff];
|
808
|
198 x=*(uint32_t*)(pix+4);
|
|
199 s += sq[x&0xff];
|
|
200 s += sq[(x>>8)&0xff];
|
|
201 s += sq[(x>>16)&0xff];
|
|
202 s += sq[(x>>24)&0xff];
|
|
203 #endif
|
|
204 #endif
|
832
|
205 pix += 8;
|
|
206 }
|
|
207 pix += line_size - 16;
|
808
|
208 }
|
|
209 return s;
|
|
210 }
|
|
211
|
|
212 static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
|
|
213 int i;
|
832
|
214
|
808
|
215 for(i=0; i+8<=w; i+=8){
|
|
216 dst[i+0]= bswap_32(src[i+0]);
|
|
217 dst[i+1]= bswap_32(src[i+1]);
|
|
218 dst[i+2]= bswap_32(src[i+2]);
|
|
219 dst[i+3]= bswap_32(src[i+3]);
|
|
220 dst[i+4]= bswap_32(src[i+4]);
|
|
221 dst[i+5]= bswap_32(src[i+5]);
|
|
222 dst[i+6]= bswap_32(src[i+6]);
|
|
223 dst[i+7]= bswap_32(src[i+7]);
|
|
224 }
|
|
225 for(;i<w; i++){
|
|
226 dst[i+0]= bswap_32(src[i+0]);
|
|
227 }
|
|
228 }
|
|
229
|
832
|
230 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
|
|
231 {
|
|
232 int s, i;
|
|
233 uint32_t *sq = squareTbl + 256;
|
|
234
|
|
235 s = 0;
|
|
236 for (i = 0; i < h; i++) {
|
|
237 s += sq[pix1[0] - pix2[0]];
|
|
238 s += sq[pix1[1] - pix2[1]];
|
|
239 s += sq[pix1[2] - pix2[2]];
|
|
240 s += sq[pix1[3] - pix2[3]];
|
|
241 pix1 += line_size;
|
|
242 pix2 += line_size;
|
|
243 }
|
|
244 return s;
|
|
245 }
|
|
246
|
808
|
247 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
|
|
248 {
|
|
249 int s, i;
|
|
250 uint32_t *sq = squareTbl + 256;
|
|
251
|
|
252 s = 0;
|
|
253 for (i = 0; i < h; i++) {
|
|
254 s += sq[pix1[0] - pix2[0]];
|
|
255 s += sq[pix1[1] - pix2[1]];
|
|
256 s += sq[pix1[2] - pix2[2]];
|
|
257 s += sq[pix1[3] - pix2[3]];
|
|
258 s += sq[pix1[4] - pix2[4]];
|
|
259 s += sq[pix1[5] - pix2[5]];
|
|
260 s += sq[pix1[6] - pix2[6]];
|
|
261 s += sq[pix1[7] - pix2[7]];
|
|
262 pix1 += line_size;
|
|
263 pix2 += line_size;
|
|
264 }
|
|
265 return s;
|
|
266 }
|
|
267
|
|
268 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|
269 {
|
|
270 int s, i;
|
|
271 uint32_t *sq = squareTbl + 256;
|
|
272
|
|
273 s = 0;
|
|
274 for (i = 0; i < h; i++) {
|
|
275 s += sq[pix1[ 0] - pix2[ 0]];
|
|
276 s += sq[pix1[ 1] - pix2[ 1]];
|
|
277 s += sq[pix1[ 2] - pix2[ 2]];
|
|
278 s += sq[pix1[ 3] - pix2[ 3]];
|
|
279 s += sq[pix1[ 4] - pix2[ 4]];
|
|
280 s += sq[pix1[ 5] - pix2[ 5]];
|
|
281 s += sq[pix1[ 6] - pix2[ 6]];
|
|
282 s += sq[pix1[ 7] - pix2[ 7]];
|
|
283 s += sq[pix1[ 8] - pix2[ 8]];
|
|
284 s += sq[pix1[ 9] - pix2[ 9]];
|
|
285 s += sq[pix1[10] - pix2[10]];
|
|
286 s += sq[pix1[11] - pix2[11]];
|
|
287 s += sq[pix1[12] - pix2[12]];
|
|
288 s += sq[pix1[13] - pix2[13]];
|
|
289 s += sq[pix1[14] - pix2[14]];
|
|
290 s += sq[pix1[15] - pix2[15]];
|
|
291
|
|
292 pix1 += line_size;
|
|
293 pix2 += line_size;
|
|
294 }
|
|
295 return s;
|
|
296 }
|
|
297
|
832
|
298
|
|
299 #ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
|
|
300 static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
|
|
301 int s, i, j;
|
|
302 const int dec_count= w==8 ? 3 : 4;
|
|
303 int tmp[32*32];
|
|
304 int level, ori;
|
|
305 static const int scale[2][2][4][4]={
|
|
306 {
|
|
307 {
|
|
308 // 9/7 8x8 dec=3
|
|
309 {268, 239, 239, 213},
|
|
310 { 0, 224, 224, 152},
|
|
311 { 0, 135, 135, 110},
|
|
312 },{
|
|
313 // 9/7 16x16 or 32x32 dec=4
|
|
314 {344, 310, 310, 280},
|
|
315 { 0, 320, 320, 228},
|
|
316 { 0, 175, 175, 136},
|
|
317 { 0, 129, 129, 102},
|
|
318 }
|
|
319 },{
|
|
320 {
|
|
321 // 5/3 8x8 dec=3
|
|
322 {275, 245, 245, 218},
|
|
323 { 0, 230, 230, 156},
|
|
324 { 0, 138, 138, 113},
|
|
325 },{
|
|
326 // 5/3 16x16 or 32x32 dec=4
|
|
327 {352, 317, 317, 286},
|
|
328 { 0, 328, 328, 233},
|
|
329 { 0, 180, 180, 140},
|
|
330 { 0, 132, 132, 105},
|
|
331 }
|
|
332 }
|
|
333 };
|
|
334
|
|
335 for (i = 0; i < h; i++) {
|
|
336 for (j = 0; j < w; j+=4) {
|
|
337 tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
|
|
338 tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
|
|
339 tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
|
|
340 tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
|
|
341 }
|
|
342 pix1 += line_size;
|
|
343 pix2 += line_size;
|
|
344 }
|
|
345
|
|
346 ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
|
|
347
|
|
348 s=0;
|
|
349 assert(w==h);
|
|
350 for(level=0; level<dec_count; level++){
|
|
351 for(ori= level ? 1 : 0; ori<4; ori++){
|
|
352 int size= w>>(dec_count-level);
|
|
353 int sx= (ori&1) ? size : 0;
|
|
354 int stride= 32<<(dec_count-level);
|
|
355 int sy= (ori&2) ? stride>>1 : 0;
|
|
356
|
|
357 for(i=0; i<size; i++){
|
|
358 for(j=0; j<size; j++){
|
|
359 int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
|
|
360 s += FFABS(v);
|
|
361 }
|
|
362 }
|
|
363 }
|
|
364 }
|
|
365 assert(s>=0);
|
|
366 return s>>9;
|
|
367 }
|
|
368
|
|
369 static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
|
|
370 return w_c(v, pix1, pix2, line_size, 8, h, 1);
|
|
371 }
|
|
372
|
|
373 static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
|
|
374 return w_c(v, pix1, pix2, line_size, 8, h, 0);
|
|
375 }
|
|
376
|
|
377 static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
|
|
378 return w_c(v, pix1, pix2, line_size, 16, h, 1);
|
|
379 }
|
|
380
|
|
381 static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
|
|
382 return w_c(v, pix1, pix2, line_size, 16, h, 0);
|
|
383 }
|
|
384
|
|
385 int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
|
|
386 return w_c(v, pix1, pix2, line_size, 32, h, 1);
|
|
387 }
|
|
388
|
|
389 int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
|
|
390 return w_c(v, pix1, pix2, line_size, 32, h, 0);
|
|
391 }
|
|
392 #endif
|
|
393
|
808
|
394 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
|
|
395 {
|
|
396 int i;
|
|
397
|
|
398 /* read the pixels */
|
|
399 for(i=0;i<8;i++) {
|
|
400 block[0] = pixels[0];
|
|
401 block[1] = pixels[1];
|
|
402 block[2] = pixels[2];
|
|
403 block[3] = pixels[3];
|
|
404 block[4] = pixels[4];
|
|
405 block[5] = pixels[5];
|
|
406 block[6] = pixels[6];
|
|
407 block[7] = pixels[7];
|
|
408 pixels += line_size;
|
|
409 block += 8;
|
|
410 }
|
|
411 }
|
|
412
|
|
413 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
|
832
|
414 const uint8_t *s2, int stride){
|
808
|
415 int i;
|
|
416
|
|
417 /* read the pixels */
|
|
418 for(i=0;i<8;i++) {
|
|
419 block[0] = s1[0] - s2[0];
|
|
420 block[1] = s1[1] - s2[1];
|
|
421 block[2] = s1[2] - s2[2];
|
|
422 block[3] = s1[3] - s2[3];
|
|
423 block[4] = s1[4] - s2[4];
|
|
424 block[5] = s1[5] - s2[5];
|
|
425 block[6] = s1[6] - s2[6];
|
|
426 block[7] = s1[7] - s2[7];
|
|
427 s1 += stride;
|
|
428 s2 += stride;
|
|
429 block += 8;
|
|
430 }
|
|
431 }
|
|
432
|
|
433
|
|
434 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
832
|
435 int line_size)
|
808
|
436 {
|
|
437 int i;
|
|
438 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
832
|
439
|
808
|
440 /* read the pixels */
|
|
441 for(i=0;i<8;i++) {
|
|
442 pixels[0] = cm[block[0]];
|
|
443 pixels[1] = cm[block[1]];
|
|
444 pixels[2] = cm[block[2]];
|
|
445 pixels[3] = cm[block[3]];
|
|
446 pixels[4] = cm[block[4]];
|
|
447 pixels[5] = cm[block[5]];
|
|
448 pixels[6] = cm[block[6]];
|
|
449 pixels[7] = cm[block[7]];
|
|
450
|
|
451 pixels += line_size;
|
|
452 block += 8;
|
|
453 }
|
|
454 }
|
|
455
|
832
|
456 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|
457 int line_size)
|
|
458 {
|
|
459 int i;
|
|
460 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
461
|
|
462 /* read the pixels */
|
|
463 for(i=0;i<4;i++) {
|
|
464 pixels[0] = cm[block[0]];
|
|
465 pixels[1] = cm[block[1]];
|
|
466 pixels[2] = cm[block[2]];
|
|
467 pixels[3] = cm[block[3]];
|
|
468
|
|
469 pixels += line_size;
|
|
470 block += 8;
|
|
471 }
|
|
472 }
|
|
473
|
|
474 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|
475 int line_size)
|
|
476 {
|
|
477 int i;
|
|
478 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
479
|
|
480 /* read the pixels */
|
|
481 for(i=0;i<2;i++) {
|
|
482 pixels[0] = cm[block[0]];
|
|
483 pixels[1] = cm[block[1]];
|
|
484
|
|
485 pixels += line_size;
|
|
486 block += 8;
|
|
487 }
|
|
488 }
|
|
489
|
|
490 static void put_signed_pixels_clamped_c(const DCTELEM *block,
|
|
491 uint8_t *restrict pixels,
|
|
492 int line_size)
|
|
493 {
|
|
494 int i, j;
|
|
495
|
|
496 for (i = 0; i < 8; i++) {
|
|
497 for (j = 0; j < 8; j++) {
|
|
498 if (*block < -128)
|
|
499 *pixels = 0;
|
|
500 else if (*block > 127)
|
|
501 *pixels = 255;
|
|
502 else
|
|
503 *pixels = (uint8_t)(*block + 128);
|
|
504 block++;
|
|
505 pixels++;
|
|
506 }
|
|
507 pixels += (line_size - 8);
|
|
508 }
|
|
509 }
|
|
510
|
808
|
511 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|
512 int line_size)
|
|
513 {
|
|
514 int i;
|
|
515 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
832
|
516
|
808
|
517 /* read the pixels */
|
|
518 for(i=0;i<8;i++) {
|
|
519 pixels[0] = cm[pixels[0] + block[0]];
|
|
520 pixels[1] = cm[pixels[1] + block[1]];
|
|
521 pixels[2] = cm[pixels[2] + block[2]];
|
|
522 pixels[3] = cm[pixels[3] + block[3]];
|
|
523 pixels[4] = cm[pixels[4] + block[4]];
|
|
524 pixels[5] = cm[pixels[5] + block[5]];
|
|
525 pixels[6] = cm[pixels[6] + block[6]];
|
|
526 pixels[7] = cm[pixels[7] + block[7]];
|
|
527 pixels += line_size;
|
|
528 block += 8;
|
|
529 }
|
|
530 }
|
832
|
531
|
|
532 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|
533 int line_size)
|
|
534 {
|
|
535 int i;
|
|
536 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
537
|
|
538 /* read the pixels */
|
|
539 for(i=0;i<4;i++) {
|
|
540 pixels[0] = cm[pixels[0] + block[0]];
|
|
541 pixels[1] = cm[pixels[1] + block[1]];
|
|
542 pixels[2] = cm[pixels[2] + block[2]];
|
|
543 pixels[3] = cm[pixels[3] + block[3]];
|
|
544 pixels += line_size;
|
|
545 block += 8;
|
|
546 }
|
|
547 }
|
|
548
|
|
549 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
|
|
550 int line_size)
|
|
551 {
|
|
552 int i;
|
|
553 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
554
|
|
555 /* read the pixels */
|
|
556 for(i=0;i<2;i++) {
|
|
557 pixels[0] = cm[pixels[0] + block[0]];
|
|
558 pixels[1] = cm[pixels[1] + block[1]];
|
|
559 pixels += line_size;
|
|
560 block += 8;
|
|
561 }
|
|
562 }
|
|
563
|
|
564 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
|
|
565 {
|
|
566 int i;
|
|
567 for(i=0;i<8;i++) {
|
|
568 pixels[0] += block[0];
|
|
569 pixels[1] += block[1];
|
|
570 pixels[2] += block[2];
|
|
571 pixels[3] += block[3];
|
|
572 pixels[4] += block[4];
|
|
573 pixels[5] += block[5];
|
|
574 pixels[6] += block[6];
|
|
575 pixels[7] += block[7];
|
|
576 pixels += line_size;
|
|
577 block += 8;
|
|
578 }
|
|
579 }
|
|
580
|
|
581 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
|
|
582 {
|
|
583 int i;
|
|
584 for(i=0;i<4;i++) {
|
|
585 pixels[0] += block[0];
|
|
586 pixels[1] += block[1];
|
|
587 pixels[2] += block[2];
|
|
588 pixels[3] += block[3];
|
|
589 pixels += line_size;
|
|
590 block += 4;
|
|
591 }
|
|
592 }
|
|
593
|
808
|
594 #if 0
|
|
595
|
|
596 #define PIXOP2(OPNAME, OP) \
|
|
597 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
598 {\
|
|
599 int i;\
|
|
600 for(i=0; i<h; i++){\
|
|
601 OP(*((uint64_t*)block), LD64(pixels));\
|
|
602 pixels+=line_size;\
|
|
603 block +=line_size;\
|
|
604 }\
|
|
605 }\
|
|
606 \
|
|
607 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
608 {\
|
|
609 int i;\
|
|
610 for(i=0; i<h; i++){\
|
|
611 const uint64_t a= LD64(pixels );\
|
|
612 const uint64_t b= LD64(pixels+1);\
|
|
613 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
|
|
614 pixels+=line_size;\
|
|
615 block +=line_size;\
|
|
616 }\
|
|
617 }\
|
|
618 \
|
|
619 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
620 {\
|
|
621 int i;\
|
|
622 for(i=0; i<h; i++){\
|
|
623 const uint64_t a= LD64(pixels );\
|
|
624 const uint64_t b= LD64(pixels+1);\
|
|
625 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
|
|
626 pixels+=line_size;\
|
|
627 block +=line_size;\
|
|
628 }\
|
|
629 }\
|
|
630 \
|
|
631 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
632 {\
|
|
633 int i;\
|
|
634 for(i=0; i<h; i++){\
|
|
635 const uint64_t a= LD64(pixels );\
|
|
636 const uint64_t b= LD64(pixels+line_size);\
|
|
637 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
|
|
638 pixels+=line_size;\
|
|
639 block +=line_size;\
|
|
640 }\
|
|
641 }\
|
|
642 \
|
|
643 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
644 {\
|
|
645 int i;\
|
|
646 for(i=0; i<h; i++){\
|
|
647 const uint64_t a= LD64(pixels );\
|
|
648 const uint64_t b= LD64(pixels+line_size);\
|
|
649 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
|
|
650 pixels+=line_size;\
|
|
651 block +=line_size;\
|
|
652 }\
|
|
653 }\
|
|
654 \
|
|
655 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
656 {\
|
|
657 int i;\
|
|
658 const uint64_t a= LD64(pixels );\
|
|
659 const uint64_t b= LD64(pixels+1);\
|
|
660 uint64_t l0= (a&0x0303030303030303ULL)\
|
|
661 + (b&0x0303030303030303ULL)\
|
|
662 + 0x0202020202020202ULL;\
|
|
663 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
|
|
664 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
|
|
665 uint64_t l1,h1;\
|
|
666 \
|
|
667 pixels+=line_size;\
|
|
668 for(i=0; i<h; i+=2){\
|
|
669 uint64_t a= LD64(pixels );\
|
|
670 uint64_t b= LD64(pixels+1);\
|
|
671 l1= (a&0x0303030303030303ULL)\
|
|
672 + (b&0x0303030303030303ULL);\
|
|
673 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
|
|
674 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
|
|
675 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
|
|
676 pixels+=line_size;\
|
|
677 block +=line_size;\
|
|
678 a= LD64(pixels );\
|
|
679 b= LD64(pixels+1);\
|
|
680 l0= (a&0x0303030303030303ULL)\
|
|
681 + (b&0x0303030303030303ULL)\
|
|
682 + 0x0202020202020202ULL;\
|
|
683 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
|
|
684 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
|
|
685 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
|
|
686 pixels+=line_size;\
|
|
687 block +=line_size;\
|
|
688 }\
|
|
689 }\
|
|
690 \
|
|
691 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
692 {\
|
|
693 int i;\
|
|
694 const uint64_t a= LD64(pixels );\
|
|
695 const uint64_t b= LD64(pixels+1);\
|
|
696 uint64_t l0= (a&0x0303030303030303ULL)\
|
|
697 + (b&0x0303030303030303ULL)\
|
|
698 + 0x0101010101010101ULL;\
|
|
699 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
|
|
700 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
|
|
701 uint64_t l1,h1;\
|
|
702 \
|
|
703 pixels+=line_size;\
|
|
704 for(i=0; i<h; i+=2){\
|
|
705 uint64_t a= LD64(pixels );\
|
|
706 uint64_t b= LD64(pixels+1);\
|
|
707 l1= (a&0x0303030303030303ULL)\
|
|
708 + (b&0x0303030303030303ULL);\
|
|
709 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
|
|
710 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
|
|
711 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
|
|
712 pixels+=line_size;\
|
|
713 block +=line_size;\
|
|
714 a= LD64(pixels );\
|
|
715 b= LD64(pixels+1);\
|
|
716 l0= (a&0x0303030303030303ULL)\
|
|
717 + (b&0x0303030303030303ULL)\
|
|
718 + 0x0101010101010101ULL;\
|
|
719 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
|
|
720 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
|
|
721 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
|
|
722 pixels+=line_size;\
|
|
723 block +=line_size;\
|
|
724 }\
|
|
725 }\
|
|
726 \
|
|
727 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
|
|
728 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
|
|
729 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
|
|
730 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
|
|
731 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
|
|
732 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
|
|
733 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
|
|
734
|
|
735 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
|
|
736 #else // 64 bit variant
|
|
737
|
|
738 #define PIXOP2(OPNAME, OP) \
|
|
739 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
740 int i;\
|
|
741 for(i=0; i<h; i++){\
|
|
742 OP(*((uint16_t*)(block )), LD16(pixels ));\
|
|
743 pixels+=line_size;\
|
|
744 block +=line_size;\
|
|
745 }\
|
|
746 }\
|
|
747 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
748 int i;\
|
|
749 for(i=0; i<h; i++){\
|
|
750 OP(*((uint32_t*)(block )), LD32(pixels ));\
|
|
751 pixels+=line_size;\
|
|
752 block +=line_size;\
|
|
753 }\
|
|
754 }\
|
|
755 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
756 int i;\
|
|
757 for(i=0; i<h; i++){\
|
|
758 OP(*((uint32_t*)(block )), LD32(pixels ));\
|
|
759 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
|
|
760 pixels+=line_size;\
|
|
761 block +=line_size;\
|
|
762 }\
|
|
763 }\
|
|
764 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
765 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
|
|
766 }\
|
|
767 \
|
|
768 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
|
769 int src_stride1, int src_stride2, int h){\
|
|
770 int i;\
|
|
771 for(i=0; i<h; i++){\
|
|
772 uint32_t a,b;\
|
|
773 a= LD32(&src1[i*src_stride1 ]);\
|
|
774 b= LD32(&src2[i*src_stride2 ]);\
|
|
775 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
|
|
776 a= LD32(&src1[i*src_stride1+4]);\
|
|
777 b= LD32(&src2[i*src_stride2+4]);\
|
|
778 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
|
|
779 }\
|
|
780 }\
|
|
781 \
|
|
782 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
|
783 int src_stride1, int src_stride2, int h){\
|
|
784 int i;\
|
|
785 for(i=0; i<h; i++){\
|
|
786 uint32_t a,b;\
|
|
787 a= LD32(&src1[i*src_stride1 ]);\
|
|
788 b= LD32(&src2[i*src_stride2 ]);\
|
|
789 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
|
|
790 a= LD32(&src1[i*src_stride1+4]);\
|
|
791 b= LD32(&src2[i*src_stride2+4]);\
|
|
792 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
|
|
793 }\
|
|
794 }\
|
|
795 \
|
|
796 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
|
797 int src_stride1, int src_stride2, int h){\
|
|
798 int i;\
|
|
799 for(i=0; i<h; i++){\
|
|
800 uint32_t a,b;\
|
|
801 a= LD32(&src1[i*src_stride1 ]);\
|
|
802 b= LD32(&src2[i*src_stride2 ]);\
|
|
803 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
|
|
804 }\
|
|
805 }\
|
|
806 \
|
|
807 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
|
808 int src_stride1, int src_stride2, int h){\
|
|
809 int i;\
|
|
810 for(i=0; i<h; i++){\
|
|
811 uint32_t a,b;\
|
|
812 a= LD16(&src1[i*src_stride1 ]);\
|
|
813 b= LD16(&src2[i*src_stride2 ]);\
|
|
814 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
|
|
815 }\
|
|
816 }\
|
|
817 \
|
|
818 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
|
819 int src_stride1, int src_stride2, int h){\
|
|
820 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
|
|
821 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
|
|
822 }\
|
|
823 \
|
|
824 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
|
|
825 int src_stride1, int src_stride2, int h){\
|
|
826 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
|
|
827 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
|
|
828 }\
|
|
829 \
|
|
830 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
831 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
|
832 }\
|
|
833 \
|
|
834 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
835 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
|
836 }\
|
|
837 \
|
|
838 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
839 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
|
840 }\
|
|
841 \
|
|
842 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
843 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
|
844 }\
|
|
845 \
|
|
846 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
|
847 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
848 int i;\
|
|
849 for(i=0; i<h; i++){\
|
|
850 uint32_t a, b, c, d, l0, l1, h0, h1;\
|
|
851 a= LD32(&src1[i*src_stride1]);\
|
|
852 b= LD32(&src2[i*src_stride2]);\
|
|
853 c= LD32(&src3[i*src_stride3]);\
|
|
854 d= LD32(&src4[i*src_stride4]);\
|
|
855 l0= (a&0x03030303UL)\
|
|
856 + (b&0x03030303UL)\
|
|
857 + 0x02020202UL;\
|
|
858 h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
859 + ((b&0xFCFCFCFCUL)>>2);\
|
|
860 l1= (c&0x03030303UL)\
|
|
861 + (d&0x03030303UL);\
|
|
862 h1= ((c&0xFCFCFCFCUL)>>2)\
|
|
863 + ((d&0xFCFCFCFCUL)>>2);\
|
|
864 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
865 a= LD32(&src1[i*src_stride1+4]);\
|
|
866 b= LD32(&src2[i*src_stride2+4]);\
|
|
867 c= LD32(&src3[i*src_stride3+4]);\
|
|
868 d= LD32(&src4[i*src_stride4+4]);\
|
|
869 l0= (a&0x03030303UL)\
|
|
870 + (b&0x03030303UL)\
|
|
871 + 0x02020202UL;\
|
|
872 h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
873 + ((b&0xFCFCFCFCUL)>>2);\
|
|
874 l1= (c&0x03030303UL)\
|
|
875 + (d&0x03030303UL);\
|
|
876 h1= ((c&0xFCFCFCFCUL)>>2)\
|
|
877 + ((d&0xFCFCFCFCUL)>>2);\
|
|
878 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
879 }\
|
|
880 }\
|
|
881 \
|
|
882 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
883 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
|
884 }\
|
|
885 \
|
|
886 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
887 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
|
888 }\
|
|
889 \
|
|
890 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
891 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
|
|
892 }\
|
|
893 \
|
|
894 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
895 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
|
|
896 }\
|
|
897 \
|
|
898 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
|
899 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
900 int i;\
|
|
901 for(i=0; i<h; i++){\
|
|
902 uint32_t a, b, c, d, l0, l1, h0, h1;\
|
|
903 a= LD32(&src1[i*src_stride1]);\
|
|
904 b= LD32(&src2[i*src_stride2]);\
|
|
905 c= LD32(&src3[i*src_stride3]);\
|
|
906 d= LD32(&src4[i*src_stride4]);\
|
|
907 l0= (a&0x03030303UL)\
|
|
908 + (b&0x03030303UL)\
|
|
909 + 0x01010101UL;\
|
|
910 h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
911 + ((b&0xFCFCFCFCUL)>>2);\
|
|
912 l1= (c&0x03030303UL)\
|
|
913 + (d&0x03030303UL);\
|
|
914 h1= ((c&0xFCFCFCFCUL)>>2)\
|
|
915 + ((d&0xFCFCFCFCUL)>>2);\
|
|
916 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
917 a= LD32(&src1[i*src_stride1+4]);\
|
|
918 b= LD32(&src2[i*src_stride2+4]);\
|
|
919 c= LD32(&src3[i*src_stride3+4]);\
|
|
920 d= LD32(&src4[i*src_stride4+4]);\
|
|
921 l0= (a&0x03030303UL)\
|
|
922 + (b&0x03030303UL)\
|
|
923 + 0x01010101UL;\
|
|
924 h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
925 + ((b&0xFCFCFCFCUL)>>2);\
|
|
926 l1= (c&0x03030303UL)\
|
|
927 + (d&0x03030303UL);\
|
|
928 h1= ((c&0xFCFCFCFCUL)>>2)\
|
|
929 + ((d&0xFCFCFCFCUL)>>2);\
|
|
930 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
931 }\
|
|
932 }\
|
|
933 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
|
934 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
935 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
|
936 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
|
937 }\
|
|
938 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
|
|
939 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
|
|
940 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
|
941 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
|
|
942 }\
|
|
943 \
|
|
944 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
945 {\
|
|
946 int i, a0, b0, a1, b1;\
|
|
947 a0= pixels[0];\
|
|
948 b0= pixels[1] + 2;\
|
|
949 a0 += b0;\
|
|
950 b0 += pixels[2];\
|
|
951 \
|
|
952 pixels+=line_size;\
|
|
953 for(i=0; i<h; i+=2){\
|
|
954 a1= pixels[0];\
|
|
955 b1= pixels[1];\
|
|
956 a1 += b1;\
|
|
957 b1 += pixels[2];\
|
|
958 \
|
|
959 block[0]= (a1+a0)>>2; /* FIXME non put */\
|
|
960 block[1]= (b1+b0)>>2;\
|
|
961 \
|
|
962 pixels+=line_size;\
|
|
963 block +=line_size;\
|
|
964 \
|
|
965 a0= pixels[0];\
|
|
966 b0= pixels[1] + 2;\
|
|
967 a0 += b0;\
|
|
968 b0 += pixels[2];\
|
|
969 \
|
|
970 block[0]= (a1+a0)>>2;\
|
|
971 block[1]= (b1+b0)>>2;\
|
|
972 pixels+=line_size;\
|
|
973 block +=line_size;\
|
|
974 }\
|
|
975 }\
|
|
976 \
|
|
977 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
978 {\
|
|
979 int i;\
|
|
980 const uint32_t a= LD32(pixels );\
|
|
981 const uint32_t b= LD32(pixels+1);\
|
|
982 uint32_t l0= (a&0x03030303UL)\
|
|
983 + (b&0x03030303UL)\
|
|
984 + 0x02020202UL;\
|
|
985 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
986 + ((b&0xFCFCFCFCUL)>>2);\
|
|
987 uint32_t l1,h1;\
|
|
988 \
|
|
989 pixels+=line_size;\
|
|
990 for(i=0; i<h; i+=2){\
|
|
991 uint32_t a= LD32(pixels );\
|
|
992 uint32_t b= LD32(pixels+1);\
|
|
993 l1= (a&0x03030303UL)\
|
|
994 + (b&0x03030303UL);\
|
|
995 h1= ((a&0xFCFCFCFCUL)>>2)\
|
|
996 + ((b&0xFCFCFCFCUL)>>2);\
|
|
997 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
998 pixels+=line_size;\
|
|
999 block +=line_size;\
|
|
1000 a= LD32(pixels );\
|
|
1001 b= LD32(pixels+1);\
|
|
1002 l0= (a&0x03030303UL)\
|
|
1003 + (b&0x03030303UL)\
|
|
1004 + 0x02020202UL;\
|
|
1005 h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
1006 + ((b&0xFCFCFCFCUL)>>2);\
|
|
1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
1008 pixels+=line_size;\
|
|
1009 block +=line_size;\
|
|
1010 }\
|
|
1011 }\
|
|
1012 \
|
|
1013 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
1014 {\
|
|
1015 int j;\
|
|
1016 for(j=0; j<2; j++){\
|
|
1017 int i;\
|
|
1018 const uint32_t a= LD32(pixels );\
|
|
1019 const uint32_t b= LD32(pixels+1);\
|
|
1020 uint32_t l0= (a&0x03030303UL)\
|
|
1021 + (b&0x03030303UL)\
|
|
1022 + 0x02020202UL;\
|
|
1023 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
1024 + ((b&0xFCFCFCFCUL)>>2);\
|
|
1025 uint32_t l1,h1;\
|
|
1026 \
|
|
1027 pixels+=line_size;\
|
|
1028 for(i=0; i<h; i+=2){\
|
|
1029 uint32_t a= LD32(pixels );\
|
|
1030 uint32_t b= LD32(pixels+1);\
|
|
1031 l1= (a&0x03030303UL)\
|
|
1032 + (b&0x03030303UL);\
|
|
1033 h1= ((a&0xFCFCFCFCUL)>>2)\
|
|
1034 + ((b&0xFCFCFCFCUL)>>2);\
|
|
1035 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
1036 pixels+=line_size;\
|
|
1037 block +=line_size;\
|
|
1038 a= LD32(pixels );\
|
|
1039 b= LD32(pixels+1);\
|
|
1040 l0= (a&0x03030303UL)\
|
|
1041 + (b&0x03030303UL)\
|
|
1042 + 0x02020202UL;\
|
|
1043 h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
1044 + ((b&0xFCFCFCFCUL)>>2);\
|
|
1045 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
1046 pixels+=line_size;\
|
|
1047 block +=line_size;\
|
|
1048 }\
|
|
1049 pixels+=4-line_size*(h+1);\
|
|
1050 block +=4-line_size*h;\
|
|
1051 }\
|
|
1052 }\
|
|
1053 \
|
|
1054 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
|
|
1055 {\
|
|
1056 int j;\
|
|
1057 for(j=0; j<2; j++){\
|
|
1058 int i;\
|
|
1059 const uint32_t a= LD32(pixels );\
|
|
1060 const uint32_t b= LD32(pixels+1);\
|
|
1061 uint32_t l0= (a&0x03030303UL)\
|
|
1062 + (b&0x03030303UL)\
|
|
1063 + 0x01010101UL;\
|
|
1064 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
1065 + ((b&0xFCFCFCFCUL)>>2);\
|
|
1066 uint32_t l1,h1;\
|
|
1067 \
|
|
1068 pixels+=line_size;\
|
|
1069 for(i=0; i<h; i+=2){\
|
|
1070 uint32_t a= LD32(pixels );\
|
|
1071 uint32_t b= LD32(pixels+1);\
|
|
1072 l1= (a&0x03030303UL)\
|
|
1073 + (b&0x03030303UL);\
|
|
1074 h1= ((a&0xFCFCFCFCUL)>>2)\
|
|
1075 + ((b&0xFCFCFCFCUL)>>2);\
|
|
1076 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
1077 pixels+=line_size;\
|
|
1078 block +=line_size;\
|
|
1079 a= LD32(pixels );\
|
|
1080 b= LD32(pixels+1);\
|
|
1081 l0= (a&0x03030303UL)\
|
|
1082 + (b&0x03030303UL)\
|
|
1083 + 0x01010101UL;\
|
|
1084 h0= ((a&0xFCFCFCFCUL)>>2)\
|
|
1085 + ((b&0xFCFCFCFCUL)>>2);\
|
|
1086 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
|
|
1087 pixels+=line_size;\
|
|
1088 block +=line_size;\
|
|
1089 }\
|
|
1090 pixels+=4-line_size*(h+1);\
|
|
1091 block +=4-line_size*h;\
|
|
1092 }\
|
|
1093 }\
|
|
1094 \
|
|
1095 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
|
|
1096 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
|
|
1097 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
|
|
1098 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
|
|
1099 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
|
|
1100 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
|
|
1101 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
|
|
1102 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
|
|
1103
|
|
1104 #define op_avg(a, b) a = rnd_avg32(a, b)
|
|
1105 #endif
|
|
1106 #define op_put(a, b) a = b
|
|
1107
|
832
|
1108 PIXOP2(avg, op_avg)
|
|
1109 PIXOP2(put, op_put)
|
808
|
1110 #undef op_avg
|
|
1111 #undef op_put
|
|
1112
|
|
1113 #define avg2(a,b) ((a+b+1)>>1)
|
|
1114 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
|
|
1115
|
832
|
/* Average two 16-wide pixel blocks without rounding and store the result.
 * Thin wrapper over the macro-generated put_no_rnd_pixels16_l2(), using the
 * same stride for dst and both sources. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
|
|
1119
|
|
/* Average two 8-wide pixel blocks without rounding and store the result.
 * Thin wrapper over the macro-generated put_no_rnd_pixels8_l2(), using the
 * same stride for dst and both sources. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
|
|
1123
|
|
/* One-warp-point global motion compensation (GMC) for an 8-pixel-wide block.
 * Bilinear interpolation at 1/16-pel precision: x16/y16 are the fractional
 * offsets (0..15), so the four corner weights sum to 256 and the final shift
 * by 8 normalizes.  'rounder' is added before the shift.  Reads one extra row
 * (src[stride+...]) and one extra column beyond the 8x h block. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* bilinear corner weights: top-left, top-right, bottom-left, bottom-right */
    const int wTL = (16 - x16) * (16 - y16);
    const int wTR = (     x16) * (16 - y16);
    const int wBL = (16 - x16) * (     y16);
    const int wBR = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (wTL * src[col]
                      + wTR * src[col + 1]
                      + wBL * src[stride + col]
                      + wBR * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
|
|
1146
|
|
/* General (affine) global motion compensation for an 8-pixel-wide block.
 * (ox,oy) is the 16.16 fixed-point source position of the first pixel;
 * (dxx,dxy,dyx,dyy) are the per-pixel / per-line increments of that position.
 * 'shift' gives the sub-pel precision (s = 1<<shift fractional steps),
 * 'r' is the rounding constant added before the final >>(shift*2).
 * Samples outside [0,width-1]x[0,height-1] are clamped to the nearest edge
 * via clip() (project helper, defined elsewhere). */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* convert to inclusive maximum coordinates for the edge checks below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        /* restart the fixed-point position at the left edge of this line */
        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            /* drop the 16-bit fractional part, then split the remaining
               coordinate into integer (>>shift) and sub-pel (&(s-1)) parts */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare rejects negative coordinates as well */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of 4 neighbours */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate only in x
                       (*s keeps the same >>(shift*2) normalization) */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                        + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate only in y */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* outside in both directions: nearest edge sample */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            /* step one pixel to the right */
            vx+= dxx;
            vy+= dyx;
        }
        /* step one line down */
        ox += dxy;
        oy += dyy;
    }
}
|
|
1204
|
|
/* Integer-position (no interpolation) copy for third-pel MC: dispatch to the
 * macro-generated put_pixels*_c copy routine for the given block width.
 * Widths other than 2/4/8/16 are silently ignored. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
|
|
1213
|
|
/* Third-pel horizontal interpolation at offset 1/3:
 * dst = round((2*left + right)/3); 683/2048 ~= 1/3, hence the >>11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = height; row > 0; row--) {
        int col = 0;
        while (col < width) {
            dst[col] = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            col++;
        }
        src += stride;
        dst += stride;
    }
}
|
|
1224
|
|
/* Third-pel horizontal interpolation at offset 2/3:
 * dst = round((left + 2*right)/3); 683/2048 ~= 1/3, hence the >>11. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1235
|
|
/* Third-pel vertical interpolation at offset 1/3:
 * dst = round((2*top + bottom)/3); reads one row below the block. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1246
|
|
/* Third-pel 2-D interpolation at offset (1/3,1/3): weighted average of the
 * 2x2 neighbourhood with weights 4/3/3/2 (sum 12); 2731/32768 ~= 1/12,
 * hence the >>15.  Reads one extra row and column. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1257
|
|
/* Third-pel 2-D interpolation at offset (1/3,2/3): 2x2 neighbourhood with
 * weights 3/2/4/3 (sum 12); 2731/32768 ~= 1/12, hence the >>15. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1268
|
|
/* Third-pel vertical interpolation at offset 2/3:
 * dst = round((top + 2*bottom)/3); reads one row below the block. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1279
|
|
/* Third-pel 2-D interpolation at offset (2/3,1/3): 2x2 neighbourhood with
 * weights 3/4/2/3 (sum 12); 2731/32768 ~= 1/12, hence the >>15. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1290
|
|
/* Third-pel 2-D interpolation at offset (2/3,2/3): 2x2 neighbourhood with
 * weights 2/3/3/4 (sum 12); 2731/32768 ~= 1/12, hence the >>15.
 * Reads one extra row and column beyond the block. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int p00 = src[x];
            const int p01 = src[x + 1];
            const int p10 = src[x + stride];
            const int p11 = src[x + stride + 1];
            dst[x] = (2731*(2*p00 + 3*p01 + 3*p10 + 4*p11 + 6)) >> 15;
        }
    }
}
|
|
1301
|
|
/* Integer-position averaging for third-pel MC: dispatch to the macro-
 * generated avg_pixels*_c routine (dst = rounded average of dst and src)
 * for the given block width.  Other widths are silently ignored. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
|
|
1310
|
|
/* Averaging variant of mc10: compute the third-pel value at horizontal
 * offset 1/3 (round((2*left+right)/3), 683/2048 ~= 1/3) and average it into
 * dst with rounding. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;
    for (row = 0; row < height; row++) {
        int col;
        for (col = 0; col < width; col++) {
            const int interp = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
|
|
1321
|
|
/* Averaging variant of mc20: third-pel value at horizontal offset 2/3,
 * averaged into dst with rounding. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1332
|
|
/* Averaging variant of mc01: third-pel value at vertical offset 1/3,
 * averaged into dst with rounding.  Reads one row below the block. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1343
|
|
/* Averaging variant of mc11: 2-D third-pel value at offset (1/3,1/3)
 * (weights 4/3/3/2, 2731/32768 ~= 1/12), averaged into dst with rounding. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1354
|
|
/* Averaging variant of mc12: 2-D third-pel value at offset (1/3,2/3)
 * (weights 3/2/4/3), averaged into dst with rounding. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1365
|
|
/* Averaging variant of mc02: third-pel value at vertical offset 2/3,
 * averaged into dst with rounding.  Reads one row below the block. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1376
|
|
/* Averaging variant of mc21: 2-D third-pel value at offset (2/3,1/3)
 * (weights 3/4/2/3), averaged into dst with rounding. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1387
|
|
/* Averaging variant of mc22: 2-D third-pel value at offset (2/3,2/3)
 * (weights 2/3/3/4), averaged into dst with rounding. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
|
|
1398 #if 0
|
|
1399 #define TPEL_WIDTH(width)\
|
|
1400 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1401 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
|
|
1402 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1403 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
|
|
1404 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1405 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
|
|
1406 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1407 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
|
|
1408 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1409 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
|
|
1410 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1411 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
|
|
1412 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1413 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
|
|
1414 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1415 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
|
|
1416 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
|
|
1417 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
|
|
1418 #endif
|
|
1419
|
|
/* Generate H.264 chroma motion-compensation functions for block widths
 * 2, 4 and 8.  (x,y) is the 1/8-pel fractional position (0..7); the four
 * bilinear corner weights A..D sum to 64 and OP (op_put/op_avg, defined at
 * the expansion site) performs the +32 >>6 normalization and the store or
 * rounded average into dst.  One extra row and column of src are read. */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
        dst+= stride;\
        src+= stride;\
    }\
}
|
|
1482
|
|
1483 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
|
|
1484 #define op_put(a, b) a = (((b) + 32)>>6)
|
|
1485
|
|
1486 H264_CHROMA_MC(put_ , op_put)
|
|
1487 H264_CHROMA_MC(avg_ , op_avg)
|
|
1488 #undef op_avg
|
|
1489 #undef op_put
|
|
1490
|
|
/* H.264 8-wide chroma MC with "no rounding" bias: same bilinear weighting
 * as the generated mc8 functions (weights sum to 64) but using rounding
 * constant 32-4 = 28 before the >>6 instead of 32.  (x,y) is the 1/8-pel
 * fractional position; one extra row and column of src are read. */
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* bilinear corner weights: top-left, top-right, bottom-left, bottom-right */
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++) {
            dst[j] = (A * src[j]
                    + B * src[j + 1]
                    + C * src[stride + j]
                    + D * src[stride + j + 1]
                    + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
|
|
1514
|
|
/* Copy a 2-pixel-wide block of h rows.  LD16/ST16 are project macros for
 * 16-bit loads/stores (presumably unaligned-safe — defined elsewhere). */
static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST16(dst   , LD16(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}
|
|
1525
|
|
/* Copy a 4-pixel-wide block of h rows using 32-bit loads/stores
 * (LD32/ST32 are project macros defined elsewhere). */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}
|
|
1536
|
|
/* Copy an 8-pixel-wide block of h rows as two 32-bit words per row. */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}
|
|
1548
|
|
/* Copy a 16-pixel-wide block of h rows as four 32-bit words per row. */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}
|
|
1562
|
|
/* Copy a 17-pixel-wide block of h rows: four 32-bit words plus the odd
 * trailing byte.  17 = 16 + 1 extra column needed by qpel filtering. */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
    }
}
|
|
1577
|
|
/* Copy a 9-pixel-wide block of h rows: two 32-bit words plus the odd
 * trailing byte.  9 = 8 + 1 extra column needed by qpel filtering. */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst[8]= src[8];
        dst+=dstStride;
        src+=srcStride;
    }
}
|
|
1590
|
|
1591
|
|
1592 #define QPEL_MC(r, OPNAME, RND, OP) \
|
|
1593 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
|
|
1594 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
|
|
1595 int i;\
|
|
1596 for(i=0; i<h; i++)\
|
|
1597 {\
|
|
1598 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
|
|
1599 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
|
|
1600 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
|
|
1601 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
|
|
1602 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
|
|
1603 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
|
|
1604 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
|
|
1605 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
|
|
1606 dst+=dstStride;\
|
|
1607 src+=srcStride;\
|
|
1608 }\
|
|
1609 }\
|
|
1610 \
|
|
1611 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
|
1612 const int w=8;\
|
|
1613 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
|
|
1614 int i;\
|
|
1615 for(i=0; i<w; i++)\
|
|
1616 {\
|
|
1617 const int src0= src[0*srcStride];\
|
|
1618 const int src1= src[1*srcStride];\
|
|
1619 const int src2= src[2*srcStride];\
|
|
1620 const int src3= src[3*srcStride];\
|
|
1621 const int src4= src[4*srcStride];\
|
|
1622 const int src5= src[5*srcStride];\
|
|
1623 const int src6= src[6*srcStride];\
|
|
1624 const int src7= src[7*srcStride];\
|
|
1625 const int src8= src[8*srcStride];\
|
|
1626 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
|
|
1627 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
|
|
1628 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
|
|
1629 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
|
|
1630 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
|
|
1631 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
|
|
1632 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
|
|
1633 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
|
|
1634 dst++;\
|
|
1635 src++;\
|
|
1636 }\
|
|
1637 }\
|
|
1638 \
|
|
1639 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
|
|
1640 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
|
|
1641 int i;\
|
|
1642 \
|
|
1643 for(i=0; i<h; i++)\
|
|
1644 {\
|
|
1645 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
|
|
1646 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
|
|
1647 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
|
|
1648 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
|
|
1649 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
|
|
1650 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
|
|
1651 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
|
|
1652 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
|
|
1653 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
|
|
1654 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
|
|
1655 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
|
|
1656 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
|
|
1657 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
|
|
1658 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
|
|
1659 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
|
|
1660 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
|
|
1661 dst+=dstStride;\
|
|
1662 src+=srcStride;\
|
|
1663 }\
|
|
1664 }\
|
|
1665 \
|
|
1666 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
|
|
1667 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
|
|
1668 int i;\
|
|
1669 const int w=16;\
|
|
1670 for(i=0; i<w; i++)\
|
|
1671 {\
|
|
1672 const int src0= src[0*srcStride];\
|
|
1673 const int src1= src[1*srcStride];\
|
|
1674 const int src2= src[2*srcStride];\
|
|
1675 const int src3= src[3*srcStride];\
|
|
1676 const int src4= src[4*srcStride];\
|
|
1677 const int src5= src[5*srcStride];\
|
|
1678 const int src6= src[6*srcStride];\
|
|
1679 const int src7= src[7*srcStride];\
|
|
1680 const int src8= src[8*srcStride];\
|
|
1681 const int src9= src[9*srcStride];\
|
|
1682 const int src10= src[10*srcStride];\
|
|
1683 const int src11= src[11*srcStride];\
|
|
1684 const int src12= src[12*srcStride];\
|
|
1685 const int src13= src[13*srcStride];\
|
|
1686 const int src14= src[14*srcStride];\
|
|
1687 const int src15= src[15*srcStride];\
|
|
1688 const int src16= src[16*srcStride];\
|
|
1689 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
|
|
1690 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
|
|
1691 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
|
|
1692 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
|
|
1693 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
|
|
1694 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
|
|
1695 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
|
|
1696 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
|
|
1697 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
|
|
1698 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
|
|
1699 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
|
|
1700 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
|
|
1701 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
|
|
1702 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
|
|
1703 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
|
|
1704 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
|
|
1705 dst++;\
|
|
1706 src++;\
|
|
1707 }\
|
|
1708 }\
|
|
1709 \
|
|
1710 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
|
|
1711 OPNAME ## pixels8_c(dst, src, stride, 8);\
|
|
1712 }\
|
|
1713 \
|
|
1714 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1715 uint8_t half[64];\
|
|
1716 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
|
|
1717 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
|
|
1718 }\
|
|
1719 \
|
|
1720 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1721 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
|
|
1722 }\
|
|
1723 \
|
|
1724 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1725 uint8_t half[64];\
|
|
1726 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
|
|
1727 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
|
|
1728 }\
|
|
1729 \
|
|
1730 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1731 uint8_t full[16*9];\
|
|
1732 uint8_t half[64];\
|
|
1733 copy_block9(full, src, 16, stride, 9);\
|
|
1734 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
|
|
1735 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
|
|
1736 }\
|
|
1737 \
|
|
1738 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1739 uint8_t full[16*9];\
|
|
1740 copy_block9(full, src, 16, stride, 9);\
|
|
1741 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
|
|
1742 }\
|
|
1743 \
|
|
1744 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1745 uint8_t full[16*9];\
|
|
1746 uint8_t half[64];\
|
|
1747 copy_block9(full, src, 16, stride, 9);\
|
|
1748 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
|
|
1749 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
|
|
1750 }\
|
|
1751 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1752 uint8_t full[16*9];\
|
|
1753 uint8_t halfH[72];\
|
|
1754 uint8_t halfV[64];\
|
|
1755 uint8_t halfHV[64];\
|
|
1756 copy_block9(full, src, 16, stride, 9);\
|
|
1757 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1758 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
|
|
1759 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1760 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
|
|
1761 }\
|
|
1762 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1763 uint8_t full[16*9];\
|
|
1764 uint8_t halfH[72];\
|
|
1765 uint8_t halfHV[64];\
|
|
1766 copy_block9(full, src, 16, stride, 9);\
|
|
1767 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1768 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
|
|
1769 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1770 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
|
|
1771 }\
|
|
1772 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1773 uint8_t full[16*9];\
|
|
1774 uint8_t halfH[72];\
|
|
1775 uint8_t halfV[64];\
|
|
1776 uint8_t halfHV[64];\
|
|
1777 copy_block9(full, src, 16, stride, 9);\
|
|
1778 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1779 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
|
|
1780 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1781 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
|
|
1782 }\
|
|
1783 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1784 uint8_t full[16*9];\
|
|
1785 uint8_t halfH[72];\
|
|
1786 uint8_t halfHV[64];\
|
|
1787 copy_block9(full, src, 16, stride, 9);\
|
|
1788 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1789 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
|
|
1790 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1791 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
|
|
1792 }\
|
|
1793 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1794 uint8_t full[16*9];\
|
|
1795 uint8_t halfH[72];\
|
|
1796 uint8_t halfV[64];\
|
|
1797 uint8_t halfHV[64];\
|
|
1798 copy_block9(full, src, 16, stride, 9);\
|
|
1799 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1800 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
|
|
1801 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1802 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
|
|
1803 }\
|
|
1804 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1805 uint8_t full[16*9];\
|
|
1806 uint8_t halfH[72];\
|
|
1807 uint8_t halfHV[64];\
|
|
1808 copy_block9(full, src, 16, stride, 9);\
|
|
1809 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1810 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
|
|
1811 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1812 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
|
|
1813 }\
|
|
1814 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1815 uint8_t full[16*9];\
|
|
1816 uint8_t halfH[72];\
|
|
1817 uint8_t halfV[64];\
|
|
1818 uint8_t halfHV[64];\
|
|
1819 copy_block9(full, src, 16, stride, 9);\
|
|
1820 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
|
|
1821 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
|
|
1822 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1823 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
|
|
1824 }\
|
|
1825 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1826 uint8_t full[16*9];\
|
|
1827 uint8_t halfH[72];\
|
|
1828 uint8_t halfHV[64];\
|
|
1829 copy_block9(full, src, 16, stride, 9);\
|
|
1830 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1831 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
|
|
1832 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1833 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
|
|
1834 }\
|
|
1835 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1836 uint8_t halfH[72];\
|
|
1837 uint8_t halfHV[64];\
|
|
1838 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
|
|
1839 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1840 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
|
|
1841 }\
|
|
1842 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1843 uint8_t halfH[72];\
|
|
1844 uint8_t halfHV[64];\
|
|
1845 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
|
|
1846 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1847 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
|
|
1848 }\
|
|
1849 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1850 uint8_t full[16*9];\
|
|
1851 uint8_t halfH[72];\
|
|
1852 uint8_t halfV[64];\
|
|
1853 uint8_t halfHV[64];\
|
|
1854 copy_block9(full, src, 16, stride, 9);\
|
|
1855 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1856 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
|
|
1857 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1858 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
|
|
1859 }\
|
|
1860 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1861 uint8_t full[16*9];\
|
|
1862 uint8_t halfH[72];\
|
|
1863 copy_block9(full, src, 16, stride, 9);\
|
|
1864 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1865 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
|
|
1866 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
|
|
1867 }\
|
|
1868 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1869 uint8_t full[16*9];\
|
|
1870 uint8_t halfH[72];\
|
|
1871 uint8_t halfV[64];\
|
|
1872 uint8_t halfHV[64];\
|
|
1873 copy_block9(full, src, 16, stride, 9);\
|
|
1874 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1875 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
|
|
1876 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
|
|
1877 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
|
|
1878 }\
|
|
1879 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1880 uint8_t full[16*9];\
|
|
1881 uint8_t halfH[72];\
|
|
1882 copy_block9(full, src, 16, stride, 9);\
|
|
1883 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
|
|
1884 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
|
|
1885 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
|
|
1886 }\
|
|
1887 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1888 uint8_t halfH[72];\
|
|
1889 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
|
|
1890 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
|
|
1891 }\
|
|
1892 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
|
|
1893 OPNAME ## pixels16_c(dst, src, stride, 16);\
|
|
1894 }\
|
|
1895 \
|
|
1896 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1897 uint8_t half[256];\
|
|
1898 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
|
|
1899 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
|
|
1900 }\
|
|
1901 \
|
|
1902 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1903 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
|
|
1904 }\
|
|
1905 \
|
|
1906 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1907 uint8_t half[256];\
|
|
1908 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
|
|
1909 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
|
|
1910 }\
|
|
1911 \
|
|
1912 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1913 uint8_t full[24*17];\
|
|
1914 uint8_t half[256];\
|
|
1915 copy_block17(full, src, 24, stride, 17);\
|
|
1916 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
|
|
1917 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
|
|
1918 }\
|
|
1919 \
|
|
1920 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1921 uint8_t full[24*17];\
|
|
1922 copy_block17(full, src, 24, stride, 17);\
|
|
1923 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
|
|
1924 }\
|
|
1925 \
|
|
1926 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1927 uint8_t full[24*17];\
|
|
1928 uint8_t half[256];\
|
|
1929 copy_block17(full, src, 24, stride, 17);\
|
|
1930 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
|
|
1931 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
|
|
1932 }\
|
|
1933 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1934 uint8_t full[24*17];\
|
|
1935 uint8_t halfH[272];\
|
|
1936 uint8_t halfV[256];\
|
|
1937 uint8_t halfHV[256];\
|
|
1938 copy_block17(full, src, 24, stride, 17);\
|
|
1939 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
1940 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
|
|
1941 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
1942 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
|
|
1943 }\
|
|
1944 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1945 uint8_t full[24*17];\
|
|
1946 uint8_t halfH[272];\
|
|
1947 uint8_t halfHV[256];\
|
|
1948 copy_block17(full, src, 24, stride, 17);\
|
|
1949 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
1950 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
|
|
1951 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
1952 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
|
|
1953 }\
|
|
1954 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1955 uint8_t full[24*17];\
|
|
1956 uint8_t halfH[272];\
|
|
1957 uint8_t halfV[256];\
|
|
1958 uint8_t halfHV[256];\
|
|
1959 copy_block17(full, src, 24, stride, 17);\
|
|
1960 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
1961 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
|
|
1962 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
1963 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
|
|
1964 }\
|
|
1965 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1966 uint8_t full[24*17];\
|
|
1967 uint8_t halfH[272];\
|
|
1968 uint8_t halfHV[256];\
|
|
1969 copy_block17(full, src, 24, stride, 17);\
|
|
1970 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
1971 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
|
|
1972 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
1973 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
|
|
1974 }\
|
|
1975 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1976 uint8_t full[24*17];\
|
|
1977 uint8_t halfH[272];\
|
|
1978 uint8_t halfV[256];\
|
|
1979 uint8_t halfHV[256];\
|
|
1980 copy_block17(full, src, 24, stride, 17);\
|
|
1981 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
1982 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
|
|
1983 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
1984 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
|
|
1985 }\
|
|
1986 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1987 uint8_t full[24*17];\
|
|
1988 uint8_t halfH[272];\
|
|
1989 uint8_t halfHV[256];\
|
|
1990 copy_block17(full, src, 24, stride, 17);\
|
|
1991 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
1992 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
|
|
1993 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
1994 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
|
|
1995 }\
|
|
1996 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
1997 uint8_t full[24*17];\
|
|
1998 uint8_t halfH[272];\
|
|
1999 uint8_t halfV[256];\
|
|
2000 uint8_t halfHV[256];\
|
|
2001 copy_block17(full, src, 24, stride, 17);\
|
|
2002 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
|
|
2003 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
|
|
2004 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
2005 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
|
|
2006 }\
|
|
2007 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2008 uint8_t full[24*17];\
|
|
2009 uint8_t halfH[272];\
|
|
2010 uint8_t halfHV[256];\
|
|
2011 copy_block17(full, src, 24, stride, 17);\
|
|
2012 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
2013 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
|
|
2014 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
2015 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
|
|
2016 }\
|
|
2017 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2018 uint8_t halfH[272];\
|
|
2019 uint8_t halfHV[256];\
|
|
2020 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
|
|
2021 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
2022 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
|
|
2023 }\
|
|
2024 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2025 uint8_t halfH[272];\
|
|
2026 uint8_t halfHV[256];\
|
|
2027 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
|
|
2028 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
2029 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
|
|
2030 }\
|
|
2031 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2032 uint8_t full[24*17];\
|
|
2033 uint8_t halfH[272];\
|
|
2034 uint8_t halfV[256];\
|
|
2035 uint8_t halfHV[256];\
|
|
2036 copy_block17(full, src, 24, stride, 17);\
|
|
2037 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
2038 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
|
|
2039 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
2040 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
|
|
2041 }\
|
|
2042 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2043 uint8_t full[24*17];\
|
|
2044 uint8_t halfH[272];\
|
|
2045 copy_block17(full, src, 24, stride, 17);\
|
|
2046 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
2047 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
|
|
2048 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
|
|
2049 }\
|
|
2050 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2051 uint8_t full[24*17];\
|
|
2052 uint8_t halfH[272];\
|
|
2053 uint8_t halfV[256];\
|
|
2054 uint8_t halfHV[256];\
|
|
2055 copy_block17(full, src, 24, stride, 17);\
|
|
2056 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
2057 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
|
|
2058 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
|
|
2059 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
|
|
2060 }\
|
|
2061 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2062 uint8_t full[24*17];\
|
|
2063 uint8_t halfH[272];\
|
|
2064 copy_block17(full, src, 24, stride, 17);\
|
|
2065 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
|
|
2066 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
|
|
2067 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
|
|
2068 }\
|
|
2069 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
|
|
2070 uint8_t halfH[272];\
|
|
2071 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
|
|
2072 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
|
|
2073 }
|
|
2074
|
|
/* Store/accumulate helpers consumed by the QPEL_MC expansion below.  The
 * 6-tap filter sums arrive in 16.5 fixed point, so each helper rounds by
 * +16 (or +15 in MPEG-4 "no rounding" mode, which biases ties downward),
 * shifts right by 5 and clips to 0..255 through cm[] — the cropTbl +
 * MAX_NEG_CROP pointer deliberately captured from the expansion site.
 * The avg variants additionally average (with upward rounding) against the
 * value already present in the destination.
 * All arguments are fully parenthesized; `a` must be an lvalue. */
#define op_avg(a, b)        (a) = ((((a)+cm[((b) + 16)>>5]+1)>>1))
#define op_avg_no_rnd(a, b) (a) = ((((a)+cm[((b) + 15)>>5])>>1))
#define op_put(a, b)        (a) = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) (a) = cm[((b) + 15)>>5]
|
|
2079
|
|
/* Instantiate the MPEG-4 quarter-pel MC function families:
 *   put_*        — store the rounded, clipped filter result,
 *   put_no_rnd_* — store with MPEG-4 no-rounding bias (+15 instead of +16),
 *   avg_*        — average the rounded result with the destination.
 * The avg_no_rnd family is not needed by any codec and stays disabled. */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* The op_* helpers are only meaningful inside the QPEL_MC expansion above;
 * drop them so later macro blocks (e.g. H264_LOWPASS) can define their own. */
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
|
|
2088
|
|
2089 #if 1
|
|
/*
 * H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Expands to the H.264 half-sample interpolation primitives for block
 * sizes 2, 4, 8 and 16, all using the standard 6-tap filter with
 * coefficients (1,-5,20,20,-5,1):
 *   - *_h_lowpass:  horizontal filtering only,
 *   - *_v_lowpass:  vertical filtering only,
 *   - *_hv_lowpass: horizontal pass into a 16-bit tmp[] buffer, then a
 *                   vertical pass over tmp[].  The intermediates are
 *                   unclipped 16-bit sums, so the second pass uses OP2,
 *                   which applies the wider (two-stage) rounding.
 * OPNAME is the generated-name prefix (e.g. put_/avg_); OP and OP2 round,
 * shift and clip one filtered value into the destination.
 * NOTE: only block comments may be used inside this macro — a // comment
 * would hide the line-continuation backslashes after splicing.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
/* horizontal 6-tap filter over a 2-wide block */\
static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP; /* cm[x] clips x to 0..255 */\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* vertical 6-tap filter over a 2-wide block; one column per iteration */\
static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
/* horizontal pass into tmp[] (unclipped 16-bit), then vertical OP2 pass */\
static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* include the two rows above for the 6-tap support */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the first output row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
/* horizontal 6-tap filter over a 4-wide block */\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* vertical 6-tap filter over a 4-wide block; one column per iteration */\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
/* horizontal pass into tmp[] (unclipped 16-bit), then vertical OP2 pass */\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* include the two rows above for the 6-tap support */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the first output row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
/* horizontal 6-tap filter over an 8-wide block */\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* vertical 6-tap filter over an 8-wide block; one column per iteration */\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
/* horizontal pass into tmp[] (unclipped 16-bit), then vertical OP2 pass */\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* include the two rows above for the 6-tap support */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the first output row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
/* 16-wide variants: four 8x8 quadrants handled by the qpel8 workers */\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\
|
|
2353
|
|
/* Generates the full set of 16 H.264 quarter-pel motion-compensation
 * functions (mc00 .. mc33) for one block size.  The two digits give the
 * fractional position in quarter-pel units: mcXY = horizontal offset X,
 * vertical offset Y.  Half-pel planes come from the *_h/_v/_hv lowpass
 * filters generated above; quarter-pel positions average two planes via
 * OPNAME##pixels##SIZE##_l2().  The 'full' buffers hold SIZE+5 rows so the
 * 6-tap vertical filter has two rows above and three below the block. */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
/* diagonal positions: average of a horizontal and a vertical half-pel plane */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
/* center position: the separable 2-D (hv) filter writes dst directly */\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

|
|
/* Rounding/store operators plugged into H264_LOWPASS above:
 *  op_put/op_avg consume a one-dimensionally filtered value (+16 >>5),
 *  op2_put/op2_avg consume a two-dimensionally filtered value (+512 >>10).
 * 'cm' is the clipping table in scope inside the generated functions;
 * the *_avg variants round-average with the existing destination pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Instantiate the lowpass filters and all qpel MC functions for put/avg. */
H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif
|
|
2512
|
|
/* Weighted-prediction operators:
 *  op_scale1 - unidirectional weighting of one pixel in place,
 *  op_scale2 - bidirectional weighting combining src and dst.
 * Both clip the result to 0..255 via clip_uint8(). */
#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
/* Generates the W x H weighted-prediction functions.  The unrolled body
 * handles all widths: narrower blocks bail out early via 'continue'. */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    /* add rounding term when a shift actually happens */ \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

/* All block sizes used by H.264 weighted prediction. */
H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
|
|
2582
|
|
2583 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
|
|
2584 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
2585 int i;
|
|
2586
|
|
2587 for(i=0; i<h; i++){
|
|
2588 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
|
|
2589 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
|
|
2590 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
|
|
2591 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
|
|
2592 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
|
|
2593 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
|
|
2594 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
|
|
2595 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
|
|
2596 dst+=dstStride;
|
|
2597 src+=srcStride;
|
|
2598 }
|
|
2599 }
|
|
2600
|
|
#ifdef CONFIG_CAVS_DECODER
/* AVS specific */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

/* Full-pel (mc00) CAVS qpel wrappers: a (0,0) fractional position needs no
 * filtering, so just copy/average via the generic pixel ops. */
void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */
|
|
2618
|
|
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
/* VC-1 specific */
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

/* Full-pel VC-1 mspel wrapper: (0,0) position is a plain 8x8 copy.
 * 'rnd' is unused here since no filtering (and thus no rounding) occurs. */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
|
|
2627
|
|
2628 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
|
|
2629 uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
|
2630 int i;
|
|
2631
|
|
2632 for(i=0; i<w; i++){
|
|
2633 const int src_1= src[ -srcStride];
|
|
2634 const int src0 = src[0 ];
|
|
2635 const int src1 = src[ srcStride];
|
|
2636 const int src2 = src[2*srcStride];
|
|
2637 const int src3 = src[3*srcStride];
|
|
2638 const int src4 = src[4*srcStride];
|
|
2639 const int src5 = src[5*srcStride];
|
|
2640 const int src6 = src[6*srcStride];
|
|
2641 const int src7 = src[7*srcStride];
|
|
2642 const int src8 = src[8*srcStride];
|
|
2643 const int src9 = src[9*srcStride];
|
|
2644 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
|
|
2645 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
|
|
2646 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
|
|
2647 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
|
|
2648 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
|
|
2649 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
|
|
2650 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
|
|
2651 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
|
|
2652 src++;
|
|
2653 dst++;
|
|
2654 }
|
|
2655 }
|
|
2656
|
|
/* WMV2 "mspel" motion-compensation wrappers built on the
 * wmv2_mspel8_h_lowpass/wmv2_mspel8_v_lowpass filters.  The mcXY digit
 * pairs follow the same fractional-position naming used by the qpel
 * functions above. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    /* average of source and horizontally filtered plane */
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];   /* 8x11: one row above and two below the 8 output rows,
                            as required by the vertical filter's taps */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    /* halfH+8 skips the extra top row before the vertical pass */
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
|
|
2704
|
|
/* Sum of absolute differences over a 16-pixel-wide block of h rows. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
|
|
2732
|
|
/* SAD against the horizontal half-pel interpolation of pix2
 * (avg2 of each pixel and its right neighbour). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
|
|
2760
|
|
/* SAD against the vertical half-pel interpolation of pix2
 * (avg2 of each pixel and the pixel one line below). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
|
|
2790
|
|
/* SAD against the diagonal half-pel interpolation of pix2
 * (avg4 over the 2x2 neighbourhood). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
|
|
2820
|
|
/* Sum of absolute differences over an 8-pixel-wide block of h rows. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
|
|
2840
|
|
/* 8-wide SAD against the horizontal half-pel interpolation of pix2. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
|
|
2860
|
|
/* 8-wide SAD against the vertical half-pel interpolation of pix2. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
|
|
2882
|
|
/* 8-wide SAD against the diagonal half-pel interpolation of pix2. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int y, x;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x+1], below[x], below[x+1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
|
|
2904
|
|
/**
 * Returns a weighted squared-error estimate for adding scale*basis to rem
 * over an 8x8 block.  All inputs are fixed point; BASIS_SHIFT/RECON_SHIFT
 * (project macros) define the domains.  The accumulated (w*b)^2 terms are
 * pre-shifted by 4 and the total by 2, i.e. the result is sum((w*b)^2)/64.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* scale basis down from the BASIS_SHIFT to the RECON_SHIFT domain,
           with rounding, and add the residual */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
|
|
2919
|
|
/**
 * Adds scale*basis onto rem in place over an 8x8 block, rounding from the
 * BASIS_SHIFT fixed-point domain down to the RECON_SHIFT domain.
 */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
|
|
2927
|
|
2928 /**
|
|
2929 * permutes an 8x8 block.
|
|
2930 * @param block the block which will be permuted according to the given permutation vector
|
|
2931 * @param permutation the permutation vector
|
|
2932 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
|
|
2933 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
|
|
2934 * (inverse) permutated to scantable order!
|
|
2935 */
|
|
2936 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
|
|
2937 {
|
|
2938 int i;
|
|
2939 DCTELEM temp[64];
|
|
2940
|
|
2941 if(last<=0) return;
|
|
2942 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
|
|
2943
|
|
2944 for(i=0; i<=last; i++){
|
|
2945 const int j= scantable[i];
|
|
2946 temp[j]= block[j];
|
|
2947 block[j]=0;
|
|
2948 }
|
|
2949
|
|
2950 for(i=0; i<=last; i++){
|
|
2951 const int j= scantable[i];
|
|
2952 const int perm_j= permutation[j];
|
|
2953 block[perm_j]= temp[j];
|
|
2954 }
|
|
2955 }
|
|
2956
|
|
/* Dummy compare function for FF_CMP_ZERO: every candidate costs 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
|
|
2960
|
|
/**
 * Fills cmp[0..4] with the comparison functions selected by type
 * (an FF_CMP_* constant; only the low byte is inspected).  The index i
 * mirrors the layout of the per-size function tables in DSPContext
 * (c->sad[], c->sse[], ...).  Unknown types log an error and leave the
 * corresponding slots zeroed by the initial memset.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*5);

    for(i=0; i<5; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#ifdef CONFIG_SNOW_ENCODER
        /* wavelet-based metrics are only available with the snow encoder */
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
|
|
3020
|
|
3021 /**
|
|
3022 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
|
|
3023 */
|
|
3024 static void clear_blocks_c(DCTELEM *blocks)
|
|
3025 {
|
|
3026 memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
|
3027 }
|
|
3028
|
|
/* Adds src to dst byte-wise (modulo 256) over w bytes. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i;
    for(i=0; i<w; i++)
        dst[i] += src[i];
}
|
|
3044
|
|
/* Stores the byte-wise difference src1 - src2 (modulo 256) into dst. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
    for(i=0; i<w; i++)
        dst[i] = src1[i] - src2[i];
}
|
|
3060
|
|
/* HuffYUV median prediction residual: for each pixel the predictor is
 * mid_pred(left, top, left + top - topleft) and dst receives the residual
 * (modulo 256).  src1 is read as the line above and src2 as the current
 * line (presumably — confirm against the huffyuv caller); *left/*left_top
 * carry the running left/top-left state across calls. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];  /* this column's top pixel becomes next column's top-left */
        l= src2[i];   /* current pixel becomes next column's left */
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
|
|
3078
|
|
/* Writes sum and difference of i1,i2 into o1,o2 (o1 is written first, so
 * the outputs must not alias the inputs). */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place sum/difference butterfly on x and y, via temporaries. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y| — butterfly magnitude used by SATD-style metrics. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/* 8-point 1-D integer transform (H.264 8x8 style, with >>1 and >>2
 * scaled taps).  Element access goes through SRC(i)/DST(i,val), which the
 * including code must define; this lets the same macro run over rows or
 * columns of any layout. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
|
|
3120
|
|
/* Multiplies dst by src element-wise, in place. */
static void vector_fmul_c(float *dst, const float *src, int len){
    const float *end = src + len;
    while(src < end)
        *dst++ *= *src++;
}
|
|
3126
|
|
/* dst[i] = src0[i] * src1 read back-to-front (src1[len-1-i]). */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[len-1-i];
}
|
|
3133
|
|
/* Strided multiply-add: dst[i*step] = src0[i]*src1[i] + src2[i] + src3.
 * Note src3 is an integer bias added to every element; only every
 * step-th element of dst is written. */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int i;
    float *out = dst;
    for(i=0; i<len; i++, out += step)
        *out = src0[i] * src1[i] + src2[i] + src3;
}
|
|
3139
|
|
/**
 * Converts floats to int16 using the bit-pattern trick: the caller is
 * expected to have biased each sample so that an in-range value has the
 * IEEE-754 bit pattern 0x43c08000 + sample (i.e. 384.0 + (sample+32768)
 * ulps); the low 16 bits of the pattern minus 0x8000 then ARE the int16.
 * Out-of-range patterns (any bit in 0xf0000 set) are clamped to
 * -32768 / 32767 via the branch-free sign-shift below.
 *
 * Fixed: the original read the float bits via *(int32_t*) casts, which
 * violates C strict-aliasing rules (undefined behavior, and miscompiles
 * under -fstrict-aliasing).  A union is used instead, which GCC
 * explicitly documents as supported for type punning.
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++) {
        union { float f; int32_t i32; } u;
        int_fast32_t tmp;
        u.f = src[i];
        tmp = u.i32;
        if(tmp & 0xf0000){
            /* out of range: becomes -1 (0xFFFF after -0x8000) on positive
               overflow, 0 (-32768 after -0x8000) on underflow */
            tmp = (0x43c0ffff - tmp)>>31;
            // is this faster on some gcc/cpu combinations?
            // if(tmp > 0x43c0ffff) tmp = 0xFFFF;
            // else                 tmp = 0;
        }
        dst[i] = tmp - 0x8000;
    }
}
|
|
3153
|
|
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
/* Reference 8x8 JPEG IDCT followed by a clamped (unsigned 8-bit) store. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
|
|
/* Reference 8x8 JPEG IDCT followed by a clamped add onto the destination. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
|
|
3166
|
|
/* 4x4 variant of the reference IDCT with clamped store. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
|
|
/* 4x4 variant of the reference IDCT with clamped add. */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
|
|
3177
|
|
/* 2x2 variant of the reference IDCT with clamped store. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
|
|
/* 2x2 variant of the reference IDCT with clamped add. */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
|
|
3188
|
|
/* 1x1 "IDCT": only the DC coefficient exists, so descale it by 8 with
 * rounding and store the clamped result.  line_size is unused for a
 * single pixel. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = cropTbl + MAX_NEG_CROP;  /* clamp-to-[0,255] lookup */

    dest[0] = cm[(block[0] + 4)>>3];
}
|
|
/* 1x1 "IDCT" add variant: descale the DC coefficient by 8 with rounding,
 * add it to the existing pixel and clamp. */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = cropTbl + MAX_NEG_CROP;  /* clamp-to-[0,255] lookup */

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
|
|
3201
|
|
/* No-op default for optional hooks (e.g. DSPContext.prefetch).  The empty
 * (unprototyped) parameter list is deliberate: it lets this one function
 * be assigned to function pointers with differing argument lists without
 * a cast, so do not "fix" it to (void). */
static void just_return() { return; }
|
|
3203
|
808
|
3204 /* init static data */
|
|
3205 void dsputil_static_init(void)
|
|
3206 {
|
|
3207 int i;
|
|
3208
|
|
3209 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
|
|
3210 for(i=0;i<MAX_NEG_CROP;i++) {
|
|
3211 cropTbl[i] = 0;
|
|
3212 cropTbl[i + MAX_NEG_CROP + 256] = 255;
|
|
3213 }
|
832
|
3214
|
808
|
3215 for(i=0;i<512;i++) {
|
|
3216 squareTbl[i] = (i - 256) * (i - 256);
|
|
3217 }
|
832
|
3218
|
808
|
3219 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
|
|
3220 }
|
832
|
3221
|
|
3222
|
|
/**
 * Fills the DSPContext function-pointer tables with the portable C
 * implementations, then lets the architecture-specific initializers at
 * the bottom override individual entries with optimized versions, and
 * finally builds the IDCT coefficient permutation selected by whichever
 * IDCT was installed.
 */
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

#ifdef CONFIG_ENCODERS
    /* forward DCT selection per user/codec preference */
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* basic pixel-block helpers */
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* SAD with half-pel offsets; TODO [0] 16 [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* half-pel put/avg tables: [IDX] selects block width, [0..3] the
       (x,y) half-pel phase */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    /* third-pel motion compensation (SVQ3); indices 3, 7 and 11+ are
       intentionally left unset */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* quarter-pel tables: 16 entries per block size, one per (x,y)
       quarter-pel phase */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    /* H.264 chroma MC and (bi)weighted prediction */
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;

    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;

    /* codec-specific DSP extensions */
#ifdef CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);
#endif
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_vc1dsp_init(c,avctx);
#endif

    /* WMV2/MSMPEG4 "mspel" motion compensation */
    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* helper for wiring a comparison function's 16x16 and 8x8 variants */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

#ifdef CONFIG_SNOW_ENCODER
    c->vertical_compose97i = ff_snow_vertical_compose97i;
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
    c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif

#ifdef CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
    /* float vector helpers (audio codecs) */
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
    c->float_to_int16 = ff_float_to_int16_c;

    c->prefetch= just_return;

    /* zero the 2tap tables so the loop below can detect which entries an
       arch-specific init filled in */
    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

    /* architecture-specific overrides */
#ifdef HAVE_MMX
    dsputil_init_mmx(c, avctx);
#endif
#ifdef ARCH_ARMV4L
    dsputil_init_armv4l(c, avctx);
#endif
#ifdef HAVE_MLIB
    dsputil_init_mlib(c, avctx);
#endif
#ifdef ARCH_SPARC
    dsputil_init_vis(c,avctx);
#endif
#ifdef ARCH_ALPHA
    dsputil_init_alpha(c, avctx);
#endif
#ifdef ARCH_POWERPC
    dsputil_init_ppc(c, avctx);
#endif
#ifdef HAVE_MMI
    dsputil_init_mmi(c, avctx);
#endif
#ifdef ARCH_SH4
    dsputil_init_sh4(c,avctx);
#endif
#ifdef ARCH_BFIN
    dsputil_init_bfin(c,avctx);
#endif

    /* fall back to the h264 qpel functions for 2tap entries no
       arch-specific init provided */
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    /* build the coefficient permutation required by the selected IDCT */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
|
|
3484
|