Mercurial > libavcodec.hg
annotate dct-test.c @ 4714:fc70a43a4f01 libavcodec
Fix H.264 8x8 transform selection
This appears to be an option-parsing bug: the parser cannot process flags
whose names start with a digit. After changing 8x8dct to dct8x8, I can set it
without problems. I guess nobody uses the old name, since it never worked as
expected, so the quick fix is to change the option name.
Patch by Limin Wang lance dot lmwang at gmail com
author | mru |
---|---|
date | Sat, 24 Mar 2007 12:07:07 +0000 |
parents | bbe0bc387a19 |
children | d49f8b3e8c4a |
rev | line source |
---|---|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
1 /* |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
2 * (c) 2001 Fabrice Bellard |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
3 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
14 * Lesser General Public License for more details. |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
15 * |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
19 */ |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
3398
diff
changeset
|
20 |
1106 | 21 /** |
22 * @file dct-test.c | |
2967 | 23 * DCT test. (c) 2001 Fabrice Bellard. |
1106 | 24 * Started from sample code by Juan J. Sierralta P. |
25 */ | |
26 | |
0 | 27 #include <stdlib.h> |
28 #include <stdio.h> | |
29 #include <string.h> | |
30 #include <sys/time.h> | |
31 #include <unistd.h> | |
32 | |
33 #include "dsputil.h" | |
34 | |
633 | 35 #include "simple_idct.h" |
1557 | 36 #include "faandct.h" |
33 | 37 |
980 | 38 #ifndef MAX |
39 #define MAX(a, b) (((a) > (b)) ? (a) : (b)) | |
40 #endif | |
41 | |
2872 | 42 #undef printf |
43 | |
/* fast_memcpy defined locally as a plain forward to memcpy(). */
/* NOTE(review): presumably here to satisfy a reference from other objects linked into the test - confirm. */
44 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);}; | |
45 | |
33 | 46 /* reference fdct/idct */
/* Prototypes of transforms that are not defined in this file. */
/* main() passes fdct and idct to dct_error() as the reference transforms. */
0 | 47 extern void fdct(DCTELEM *block);
33 | 48 extern void idct(DCTELEM *block);
2872 | 49 extern void ff_idct_xvid_mmx(DCTELEM *block);
50 extern void ff_idct_xvid_mmx2(DCTELEM *block); | |
0 | 51 extern void init_fdct();
52 | |
33 | 53 extern void j_rev_dct(DCTELEM *data);
54 extern void ff_mmx_idct(DCTELEM *data); | |
55 extern void ff_mmxext_idct(DCTELEM *data); | |
56 | |
/* odivx_idct_c is only referenced from commented-out calls in main(). */
633 | 57 extern void odivx_idct_c (short *block);
58 | |
/* AANSCALE_BITS is the right-shift applied in dct_error() after multiplying by the scale derived from aanscales[]. */
0 | 59 #define AANSCALE_BITS 12
/* Per-coefficient factors (8x8, one per coefficient index) used by dct_error() to rescale the output of fdct_ifast, and of ff_faandct when FAAN_POSTSCALE is not defined. */
60 static const unsigned short aanscales[64] = { | |
61 /* precomputed values scaled up by 14 bits */ | |
62 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | |
63 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, | |
64 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, | |
65 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, | |
66 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | |
67 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, | |
68 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, | |
69 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 | |
70 }; | |
71 | |
/* Clamping table filled in main(): the first MAX_NEG_CROP entries are 0, the next 256 are 0..255, the last MAX_NEG_CROP are 255. */
/* NOTE(review): not read anywhere in this file; presumably referenced by the linked transform code - confirm. */
4197 | 72 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
633 | 73 |
/*
 * Return the current time from gettimeofday() as a single 64-bit count of
 * microseconds (tv_sec * 1000000 + tv_usec).
 * The return value of gettimeofday() is not checked.
 */
1064 | 74 int64_t gettime(void)
0 | 75 {
76 struct timeval tv; | |
77 gettimeofday(&tv,NULL); | |
/* widen tv_sec before the multiplication so the product is computed in 64 bits */
1064 | 78 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
0 | 79 }
80 | |
/* NB_ITS: number of random blocks per accuracy test. */
/* NB_ITS_SPEED: number of calls per batch between clock reads in the speed tests. */
81 #define NB_ITS 20000 | |
82 #define NB_ITS_SPEED 50000 | |
83 | |
/* Input permutation for ff_mmx_idct, ff_mmxext_idct and j_rev_dct; filled at run time by idct_mmx_init(). */
33 | 84 static short idct_mmx_perm[64];
85 | |
/* Fixed input permutation used when the tested function is ff_simple_idct_mmx: source coefficient i is stored at block[idct_simple_mmx_perm[i]]. */
633 | 86 static short idct_simple_mmx_perm[64]={
2979 | 87 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
88 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
89 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
90 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
91 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
92 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
93 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
94 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
633 | 95 };
96 | |
/*
 * Fill idct_mmx_perm[] with the index permutation that dct_error() applies
 * to the input of ff_mmx_idct, ff_mmxext_idct and j_rev_dct.
 */
33 | 97 void idct_mmx_init(void)
98 { | |
99 int i; | |
100 | |
101 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ | |
102 for (i = 0; i < 64; i++) { | |
/* bits 3-5 of i are kept, bits 1-2 move down by one position, bit 0 moves up to bit 2 */
2979 | 103 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
104 // idct_simple_mmx_perm[i] = simple_block_permute_op(i); | |
33 | 105 }
106 } | |
107 | |
/* 8-byte aligned work buffers shared by dct_error() and idct248_error(). */
/* block: buffer handed to the function under test. */
108 static DCTELEM block[64] __attribute__ ((aligned (8))); | |
/* block1: generated input; in dct_error() it is also transformed in place by the reference function. */
109 static DCTELEM block1[64] __attribute__ ((aligned (8))); | |
/* block_org: copy of the generated input; only read by the disabled matrix dump in dct_error(). */
633 | 110 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
33 | 111 |
/*
 * Measure the accuracy and then the speed of one DCT/IDCT implementation
 * against a reference, and print the results with printf.
 *
 * name       label printed in the report lines
 * is_idct    nonzero: report as IDCT and, for test 0, pass the random input
 *            through fdct() and shift it right by 3 first;
 *            zero: report as DCT
 * fdct_func  transform under test, called in place on the global block[]
 * fdct_ref   reference transform, called in place on the global block1[]
 * test       input generator: 0 = dense random values, 1 = up to 10 random
 *            entries, 2 = DC value plus block1[63]; the switch has no default
 *            case, so any other value leaves the input all zero
 */
112 void dct_error(const char *name, int is_idct, | |
113 void (*fdct_func)(DCTELEM *block), | |
633 | 114 void (*fdct_ref)(DCTELEM *block), int test)
0 | 115 {
116 int it, i, scale; | |
117 int err_inf, v; | |
1064 | 118 int64_t err2, ti, ti1, it1;
119 int64_t sysErr[64], sysErrMax=0; | |
633 | 120 int maxout=0;
121 int blockSumErrMax=0, blockSumErr; | |
0 | 122
/* fixed seed: every call draws the same pseudo-random sequence */
123 srandom(0); | |
124 | |
125 err_inf = 0; | |
126 err2 = 0; | |
633 | 127 for(i=0; i<64; i++) sysErr[i]=0;
/* accuracy test: NB_ITS generated blocks */
0 | 128 for(it=0;it<NB_ITS;it++) {
633 | 129 for(i=0;i<64;i++)
130 block1[i] = 0; | |
131 switch(test){ | |
/* test 0: every coefficient random in -256..255 */
2967 | 132 case 0:
633 | 133 for(i=0;i<64;i++)
134 block1[i] = (random() % 512) -256; | |
635 | 135 if (is_idct){
633 | 136 fdct(block1);
635 | 137
138 for(i=0;i<64;i++) | |
139 block1[i]>>=3; | |
140 } | |
633 | 141 break;
/* test 1: 1 to 10 stores at random positions (positions may repeat), rest stays zero */
142 case 1:{ | |
143 int num= (random()%10)+1; | |
144 for(i=0;i<num;i++) | |
145 block1[random()%64] = (random() % 512) -256; | |
146 }break; | |
/* test 2: DC in -2048..2047 and block1[63] set to the inverted low bit of the DC value */
147 case 2: | |
148 block1[0]= (random()%4096)-2048; | |
149 block1[63]= (block1[0]&1)^1; | |
150 break; | |
151 } | |
33 | 152
633 | 153 #if 0 // simulate mismatch control
154 { int sum=0; | |
155 for(i=0;i<64;i++) | |
156 sum+=block1[i]; | |
157 | |
2967 | 158 if((sum&1)==0) block1[63]^=1;
633 | 159 }
160 #endif | |
161 | |
162 for(i=0; i<64; i++) | |
163 block_org[i]= block1[i]; | |
33 | 164
/* copy the input into block[], permuting the coefficient order for the functions listed below */
165 if (fdct_func == ff_mmx_idct || | |
633 | 166 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
167 for(i=0;i<64;i++) | |
33 | 168 block[idct_mmx_perm[i]] = block1[i];
720 | 169 } else if(fdct_func == ff_simple_idct_mmx ) {
633 | 170 for(i=0;i<64;i++)
171 block[idct_simple_mmx_perm[i]] = block1[i]; | |
172 | |
2979 | 173 } else {
633 | 174 for(i=0; i<64; i++)
175 block[i]= block1[i]; | |
33 | 176 }
633 | 177 #if 0 // simulate mismatch control for tested IDCT but not the ref
178 { int sum=0; | |
179 for(i=0;i<64;i++) | |
180 sum+=block[i]; | |
181 | |
2967 | 182 if((sum&1)==0) block[63]^=1;
633 | 183 }
184 #endif | |
33 | 185
0 | 186 fdct_func(block);
33 | 187 emms(); /* for ff_mmx_idct */
188 | |
/* rescale the output by aanscales[] for fdct_ifast, and for ff_faandct when FAAN_POSTSCALE is not defined */
2967 | 189 if (fdct_func == fdct_ifast
190 #ifndef FAAN_POSTSCALE | |
1562
bf452704100f
optionally merge postscale into quantization table for the float aan dct
michael
parents:
1557
diff
changeset
|
191 || fdct_func == ff_faandct |
bf452704100f
optionally merge postscale into quantization table for the float aan dct
michael
parents:
1557
diff
changeset
|
192 #endif |
bf452704100f
optionally merge postscale into quantization table for the float aan dct
michael
parents:
1557
diff
changeset
|
193 ) { |
0 | 194 for(i=0; i<64; i++) {
635 | 195 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
633 | 196 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
197 } | |
198 } | |
199 | |
33 | 200 fdct_ref(block1);
0 | 201
/*
 * Compare tested output (block) with reference output (block1):
 * err_inf = largest absolute difference, err2 = sum of squared differences,
 * sysErr[] = signed per-coefficient sum of differences,
 * blockSumErr = sum of absolute differences within this block,
 * maxout = largest absolute value produced by the tested function.
 */
633 | 202 blockSumErr=0;
0 | 203 for(i=0;i<64;i++) {
204 v = abs(block[i] - block1[i]); | |
205 if (v > err_inf) | |
206 err_inf = v; | |
207 err2 += v * v; | |
2979 | 208 sysErr[i] += block[i] - block1[i];
209 blockSumErr += v; | |
210 if( abs(block[i])>maxout) maxout=abs(block[i]); | |
0 | 211 }
633 | 212 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
213 #if 0 // print different matrix pairs | |
214 if(blockSumErr){ | |
215 printf("\n"); | |
216 for(i=0; i<64; i++){ | |
217 if((i&7)==0) printf("\n"); | |
218 printf("%4d ", block_org[i]); | |
219 } | |
220 for(i=0; i<64; i++){ | |
221 if((i&7)==0) printf("\n"); | |
222 printf("%4d ", block[i] - block1[i]); | |
223 } | |
224 } | |
225 #endif | |
0 | 226 }
4001 | 227 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
2967 | 228
633 | 229 #if 1 // dump systematic errors
230 for(i=0; i<64; i++){ | |
2979 | 231 if(i%8==0) printf("\n");
633 | 232 printf("%5d ", (int)sysErr[i]);
233 } | |
234 printf("\n"); | |
235 #endif | |
2967 | 236
/* err2 is printed as a mean over all coefficients of all blocks, syserr as the largest per-coefficient sum divided by NB_ITS */
633 | 237 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
33 | 238 is_idct ? "IDCT" : "DCT",
633 | 239 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
240 #if 1 //Speed test | |
0 | 241 /* speed test */
633 | 242 for(i=0;i<64;i++)
243 block1[i] = 0; | |
244 switch(test){ | |
2967 | 245 case 0:
633 | 246 for(i=0;i<64;i++)
247 block1[i] = (random() % 512) -256; | |
635 | 248 if (is_idct){
633 | 249 fdct(block1);
635 | 250
251 for(i=0;i<64;i++) | |
252 block1[i]>>=3; | |
253 } | |
633 | 254 break;
/* tests 1 and 2 share one body here: only the first four coefficients are set */
255 case 1:{ | |
256 case 2: | |
257 block1[0] = (random() % 512) -256; | |
258 block1[1] = (random() % 512) -256; | |
259 block1[2] = (random() % 512) -256; | |
260 block1[3] = (random() % 512) -256; | |
261 }break; | |
262 } | |
0 | 263
33 | 264 if (fdct_func == ff_mmx_idct ||
633 | 265 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
266 for(i=0;i<64;i++) | |
33 | 267 block[idct_mmx_perm[i]] = block1[i];
720 | 268 } else if(fdct_func == ff_simple_idct_mmx ) {
633 | 269 for(i=0;i<64;i++)
270 block[idct_simple_mmx_perm[i]] = block1[i]; | |
271 } else { | |
272 for(i=0; i<64; i++) | |
273 block[i]= block1[i]; | |
33 | 274 }
275 | |
/* run batches of NB_ITS_SPEED calls until at least 1000000 microseconds have elapsed */
/* NOTE(review): the copy inside the loop below is unpermuted, so it overwrites the permuted block[] prepared above - confirm this is intended */
0 | 276 ti = gettime();
277 it1 = 0; | |
278 do { | |
279 for(it=0;it<NB_ITS_SPEED;it++) { | |
633 | 280 for(i=0; i<64; i++)
281 block[i]= block1[i]; | |
282 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
283 // dont memcpy especially not fastmemcpy because it does movntq !!! | |
0 | 284 fdct_func(block);
285 } | |
286 it1 += NB_ITS_SPEED; | |
287 ti1 = gettime() - ti; | |
288 } while (ti1 < 1000000); | |
33 | 289 emms();
0 | 290
/* calls * 1000 / elapsed microseconds = thousands of calls per second */
633 | 291 printf("%s %s: %0.1f kdct/s\n",
33 | 292 is_idct ? "IDCT" : "DCT",
0 | 293 name, (double)it1 * 1000.0 / (double)ti1);
633 | 294 #endif
0 | 295 }
296 | |
/* 8-byte aligned 8x8 output buffers for idct248_error(): img_dest receives the tested output, img_dest1 the reference output. */
1064 | 297 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
298 static uint8_t img_dest1[64] __attribute__ ((aligned (8))); | |
720 | 299 |
/*
 * Double-precision reference for the 2-4-8 inverse DCT.
 *
 * dest      output pixels; element (i, j) is written to dest[i * linesize + j]
 * linesize  distance in bytes between two output rows
 * block     64 input coefficients, read as an 8x8 array indexed 8*row+column
 *
 * The cosine tables c8 and c4 are static and built on the first call only.
 * NOTE(review): sqrt, cos, rint and M_PI are used but math.h is not among the
 * includes visible in this file; presumably it arrives through dsputil.h -
 * confirm.
 */
1064 | 300 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
720 | 301 {
302 static int init; | |
303 static double c8[8][8]; | |
304 static double c4[4][4]; | |
305 double block1[64], block2[64], block3[64]; | |
306 double s, sum, v; | |
307 int i, j, k; | |
308 | |
/* one-time table setup; sum is accumulated in both loops below but never read */
309 if (!init) { | |
310 init = 1; | |
311 | |
312 for(i=0;i<8;i++) { | |
313 sum = 0; | |
314 for(j=0;j<8;j++) { | |
315 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0); | |
316 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
317 sum += c8[i][j] * c8[i][j]; | |
318 } | |
319 } | |
2967 | 320
720 | 321 for(i=0;i<4;i++) {
322 sum = 0; | |
323 for(j=0;j<4;j++) { | |
324 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0); | |
325 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
326 sum += c4[i][j] * c4[i][j]; | |
327 } | |
328 } | |
329 } | |
330 | |
/* each pair of adjacent rows (2i, 2i+1) is replaced by its sum and difference, both scaled by s */
331 /* butterfly */ | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
332 s = 0.5 * sqrt(2.0); |
720 | 333 for(i=0;i<4;i++) {
334 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
335 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
336 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s; |
720 | 337 }
338 } | |
339 | |
/* 8-point transform with c8 along every row: block1 -> block2 */
340 /* idct8 on lines */ | |
341 for(i=0;i<8;i++) { | |
342 for(j=0;j<8;j++) { | |
343 sum = 0; | |
344 for(k=0;k<8;k++) | |
345 sum += c8[k][j] * block1[8*i+k]; | |
346 block2[8*i+j] = sum; | |
347 } | |
348 } | |
349 | |
/* 4-point transform with c4 down every column, done separately over the even rows (top) and the odd rows (bottom): block2 -> block3 */
350 /* idct4 */ | |
351 for(i=0;i<8;i++) { | |
352 for(j=0;j<4;j++) { | |
353 /* top */ | |
354 sum = 0; | |
355 for(k=0;k<4;k++) | |
356 sum += c4[k][j] * block2[8*(2*k)+i]; | |
357 block3[8*(2*j)+i] = sum; | |
358 | |
359 /* bottom */ | |
360 sum = 0; | |
361 for(k=0;k<4;k++) | |
362 sum += c4[k][j] * block2[8*(2*k+1)+i]; | |
363 block3[8*(2*j+1)+i] = sum; | |
364 } | |
365 } | |
366 | |
/* values are limited to 0..255 first and rounded with rint afterwards */
367 /* clamp and store the result */ | |
368 for(i=0;i<8;i++) { | |
369 for(j=0;j<8;j++) { | |
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
370 v = block3[8*i+j]; |
720 | 371 if (v < 0)
372 v = 0; | |
373 else if (v > 255) | |
374 v = 255; | |
375 dest[i * linesize + j] = (int)rint(v); | |
376 } | |
377 } | |
378 } | |
379 | |
/*
 * Check a 2-4-8 IDCT put function against idct248_ref() on NB_ITS random
 * blocks, print the largest absolute pixel difference, then time the
 * function and print its speed.
 *
 * name         label printed in both report lines
 * idct248_put  function under test; called with img_dest, a line size of 8
 *              and the global block[]
 *
 * NOTE(review): it1, ti and ti1 are declared int, while gettime() returns an
 * int64_t count of microseconds and dct_error() keeps the same three values
 * in int64_t. The timestamp stored in ti does not fit a 32-bit int - confirm
 * whether these should be int64_t.
 */
2967 | 380 void idct248_error(const char *name,
1064 | 381 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
720 | 382 {
383 int it, i, it1, ti, ti1, err_max, v; | |
384 | |
/* fixed seed: every call draws the same pseudo-random sequence */
385 srandom(0); | |
2967 | 386
720 | 387 /* just one test to see if code is correct (precision is less
388 important here) */ | |
389 err_max = 0; | |
390 for(it=0;it<NB_ITS;it++) { | |
2967 | 391
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
392 /* XXX: use forward transform to generate values */ |
720 | 393 for(i=0;i<64;i++)
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
394 block1[i] = (random() % 256) - 128; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
395 block1[0] += 1024; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
396 |
/* block[] is refilled from block1[] before each of the two calls, so both receive the same coefficients */
720 | 397 for(i=0; i<64; i++)
398 block[i]= block1[i]; | |
399 idct248_ref(img_dest1, 8, block); | |
2967 | 400
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
401 for(i=0; i<64; i++) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
402 block[i]= block1[i]; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
403 idct248_put(img_dest, 8, block); |
2967 | 404
722
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
405 for(i=0;i<64;i++) { |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
406 v = abs((int)img_dest[i] - (int)img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
407 if (v == 255) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
408 printf("%d %d\n", img_dest[i], img_dest1[i]); |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
409 if (v > err_max) |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
410 err_max = v; |
ff90043f4a2d
in fact IDCT248 needs to be normalized as I suspected
bellard
parents:
720
diff
changeset
|
411 } |
720 | 412 #if 0
413 printf("ref=\n"); | |
414 for(i=0;i<8;i++) { | |
415 int j; | |
416 for(j=0;j<8;j++) { | |
417 printf(" %3d", img_dest1[i*8+j]); | |
418 } | |
419 printf("\n"); | |
420 } | |
2967 | 421
720 | 422 printf("out=\n");
423 for(i=0;i<8;i++) { | |
424 int j; | |
425 for(j=0;j<8;j++) { | |
426 printf(" %3d", img_dest[i*8+j]); | |
427 } | |
428 printf("\n"); | |
429 } | |
430 #endif | |
431 } | |
/* the constant condition below always selects the IDCT248 label */
432 printf("%s %s: err_inf=%d\n", | |
433 1 ? "IDCT248" : "DCT248", | |
434 name, err_max); | |
435 | |
/* speed test on the last generated block: batches of NB_ITS_SPEED calls until ti1 reaches 1000000 */
436 ti = gettime(); | |
437 it1 = 0; | |
438 do { | |
439 for(it=0;it<NB_ITS_SPEED;it++) { | |
440 for(i=0; i<64; i++) | |
441 block[i]= block1[i]; | |
442 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
443 // dont memcpy especially not fastmemcpy because it does movntq !!! | |
444 idct248_put(img_dest, 8, block); | |
445 } | |
446 it1 += NB_ITS_SPEED; | |
447 ti1 = gettime() - ti; | |
448 } while (ti1 < 1000000); | |
449 emms(); | |
450 | |
451 printf("%s %s: %0.1f kdct/s\n", | |
452 1 ? "IDCT248" : "DCT248", | |
453 name, (double)it1 * 1000.0 / (double)ti1); | |
454 } | |
455 | |
/*
 * Print the usage text and terminate the program with exit status 1.
 * NOTE(review): the first usage line lists only -i, although -4 is described
 * below it and main() also accepts -h - confirm whether the synopsis should
 * mention them.
 */
33 | 456 void help(void)
457 { | |
633 | 458 printf("dct-test [-i] [<test-number>]\n"
459 "test-number 0 -> test with random matrixes\n" | |
460 " 1 -> test with random sparse matrixes\n" | |
461 " 2 -> do 3. test from mpeg4 std\n" | |
720 | 462 "-i test IDCT implementations\n"
463 "-4 test IDCT248 implementations\n"); | |
33 | 464 exit(1);
465 } | |
466 | |
/*
 * Entry point: initialise the tables, parse the options -i, -4 and -h plus an
 * optional test number (default 1), then run the selected set of tests.
 * Returns 0; help() exits with status 1 for -h or an unknown option.
 */
0 | 467 int main(int argc, char **argv)
468 { | |
720 | 469 int test_idct = 0, test_248_dct = 0;
633 | 470 int c,i;
471 int test=1; | |
33 | 472
0 | 473 init_fdct();
33 | 474 idct_mmx_init();
0 | 475
/* fill cropTbl: 0 for the first MAX_NEG_CROP entries, 0..255 for the middle 256, 255 for the last MAX_NEG_CROP */
4197 | 476 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
633 | 477 for(i=0;i<MAX_NEG_CROP;i++) {
4197 | 478 cropTbl[i] = 0;
479 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
633 | 480 }
2967 | 481
33 | 482 for(;;) {
720 | 483 c = getopt(argc, argv, "ih4");
33 | 484 if (c == -1)
485 break; | |
486 switch(c) { | |
487 case 'i': | |
488 test_idct = 1; | |
489 break; | |
720 | 490 case '4':
491 test_248_dct = 1; | |
492 break; | |
/* unknown options and -h both end up in help(), which exits */
633 | 493 default :
33 | 494 case 'h':
495 help(); | |
496 break; | |
497 } | |
498 } | |
2967 | 499
/* first non-option argument selects the input generator; atoi does no error reporting */
633 | 500 if(optind <argc) test= atoi(argv[optind]);
2967 | 501
33 | 502 printf("ffmpeg DCT/IDCT test\n");
503 | |
/* -4 takes precedence over -i: with -4 only the IDCT248 test runs */
720 | 504 if (test_248_dct) {
505 idct248_error("SIMPLE-C", simple_idct248_put); | |
33 | 506 } else {
720 | 507 if (!test_idct) {
508 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */ | |
509 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test); | |
510 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test); | |
511 dct_error("MMX", 0, ff_fdct_mmx, fdct, test); | |
1574 | 512 dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
1557 | 513 dct_error("FAAN", 0, ff_faandct, fdct, test);
720 | 514 } else {
515 dct_error("REF-DBL", 1, idct, idct, test); | |
516 dct_error("INT", 1, j_rev_dct, idct, test); | |
517 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test); | |
518 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test); | |
519 dct_error("SIMPLE-C", 1, simple_idct, idct, test); | |
520 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test); | |
2872 | 521 dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
522 dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test); | |
720 | 523 // dct_error("ODIVX-C", 1, odivx_idct_c, idct);
524 //printf(" test against odivx idct\n"); | |
2979 | 525 // dct_error("REF", 1, idct, odivx_idct_c);
720 | 526 // dct_error("INT", 1, j_rev_dct, odivx_idct_c);
527 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); | |
528 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); | |
529 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c); | |
530 // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c); | |
531 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c); | |
532 } | |
33 | 533 }
0 | 534 return 0;
535 } | |