Mercurial > libavcodec.hg
comparison dct-test.c @ 720:409bdaa0b964 libavcodec
added IDCT248 testing
author | bellard |
---|---|
date | Thu, 03 Oct 2002 19:49:23 +0000 |
parents | 9abb13c21fbe |
children | ff90043f4a2d |
comparison
equal
deleted
inserted
replaced
719:2b7ff6dfee35 | 720:409bdaa0b964 |
---|---|
21 extern void j_rev_dct(DCTELEM *data); | 21 extern void j_rev_dct(DCTELEM *data); |
22 extern void ff_mmx_idct(DCTELEM *data); | 22 extern void ff_mmx_idct(DCTELEM *data); |
23 extern void ff_mmxext_idct(DCTELEM *data); | 23 extern void ff_mmxext_idct(DCTELEM *data); |
24 | 24 |
25 extern void odivx_idct_c (short *block); | 25 extern void odivx_idct_c (short *block); |
26 | |
27 void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, | |
28 UINT8 *pixels/*align 8*/, int line_size); | |
29 | |
30 void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, | |
31 UINT8 *pixels/*align 8*/, int line_size); | |
26 | 32 |
27 #define AANSCALE_BITS 12 | 33 #define AANSCALE_BITS 12 |
28 static const unsigned short aanscales[64] = { | 34 static const unsigned short aanscales[64] = { |
29 /* precomputed values scaled up by 14 bits */ | 35 /* precomputed values scaled up by 14 bits */ |
30 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, | 36 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, |
84 int it, i, scale; | 90 int it, i, scale; |
85 int err_inf, v; | 91 int err_inf, v; |
86 INT64 err2, ti, ti1, it1; | 92 INT64 err2, ti, ti1, it1; |
87 INT64 sysErr[64], sysErrMax=0; | 93 INT64 sysErr[64], sysErrMax=0; |
88 int maxout=0; | 94 int maxout=0; |
89 int max_sum=0; | |
90 int blockSumErrMax=0, blockSumErr; | 95 int blockSumErrMax=0, blockSumErr; |
91 | 96 |
92 srandom(0); | 97 srandom(0); |
93 | 98 |
94 err_inf = 0; | 99 err_inf = 0; |
133 | 138 |
134 if (fdct_func == ff_mmx_idct || | 139 if (fdct_func == ff_mmx_idct || |
135 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { | 140 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { |
136 for(i=0;i<64;i++) | 141 for(i=0;i<64;i++) |
137 block[idct_mmx_perm[i]] = block1[i]; | 142 block[idct_mmx_perm[i]] = block1[i]; |
138 } else if(fdct_func == simple_idct_mmx ) { | 143 } else if(fdct_func == ff_simple_idct_mmx ) { |
139 for(i=0;i<64;i++) | 144 for(i=0;i<64;i++) |
140 block[idct_simple_mmx_perm[i]] = block1[i]; | 145 block[idct_simple_mmx_perm[i]] = block1[i]; |
141 | 146 |
142 } else { | 147 } else { |
143 for(i=0; i<64; i++) | 148 for(i=0; i<64; i++) |
228 | 233 |
229 if (fdct_func == ff_mmx_idct || | 234 if (fdct_func == ff_mmx_idct || |
230 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { | 235 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { |
231 for(i=0;i<64;i++) | 236 for(i=0;i<64;i++) |
232 block[idct_mmx_perm[i]] = block1[i]; | 237 block[idct_mmx_perm[i]] = block1[i]; |
233 } else if(fdct_func == simple_idct_mmx ) { | 238 } else if(fdct_func == ff_simple_idct_mmx ) { |
234 for(i=0;i<64;i++) | 239 for(i=0;i<64;i++) |
235 block[idct_simple_mmx_perm[i]] = block1[i]; | 240 block[idct_simple_mmx_perm[i]] = block1[i]; |
236 } else { | 241 } else { |
237 for(i=0; i<64; i++) | 242 for(i=0; i<64; i++) |
238 block[i]= block1[i]; | 243 block[i]= block1[i]; |
257 is_idct ? "IDCT" : "DCT", | 262 is_idct ? "IDCT" : "DCT", |
258 name, (double)it1 * 1000.0 / (double)ti1); | 263 name, (double)it1 * 1000.0 / (double)ti1); |
259 #endif | 264 #endif |
260 } | 265 } |
261 | 266 |
267 static UINT8 img_dest[64] __attribute__ ((aligned (8))); | |
268 static UINT8 img_dest1[64] __attribute__ ((aligned (8))); | |
269 | |
270 void idct248_ref(UINT8 *dest, int linesize, INT16 *block) | |
271 { | |
272 static int init; | |
273 static double c8[8][8]; | |
274 static double c4[4][4]; | |
275 double block1[64], block2[64], block3[64]; | |
276 double s, sum, v; | |
277 int i, j, k; | |
278 | |
279 if (!init) { | |
280 init = 1; | |
281 | |
282 for(i=0;i<8;i++) { | |
283 sum = 0; | |
284 for(j=0;j<8;j++) { | |
285 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0); | |
286 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); | |
287 sum += c8[i][j] * c8[i][j]; | |
288 } | |
289 } | |
290 | |
291 for(i=0;i<4;i++) { | |
292 sum = 0; | |
293 for(j=0;j<4;j++) { | |
294 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0); | |
295 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); | |
296 sum += c4[i][j] * c4[i][j]; | |
297 } | |
298 } | |
299 } | |
300 | |
301 /* butterfly */ | |
302 for(i=0;i<4;i++) { | |
303 for(j=0;j<8;j++) { | |
304 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * 0.5; | |
305 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * 0.5; | |
306 } | |
307 } | |
308 | |
309 /* idct8 on lines */ | |
310 for(i=0;i<8;i++) { | |
311 for(j=0;j<8;j++) { | |
312 sum = 0; | |
313 for(k=0;k<8;k++) | |
314 sum += c8[k][j] * block1[8*i+k]; | |
315 block2[8*i+j] = sum; | |
316 } | |
317 } | |
318 | |
319 /* idct4 */ | |
320 for(i=0;i<8;i++) { | |
321 for(j=0;j<4;j++) { | |
322 /* top */ | |
323 sum = 0; | |
324 for(k=0;k<4;k++) | |
325 sum += c4[k][j] * block2[8*(2*k)+i]; | |
326 block3[8*(2*j)+i] = sum; | |
327 | |
328 /* bottom */ | |
329 sum = 0; | |
330 for(k=0;k<4;k++) | |
331 sum += c4[k][j] * block2[8*(2*k+1)+i]; | |
332 block3[8*(2*j+1)+i] = sum; | |
333 } | |
334 } | |
335 | |
336 /* clamp and store the result */ | |
337 for(i=0;i<8;i++) { | |
338 for(j=0;j<8;j++) { | |
339 v = block3[8*i+j] + 128.0; | |
340 if (v < 0) | |
341 v = 0; | |
342 else if (v > 255) | |
343 v = 255; | |
344 dest[i * linesize + j] = (int)rint(v); | |
345 } | |
346 } | |
347 } | |
348 | |
349 void idct248_error(const char *name, | |
350 void (*idct248_put)(UINT8 *dest, int line_size, INT16 *block)) | |
351 { | |
352 int it, i, it1, ti, ti1, err_max, v; | |
353 | |
354 srandom(0); | |
355 | |
356 /* just one test to see if code is correct (precision is less | |
357 important here) */ | |
358 err_max = 0; | |
359 for(it=0;it<NB_ITS;it++) { | |
360 for(i=0;i<64;i++) | |
361 block1[i] = (random() % 512) - 256; | |
362 | |
363 for(i=0; i<64; i++) | |
364 block[i]= block1[i]; | |
365 idct248_ref(img_dest1, 8, block); | |
366 | |
367 #if 0 | |
368 printf("ref=\n"); | |
369 for(i=0;i<8;i++) { | |
370 int j; | |
371 for(j=0;j<8;j++) { | |
372 printf(" %3d", img_dest1[i*8+j]); | |
373 } | |
374 printf("\n"); | |
375 } | |
376 #endif | |
377 | |
378 for(i=0; i<64; i++) | |
379 block[i]= block1[i]; | |
380 idct248_put(img_dest, 8, block); | |
381 | |
382 #if 0 | |
383 printf("out=\n"); | |
384 for(i=0;i<8;i++) { | |
385 int j; | |
386 for(j=0;j<8;j++) { | |
387 printf(" %3d", img_dest[i*8+j]); | |
388 } | |
389 printf("\n"); | |
390 } | |
391 #endif | |
392 for(i=0;i<64;i++) { | |
393 v = abs(img_dest[i] - img_dest1[i]); | |
394 if (v > err_max) | |
395 err_max = v; | |
396 } | |
397 } | |
398 printf("%s %s: err_inf=%d\n", | |
399 1 ? "IDCT248" : "DCT248", | |
400 name, err_max); | |
401 | |
402 ti = gettime(); | |
403 it1 = 0; | |
404 do { | |
405 for(it=0;it<NB_ITS_SPEED;it++) { | |
406 for(i=0; i<64; i++) | |
407 block[i]= block1[i]; | |
408 // memcpy(block, block1, sizeof(DCTELEM) * 64); | |
409 // dont memcpy especially not fastmemcpy because it does movntq !!! | |
410 idct248_put(img_dest, 8, block); | |
411 } | |
412 it1 += NB_ITS_SPEED; | |
413 ti1 = gettime() - ti; | |
414 } while (ti1 < 1000000); | |
415 emms(); | |
416 | |
417 printf("%s %s: %0.1f kdct/s\n", | |
418 1 ? "IDCT248" : "DCT248", | |
419 name, (double)it1 * 1000.0 / (double)ti1); | |
420 } | |
421 | |
262 void help(void) | 422 void help(void) |
263 { | 423 { |
264 printf("dct-test [-i] [<test-number>]\n" | 424 printf("dct-test [-i] [<test-number>]\n" |
265 "test-number 0 -> test with random matrixes\n" | 425 "test-number 0 -> test with random matrixes\n" |
266 " 1 -> test with random sparse matrixes\n" | 426 " 1 -> test with random sparse matrixes\n" |
267 " 2 -> do 3. test from mpeg4 std\n" | 427 " 2 -> do 3. test from mpeg4 std\n" |
268 "-i test IDCT implementations\n"); | 428 "-i test IDCT implementations\n" |
429 "-4 test IDCT248 implementations\n"); | |
269 exit(1); | 430 exit(1); |
270 } | 431 } |
271 | 432 |
272 int main(int argc, char **argv) | 433 int main(int argc, char **argv) |
273 { | 434 { |
274 int test_idct = 0; | 435 int test_idct = 0, test_248_dct = 0; |
275 int c,i; | 436 int c,i; |
276 int test=1; | 437 int test=1; |
277 | 438 |
278 init_fdct(); | 439 init_fdct(); |
279 idct_mmx_init(); | 440 idct_mmx_init(); |
283 cropTbl[i] = 0; | 444 cropTbl[i] = 0; |
284 cropTbl[i + MAX_NEG_CROP + 256] = 255; | 445 cropTbl[i + MAX_NEG_CROP + 256] = 255; |
285 } | 446 } |
286 | 447 |
287 for(;;) { | 448 for(;;) { |
288 c = getopt(argc, argv, "ih"); | 449 c = getopt(argc, argv, "ih4"); |
289 if (c == -1) | 450 if (c == -1) |
290 break; | 451 break; |
291 switch(c) { | 452 switch(c) { |
292 case 'i': | 453 case 'i': |
293 test_idct = 1; | 454 test_idct = 1; |
294 break; | 455 break; |
456 case '4': | |
457 test_248_dct = 1; | |
458 break; | |
295 default : | 459 default : |
296 case 'h': | 460 case 'h': |
297 help(); | 461 help(); |
298 break; | 462 break; |
299 } | 463 } |
301 | 465 |
302 if(optind <argc) test= atoi(argv[optind]); | 466 if(optind <argc) test= atoi(argv[optind]); |
303 | 467 |
304 printf("ffmpeg DCT/IDCT test\n"); | 468 printf("ffmpeg DCT/IDCT test\n"); |
305 | 469 |
306 if (!test_idct) { | 470 if (test_248_dct) { |
307 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */ | 471 idct248_error("SIMPLE-C", simple_idct248_put); |
308 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test); | |
309 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test); | |
310 dct_error("MMX", 0, ff_fdct_mmx, fdct, test); | |
311 } else { | 472 } else { |
312 dct_error("REF-DBL", 1, idct, idct, test); | 473 if (!test_idct) { |
313 dct_error("INT", 1, j_rev_dct, idct, test); | 474 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */ |
314 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test); | 475 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test); |
315 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test); | 476 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test); |
316 dct_error("SIMPLE-C", 1, simple_idct, idct, test); | 477 dct_error("MMX", 0, ff_fdct_mmx, fdct, test); |
317 dct_error("SIMPLE-MMX", 1, simple_idct_mmx, idct, test); | 478 } else { |
318 // dct_error("ODIVX-C", 1, odivx_idct_c, idct); | 479 dct_error("REF-DBL", 1, idct, idct, test); |
319 //printf(" test against odivx idct\n"); | 480 dct_error("INT", 1, j_rev_dct, idct, test); |
320 // dct_error("REF", 1, idct, odivx_idct_c); | 481 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test); |
321 // dct_error("INT", 1, j_rev_dct, odivx_idct_c); | 482 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test); |
322 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); | 483 dct_error("SIMPLE-C", 1, simple_idct, idct, test); |
323 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); | 484 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test); |
324 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c); | 485 // dct_error("ODIVX-C", 1, odivx_idct_c, idct); |
325 // dct_error("SIMPLE-MMX", 1, simple_idct_mmx, odivx_idct_c); | 486 //printf(" test against odivx idct\n"); |
326 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c); | 487 // dct_error("REF", 1, idct, odivx_idct_c); |
488 // dct_error("INT", 1, j_rev_dct, odivx_idct_c); | |
489 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); | |
490 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); | |
491 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c); | |
492 // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c); | |
493 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c); | |
494 } | |
327 } | 495 } |
328 return 0; | 496 return 0; |
329 } | 497 } |