comparison dct-test.c @ 720:409bdaa0b964 libavcodec

added IDCT248 testing
author bellard
date Thu, 03 Oct 2002 19:49:23 +0000
parents 9abb13c21fbe
children ff90043f4a2d
comparison
equal deleted inserted replaced
719:2b7ff6dfee35 720:409bdaa0b964
21 extern void j_rev_dct(DCTELEM *data); 21 extern void j_rev_dct(DCTELEM *data);
22 extern void ff_mmx_idct(DCTELEM *data); 22 extern void ff_mmx_idct(DCTELEM *data);
23 extern void ff_mmxext_idct(DCTELEM *data); 23 extern void ff_mmxext_idct(DCTELEM *data);
24 24
25 extern void odivx_idct_c (short *block); 25 extern void odivx_idct_c (short *block);
26
27 void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/,
28 UINT8 *pixels/*align 8*/, int line_size);
29
30 void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/,
31 UINT8 *pixels/*align 8*/, int line_size);
26 32
27 #define AANSCALE_BITS 12 33 #define AANSCALE_BITS 12
28 static const unsigned short aanscales[64] = { 34 static const unsigned short aanscales[64] = {
29 /* precomputed values scaled up by 14 bits */ 35 /* precomputed values scaled up by 14 bits */
30 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 36 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
84 int it, i, scale; 90 int it, i, scale;
85 int err_inf, v; 91 int err_inf, v;
86 INT64 err2, ti, ti1, it1; 92 INT64 err2, ti, ti1, it1;
87 INT64 sysErr[64], sysErrMax=0; 93 INT64 sysErr[64], sysErrMax=0;
88 int maxout=0; 94 int maxout=0;
89 int max_sum=0;
90 int blockSumErrMax=0, blockSumErr; 95 int blockSumErrMax=0, blockSumErr;
91 96
92 srandom(0); 97 srandom(0);
93 98
94 err_inf = 0; 99 err_inf = 0;
133 138
134 if (fdct_func == ff_mmx_idct || 139 if (fdct_func == ff_mmx_idct ||
135 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { 140 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
136 for(i=0;i<64;i++) 141 for(i=0;i<64;i++)
137 block[idct_mmx_perm[i]] = block1[i]; 142 block[idct_mmx_perm[i]] = block1[i];
138 } else if(fdct_func == simple_idct_mmx ) { 143 } else if(fdct_func == ff_simple_idct_mmx ) {
139 for(i=0;i<64;i++) 144 for(i=0;i<64;i++)
140 block[idct_simple_mmx_perm[i]] = block1[i]; 145 block[idct_simple_mmx_perm[i]] = block1[i];
141 146
142 } else { 147 } else {
143 for(i=0; i<64; i++) 148 for(i=0; i<64; i++)
228 233
229 if (fdct_func == ff_mmx_idct || 234 if (fdct_func == ff_mmx_idct ||
230 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { 235 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
231 for(i=0;i<64;i++) 236 for(i=0;i<64;i++)
232 block[idct_mmx_perm[i]] = block1[i]; 237 block[idct_mmx_perm[i]] = block1[i];
233 } else if(fdct_func == simple_idct_mmx ) { 238 } else if(fdct_func == ff_simple_idct_mmx ) {
234 for(i=0;i<64;i++) 239 for(i=0;i<64;i++)
235 block[idct_simple_mmx_perm[i]] = block1[i]; 240 block[idct_simple_mmx_perm[i]] = block1[i];
236 } else { 241 } else {
237 for(i=0; i<64; i++) 242 for(i=0; i<64; i++)
238 block[i]= block1[i]; 243 block[i]= block1[i];
257 is_idct ? "IDCT" : "DCT", 262 is_idct ? "IDCT" : "DCT",
258 name, (double)it1 * 1000.0 / (double)ti1); 263 name, (double)it1 * 1000.0 / (double)ti1);
259 #endif 264 #endif
260 } 265 }
261 266
/* 8x8 output of the implementation under test: idct248_error() passes this
   buffer to idct248_put() with a line size of 8. */
267 static UINT8 img_dest[64] __attribute__ ((aligned (8)));
/* 8x8 output of idct248_ref() for the same coefficients; idct248_error()
   compares it element by element against img_dest. */
268 static UINT8 img_dest1[64] __attribute__ ((aligned (8)));
269
270 void idct248_ref(UINT8 *dest, int linesize, INT16 *block)
271 {
272 static int init;
273 static double c8[8][8];
274 static double c4[4][4];
275 double block1[64], block2[64], block3[64];
276 double s, sum, v;
277 int i, j, k;
278
279 if (!init) {
280 init = 1;
281
282 for(i=0;i<8;i++) {
283 sum = 0;
284 for(j=0;j<8;j++) {
285 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
286 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
287 sum += c8[i][j] * c8[i][j];
288 }
289 }
290
291 for(i=0;i<4;i++) {
292 sum = 0;
293 for(j=0;j<4;j++) {
294 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
295 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
296 sum += c4[i][j] * c4[i][j];
297 }
298 }
299 }
300
301 /* butterfly */
302 for(i=0;i<4;i++) {
303 for(j=0;j<8;j++) {
304 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * 0.5;
305 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * 0.5;
306 }
307 }
308
309 /* idct8 on lines */
310 for(i=0;i<8;i++) {
311 for(j=0;j<8;j++) {
312 sum = 0;
313 for(k=0;k<8;k++)
314 sum += c8[k][j] * block1[8*i+k];
315 block2[8*i+j] = sum;
316 }
317 }
318
319 /* idct4 */
320 for(i=0;i<8;i++) {
321 for(j=0;j<4;j++) {
322 /* top */
323 sum = 0;
324 for(k=0;k<4;k++)
325 sum += c4[k][j] * block2[8*(2*k)+i];
326 block3[8*(2*j)+i] = sum;
327
328 /* bottom */
329 sum = 0;
330 for(k=0;k<4;k++)
331 sum += c4[k][j] * block2[8*(2*k+1)+i];
332 block3[8*(2*j+1)+i] = sum;
333 }
334 }
335
336 /* clamp and store the result */
337 for(i=0;i<8;i++) {
338 for(j=0;j<8;j++) {
339 v = block3[8*i+j] + 128.0;
340 if (v < 0)
341 v = 0;
342 else if (v > 255)
343 v = 255;
344 dest[i * linesize + j] = (int)rint(v);
345 }
346 }
347 }
348
349 void idct248_error(const char *name,
350 void (*idct248_put)(UINT8 *dest, int line_size, INT16 *block))
351 {
352 int it, i, it1, ti, ti1, err_max, v;
353
354 srandom(0);
355
356 /* just one test to see if code is correct (precision is less
357 important here) */
358 err_max = 0;
359 for(it=0;it<NB_ITS;it++) {
360 for(i=0;i<64;i++)
361 block1[i] = (random() % 512) - 256;
362
363 for(i=0; i<64; i++)
364 block[i]= block1[i];
365 idct248_ref(img_dest1, 8, block);
366
367 #if 0
368 printf("ref=\n");
369 for(i=0;i<8;i++) {
370 int j;
371 for(j=0;j<8;j++) {
372 printf(" %3d", img_dest1[i*8+j]);
373 }
374 printf("\n");
375 }
376 #endif
377
378 for(i=0; i<64; i++)
379 block[i]= block1[i];
380 idct248_put(img_dest, 8, block);
381
382 #if 0
383 printf("out=\n");
384 for(i=0;i<8;i++) {
385 int j;
386 for(j=0;j<8;j++) {
387 printf(" %3d", img_dest[i*8+j]);
388 }
389 printf("\n");
390 }
391 #endif
392 for(i=0;i<64;i++) {
393 v = abs(img_dest[i] - img_dest1[i]);
394 if (v > err_max)
395 err_max = v;
396 }
397 }
398 printf("%s %s: err_inf=%d\n",
399 1 ? "IDCT248" : "DCT248",
400 name, err_max);
401
402 ti = gettime();
403 it1 = 0;
404 do {
405 for(it=0;it<NB_ITS_SPEED;it++) {
406 for(i=0; i<64; i++)
407 block[i]= block1[i];
408 // memcpy(block, block1, sizeof(DCTELEM) * 64);
409 // dont memcpy especially not fastmemcpy because it does movntq !!!
410 idct248_put(img_dest, 8, block);
411 }
412 it1 += NB_ITS_SPEED;
413 ti1 = gettime() - ti;
414 } while (ti1 < 1000000);
415 emms();
416
417 printf("%s %s: %0.1f kdct/s\n",
418 1 ? "IDCT248" : "DCT248",
419 name, (double)it1 * 1000.0 / (double)ti1);
420 }
421
262 void help(void) 422 void help(void)
263 { 423 {
264 printf("dct-test [-i] [<test-number>]\n" 424 printf("dct-test [-i] [<test-number>]\n"
265 "test-number 0 -> test with random matrixes\n" 425 "test-number 0 -> test with random matrixes\n"
266 " 1 -> test with random sparse matrixes\n" 426 " 1 -> test with random sparse matrixes\n"
267 " 2 -> do 3. test from mpeg4 std\n" 427 " 2 -> do 3. test from mpeg4 std\n"
268 "-i test IDCT implementations\n"); 428 "-i test IDCT implementations\n"
429 "-4 test IDCT248 implementations\n");
269 exit(1); 430 exit(1);
270 } 431 }
271 432
272 int main(int argc, char **argv) 433 int main(int argc, char **argv)
273 { 434 {
274 int test_idct = 0; 435 int test_idct = 0, test_248_dct = 0;
275 int c,i; 436 int c,i;
276 int test=1; 437 int test=1;
277 438
278 init_fdct(); 439 init_fdct();
279 idct_mmx_init(); 440 idct_mmx_init();
283 cropTbl[i] = 0; 444 cropTbl[i] = 0;
284 cropTbl[i + MAX_NEG_CROP + 256] = 255; 445 cropTbl[i + MAX_NEG_CROP + 256] = 255;
285 } 446 }
286 447
287 for(;;) { 448 for(;;) {
288 c = getopt(argc, argv, "ih"); 449 c = getopt(argc, argv, "ih4");
289 if (c == -1) 450 if (c == -1)
290 break; 451 break;
291 switch(c) { 452 switch(c) {
292 case 'i': 453 case 'i':
293 test_idct = 1; 454 test_idct = 1;
294 break; 455 break;
456 case '4':
457 test_248_dct = 1;
458 break;
295 default : 459 default :
296 case 'h': 460 case 'h':
297 help(); 461 help();
298 break; 462 break;
299 } 463 }
301 465
302 if(optind <argc) test= atoi(argv[optind]); 466 if(optind <argc) test= atoi(argv[optind]);
303 467
304 printf("ffmpeg DCT/IDCT test\n"); 468 printf("ffmpeg DCT/IDCT test\n");
305 469
306 if (!test_idct) { 470 if (test_248_dct) {
307 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */ 471 idct248_error("SIMPLE-C", simple_idct248_put);
308 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
309 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
310 dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
311 } else { 472 } else {
312 dct_error("REF-DBL", 1, idct, idct, test); 473 if (!test_idct) {
313 dct_error("INT", 1, j_rev_dct, idct, test); 474 dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
314 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test); 475 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
315 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test); 476 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
316 dct_error("SIMPLE-C", 1, simple_idct, idct, test); 477 dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
317 dct_error("SIMPLE-MMX", 1, simple_idct_mmx, idct, test); 478 } else {
318 // dct_error("ODIVX-C", 1, odivx_idct_c, idct); 479 dct_error("REF-DBL", 1, idct, idct, test);
319 //printf(" test against odivx idct\n"); 480 dct_error("INT", 1, j_rev_dct, idct, test);
320 // dct_error("REF", 1, idct, odivx_idct_c); 481 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
321 // dct_error("INT", 1, j_rev_dct, odivx_idct_c); 482 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
322 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); 483 dct_error("SIMPLE-C", 1, simple_idct, idct, test);
323 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); 484 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
324 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c); 485 // dct_error("ODIVX-C", 1, odivx_idct_c, idct);
325 // dct_error("SIMPLE-MMX", 1, simple_idct_mmx, odivx_idct_c); 486 //printf(" test against odivx idct\n");
326 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c); 487 // dct_error("REF", 1, idct, odivx_idct_c);
488 // dct_error("INT", 1, j_rev_dct, odivx_idct_c);
489 // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c);
490 // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c);
491 // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c);
492 // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c);
493 // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c);
494 }
327 } 495 }
328 return 0; 496 return 0;
329 } 497 }