Mercurial > libavcodec.hg
view dct-test.c @ 2497:69adfbbdcdeb libavcodec
- samples from mplayer ftp in the "adv" profile seem to have profile=2,
which isn't the advanced one; and indeed, using adv. profile parser fails.
Using normal parser works, and that's what is done
- attempt at taking care of stride for NORM2 bitplane decoding
- duplication of much code from msmpeg4.c; this code isn't yet used, but
goes down as far as the block layer (mainly Transform Type stuff, the
remains are wild editing without checking). Unusable yet, and lacks the AC
decoding (but a step further in bitstream parsing)
patch by anonymous
author | michael |
---|---|
date | Fri, 04 Feb 2005 02:20:38 +0000 |
parents | 1396e33a9cb6 |
children | 062b2d5d1eba |
line wrap: on
line source
/** * @file dct-test.c * DCT test. (c) 2001 Fabrice Bellard. * Started from sample code by Juan J. Sierralta P. */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <sys/time.h> #include <unistd.h> #include "dsputil.h" #include "i386/mmx.h" #include "simple_idct.h" #include "faandct.h" #ifndef MAX #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #endif /* reference fdct/idct */ extern void fdct(DCTELEM *block); extern void idct(DCTELEM *block); extern void init_fdct(); extern void j_rev_dct(DCTELEM *data); extern void ff_mmx_idct(DCTELEM *data); extern void ff_mmxext_idct(DCTELEM *data); extern void odivx_idct_c (short *block); #define AANSCALE_BITS 12 static const unsigned short aanscales[64] = { /* precomputed values scaled up by 14 bits */ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 }; uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; int64_t gettime(void) { struct timeval tv; gettimeofday(&tv,NULL); return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; } #define NB_ITS 20000 #define NB_ITS_SPEED 50000 static short idct_mmx_perm[64]; static short idct_simple_mmx_perm[64]={ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, }; void idct_mmx_init(void) { int i; /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ for (i = 0; i < 64; i++) { idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); // idct_simple_mmx_perm[i] = simple_block_permute_op(i); } } static DCTELEM block[64] __attribute__ ((aligned (8))); static DCTELEM block1[64] __attribute__ ((aligned (8))); static DCTELEM block_org[64] __attribute__ ((aligned (8))); void dct_error(const char *name, int is_idct, void (*fdct_func)(DCTELEM *block), void (*fdct_ref)(DCTELEM *block), int test) { int it, i, scale; int err_inf, v; int64_t err2, ti, ti1, it1; int64_t sysErr[64], sysErrMax=0; int maxout=0; int blockSumErrMax=0, blockSumErr; srandom(0); err_inf = 0; err2 = 0; for(i=0; i<64; i++) sysErr[i]=0; for(it=0;it<NB_ITS;it++) { for(i=0;i<64;i++) block1[i] = 0; switch(test){ case 0: for(i=0;i<64;i++) block1[i] = (random() % 512) -256; if (is_idct){ fdct(block1); for(i=0;i<64;i++) block1[i]>>=3; } break; case 1:{ int num= (random()%10)+1; for(i=0;i<num;i++) block1[random()%64] = (random() % 512) -256; }break; case 2: block1[0]= (random()%4096)-2048; block1[63]= (block1[0]&1)^1; break; } #if 0 // simulate mismatch control { int sum=0; for(i=0;i<64;i++) sum+=block1[i]; if((sum&1)==0) block1[63]^=1; } #endif for(i=0; i<64; i++) block_org[i]= block1[i]; if (fdct_func == ff_mmx_idct || fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { for(i=0;i<64;i++) block[idct_mmx_perm[i]] = block1[i]; } else if(fdct_func == ff_simple_idct_mmx ) { for(i=0;i<64;i++) block[idct_simple_mmx_perm[i]] = block1[i]; } else { for(i=0; i<64; i++) block[i]= block1[i]; } #if 0 // simulate mismatch control for tested IDCT but not the ref { int sum=0; for(i=0;i<64;i++) sum+=block[i]; if((sum&1)==0) block[63]^=1; } #endif fdct_func(block); emms(); /* for ff_mmx_idct */ if (fdct_func == fdct_ifast #ifndef FAAN_POSTSCALE || fdct_func == ff_faandct #endif ) { for(i=0; i<64; i++) { scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i]; block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; } } fdct_ref(block1); blockSumErr=0; for(i=0;i<64;i++) { v = abs(block[i] - block1[i]); if (v > err_inf) err_inf = v; err2 += v * v; sysErr[i] += block[i] - block1[i]; blockSumErr += v; if( abs(block[i])>maxout) maxout=abs(block[i]); } if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; #if 0 // print different matrix pairs if(blockSumErr){ printf("\n"); for(i=0; i<64; i++){ if((i&7)==0) printf("\n"); printf("%4d ", block_org[i]); } for(i=0; i<64; i++){ if((i&7)==0) printf("\n"); printf("%4d ", block[i] - block1[i]); } } #endif } for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i])); #if 1 // dump systematic errors for(i=0; i<64; i++){ if(i%8==0) printf("\n"); printf("%5d ", (int)sysErr[i]); } printf("\n"); #endif printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", is_idct ? "IDCT" : "DCT", name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); #if 1 //Speed test /* speed test */ for(i=0;i<64;i++) block1[i] = 0; switch(test){ case 0: for(i=0;i<64;i++) block1[i] = (random() % 512) -256; if (is_idct){ fdct(block1); for(i=0;i<64;i++) block1[i]>>=3; } break; case 1:{ case 2: block1[0] = (random() % 512) -256; block1[1] = (random() % 512) -256; block1[2] = (random() % 512) -256; block1[3] = (random() % 512) -256; }break; } if (fdct_func == ff_mmx_idct || fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { for(i=0;i<64;i++) block[idct_mmx_perm[i]] = block1[i]; } else if(fdct_func == ff_simple_idct_mmx ) { for(i=0;i<64;i++) block[idct_simple_mmx_perm[i]] = block1[i]; } else { for(i=0; i<64; i++) block[i]= block1[i]; } ti = gettime(); it1 = 0; do { for(it=0;it<NB_ITS_SPEED;it++) { for(i=0; i<64; i++) block[i]= block1[i]; // memcpy(block, block1, sizeof(DCTELEM) * 64); // dont memcpy especially not fastmemcpy because it does movntq !!! fdct_func(block); } it1 += NB_ITS_SPEED; ti1 = gettime() - ti; } while (ti1 < 1000000); emms(); printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", name, (double)it1 * 1000.0 / (double)ti1); #endif } static uint8_t img_dest[64] __attribute__ ((aligned (8))); static uint8_t img_dest1[64] __attribute__ ((aligned (8))); void idct248_ref(uint8_t *dest, int linesize, int16_t *block) { static int init; static double c8[8][8]; static double c4[4][4]; double block1[64], block2[64], block3[64]; double s, sum, v; int i, j, k; if (!init) { init = 1; for(i=0;i<8;i++) { sum = 0; for(j=0;j<8;j++) { s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0); c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); sum += c8[i][j] * c8[i][j]; } } for(i=0;i<4;i++) { sum = 0; for(j=0;j<4;j++) { s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0); c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); sum += c4[i][j] * c4[i][j]; } } } /* butterfly */ s = 0.5 * sqrt(2.0); for(i=0;i<4;i++) { for(j=0;j<8;j++) { block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s; block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s; } } /* idct8 on lines */ for(i=0;i<8;i++) { for(j=0;j<8;j++) { sum = 0; for(k=0;k<8;k++) sum += c8[k][j] * block1[8*i+k]; block2[8*i+j] = sum; } } /* idct4 */ for(i=0;i<8;i++) { for(j=0;j<4;j++) { /* top */ sum = 0; for(k=0;k<4;k++) sum += c4[k][j] * block2[8*(2*k)+i]; block3[8*(2*j)+i] = sum; /* bottom */ sum = 0; for(k=0;k<4;k++) sum += c4[k][j] * block2[8*(2*k+1)+i]; block3[8*(2*j+1)+i] = sum; } } /* clamp and store the result */ for(i=0;i<8;i++) { for(j=0;j<8;j++) { v = block3[8*i+j]; if (v < 0) v = 0; else if (v > 255) v = 255; dest[i * linesize + j] = (int)rint(v); } } } void idct248_error(const char *name, void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block)) { int it, i, it1, ti, ti1, err_max, v; srandom(0); /* just one test to see if code is correct (precision is less important here) */ err_max = 0; for(it=0;it<NB_ITS;it++) { /* XXX: use forward transform to generate values */ for(i=0;i<64;i++) block1[i] = (random() % 256) - 128; block1[0] += 1024; for(i=0; i<64; i++) block[i]= block1[i]; idct248_ref(img_dest1, 8, block); for(i=0; i<64; i++) block[i]= block1[i]; idct248_put(img_dest, 8, block); for(i=0;i<64;i++) { v = abs((int)img_dest[i] - (int)img_dest1[i]); if (v == 255) printf("%d %d\n", img_dest[i], img_dest1[i]); if (v > err_max) err_max = v; } #if 0 printf("ref=\n"); for(i=0;i<8;i++) { int j; for(j=0;j<8;j++) { printf(" %3d", img_dest1[i*8+j]); } printf("\n"); } printf("out=\n"); for(i=0;i<8;i++) { int j; for(j=0;j<8;j++) { printf(" %3d", img_dest[i*8+j]); } printf("\n"); } #endif } printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max); ti = gettime(); it1 = 0; do { for(it=0;it<NB_ITS_SPEED;it++) { for(i=0; i<64; i++) block[i]= block1[i]; // memcpy(block, block1, sizeof(DCTELEM) * 64); // dont memcpy especially not fastmemcpy because it does movntq !!! idct248_put(img_dest, 8, block); } it1 += NB_ITS_SPEED; ti1 = gettime() - ti; } while (ti1 < 1000000); emms(); printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name, (double)it1 * 1000.0 / (double)ti1); } void help(void) { printf("dct-test [-i] [<test-number>]\n" "test-number 0 -> test with random matrixes\n" " 1 -> test with random sparse matrixes\n" " 2 -> do 3. test from mpeg4 std\n" "-i test IDCT implementations\n" "-4 test IDCT248 implementations\n"); exit(1); } int main(int argc, char **argv) { int test_idct = 0, test_248_dct = 0; int c,i; int test=1; init_fdct(); idct_mmx_init(); for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; for(i=0;i<MAX_NEG_CROP;i++) { cropTbl[i] = 0; cropTbl[i + MAX_NEG_CROP + 256] = 255; } for(;;) { c = getopt(argc, argv, "ih4"); if (c == -1) break; switch(c) { case 'i': test_idct = 1; break; case '4': test_248_dct = 1; break; default : case 'h': help(); break; } } if(optind <argc) test= atoi(argv[optind]); printf("ffmpeg DCT/IDCT test\n"); if (test_248_dct) { idct248_error("SIMPLE-C", simple_idct248_put); } else { if (!test_idct) { dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */ dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test); dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test); dct_error("MMX", 0, ff_fdct_mmx, fdct, test); dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test); dct_error("FAAN", 0, ff_faandct, fdct, test); } else { dct_error("REF-DBL", 1, idct, idct, test); dct_error("INT", 1, j_rev_dct, idct, test); dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test); dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test); dct_error("SIMPLE-C", 1, simple_idct, idct, test); dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test); // dct_error("ODIVX-C", 1, odivx_idct_c, idct); //printf(" test against odivx idct\n"); // dct_error("REF", 1, idct, odivx_idct_c); // dct_error("INT", 1, j_rev_dct, odivx_idct_c); // dct_error("MMX", 1, ff_mmx_idct, odivx_idct_c); // dct_error("MMXEXT", 1, ff_mmxext_idct, odivx_idct_c); // dct_error("SIMPLE-C", 1, simple_idct, odivx_idct_c); // dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, odivx_idct_c); // dct_error("ODIVX-C", 1, odivx_idct_c, odivx_idct_c); } } return 0; }