view dct-test.c @ 2956:5f51b1e0bed6 libavcodec

Cook compatible decoder, patch by Benjamin Larsson. Add cook demuxing; change the rm demuxer so that it reorders audio packets before sending them to the decoder and sends minimum decodable sized packets; pass only real codec extradata to the decoder. Fix 28_8 decoder for the new demuxer strategy.
author rtognimp
date Fri, 09 Dec 2005 16:08:18 +0000
parents 062b2d5d1eba
children ef2149182f1c
line wrap: on
line source

/**
 * @file dct-test.c
 * DCT test. (c) 2001 Fabrice Bellard. 
 * Started from sample code by Juan J. Sierralta P.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>

#include "dsputil.h"

#include "i386/mmx.h"
#include "simple_idct.h"
#include "faandct.h"

#ifndef MAX
#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
#endif

#undef printf

/**
 * Plain memcpy() exported under the name fast_memcpy.
 *
 * NOTE(review): presumably present so that objects linked into this test
 * which reference fast_memcpy resolve to the libc copy - confirm.
 *
 * @param a destination buffer
 * @param b source buffer (must not overlap a, as for memcpy)
 * @param c number of bytes to copy
 * @return a
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}

/*
 * Transforms defined in other translation units. fdct()/idct() serve as the
 * reference implementations in main(); the others are implementations under
 * test. Each one works in place on a 64-coefficient block.
 */
extern void fdct(DCTELEM *block);
extern void idct(DCTELEM *block);
extern void ff_idct_xvid_mmx(DCTELEM *block);
extern void ff_idct_xvid_mmx2(DCTELEM *block);
/* (void) makes this a real prototype instead of an old-style declaration */
extern void init_fdct(void);

extern void j_rev_dct(DCTELEM *data);
extern void ff_mmx_idct(DCTELEM *data);
extern void ff_mmxext_idct(DCTELEM *data);

/* only referenced from disabled code in this file */
extern void odivx_idct_c (short *block);

/*
 * Right shift applied by dct_error() after multiplying a coefficient by its
 * rescale factor (see the use of aanscales[] there).
 */
#define AANSCALE_BITS 12
/*
 * Per-coefficient scale factors in fixed point, where 16384 (1 << 14)
 * corresponds to 1.0. dct_error() divides by these to rescale the output of
 * fdct_ifast (and of ff_faandct when FAAN_POSTSCALE is not defined) before
 * comparing it with the reference transform.
 */
static const unsigned short aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/*
 * Clamping lookup table filled in by main(): the first MAX_NEG_CROP entries
 * are 0, the next 256 are 0..255 and the last MAX_NEG_CROP entries are 255.
 * It is not read anywhere in this file.
 * NOTE(review): presumably referenced by the IDCT code linked into the test
 * - confirm.
 */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];

/**
 * Read the wall clock through gettimeofday().
 *
 * @return seconds and microseconds combined into a single microsecond count
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen the seconds before scaling so the product is done in 64 bits */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}

/* number of generated blocks per accuracy test (dct_error, idct248_error) */
#define NB_ITS 20000
/* number of transform calls between two clock reads in the timing loops */
#define NB_ITS_SPEED 50000

/*
 * Input permutation used by dct_error() for ff_mmx_idct, ff_mmxext_idct and
 * j_rev_dct: coefficient i of the generated block is stored at position
 * idct_mmx_perm[i]. Filled in by idct_mmx_init().
 */
static short idct_mmx_perm[64];

/*
 * Input permutation used by dct_error() for ff_simple_idct_mmx: coefficient
 * i of the generated block is stored at position idct_simple_mmx_perm[i].
 */
static short idct_simple_mmx_perm[64]={
	0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
	0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
	0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
	0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
	0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
	0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
	0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/**
 * Build idct_mmx_perm[], the coefficient reordering that dct_error() applies
 * to the input of the mmx/mmxext idct.
 *
 * For each index the row bits (3..5) are kept, bits 1..2 move down by one
 * position and bit 0 moves up to bit 2. idct_simple_mmx_perm[] is a static
 * table and is not touched here.
 */
void idct_mmx_init(void)
{
    int idx = 0;

    while (idx < 64) {
        const int row_bits = idx & 0x38;
        const int hi_col   = (idx & 6) >> 1;
        const int lo_col   = (idx & 1) << 2;

        idct_mmx_perm[idx] = row_bits | hi_col | lo_col;
        idx++;
    }
}

/*
 * 8-byte aligned work buffers shared by dct_error() and idct248_error():
 *  - block     is the buffer handed to the transform under test,
 *  - block1    holds the generated input (dct_error() also runs the
 *              reference transform on it in place),
 *  - block_org keeps a copy of the generated input; it is only read by the
 *              disabled (#if 0) dump in dct_error().
 */
static DCTELEM block[64] __attribute__ ((aligned (8)));
static DCTELEM block1[64] __attribute__ ((aligned (8)));
static DCTELEM block_org[64] __attribute__ ((aligned (8)));

/**
 * Measure the accuracy and the speed of one (I)DCT implementation.
 *
 * NB_ITS generated blocks are run through fdct_func and fdct_ref; the
 * accumulated per-coefficient signed error is dumped as an 8x8 matrix,
 * followed by one summary line (maximum absolute error, mean squared error,
 * largest systematic error, largest output magnitude, largest per-block sum
 * of absolute errors). Afterwards fdct_func is timed for at least one second
 * and its throughput is printed.
 *
 * @param name      label printed in the report lines
 * @param is_idct   non-zero when an inverse transform is tested: selects the
 *                  "IDCT"/"DCT" label and, for test 0, passes the random
 *                  input through fdct() and a right shift by 3 first
 * @param fdct_func transform under test, working in place on 64 coefficients
 * @param fdct_ref  reference transform, working in place on 64 coefficients
 * @param test      input generator: 0 = random values in -256..255,
 *                  1 = 1..10 random values at random positions,
 *                  2 = random block1[0] in -2048..2047 with block1[63] set to
 *                  the inverse of its lowest bit; any other value leaves the
 *                  block all zero
 */
void dct_error(const char *name, int is_idct,
               void (*fdct_func)(DCTELEM *block),
               void (*fdct_ref)(DCTELEM *block), int test)
{
    int it, i, scale;
    int err_inf, v;
    int64_t err2, ti, ti1, it1;
    int64_t sysErr[64], sysErrMax=0;
    int maxout=0;
    int blockSumErrMax=0, blockSumErr;

    /* fixed seed: every call starts from the same random sequence */
    srandom(0);

    err_inf = 0;
    err2 = 0;
    for(i=0; i<64; i++) sysErr[i]=0;
    for(it=0;it<NB_ITS;it++) {
        for(i=0;i<64;i++)
            block1[i] = 0;
        switch(test){
        case 0:
            for(i=0;i<64;i++)
                block1[i] = (random() % 512) -256;
            if (is_idct){
                fdct(block1);

                for(i=0;i<64;i++)
                    block1[i]>>=3;
            }
            break;
        case 1:{
            int num= (random()%10)+1;
            for(i=0;i<num;i++) {
                /* Draw the position and the value in two statements: inside
                   a single assignment the order of the two random() calls
                   is unspecified, which made the input compiler dependent. */
                int pos = random() % 64;
                block1[pos] = (random() % 512) -256;
            }
            break;
        }
        case 2:
            block1[0]= (random()%4096)-2048;
            block1[63]= (block1[0]&1)^1;
            break;
        default:
            /* unknown test number: keep the all-zero block */
            break;
        }

#if 0 // simulate mismatch control
{ int sum=0;
        for(i=0;i<64;i++)
           sum+=block1[i];

        if((sum&1)==0) block1[63]^=1;
}
#endif

        for(i=0; i<64; i++)
            block_org[i]= block1[i];

        /* some implementations expect their input in a permuted order */
        if (fdct_func == ff_mmx_idct ||
            fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
            for(i=0;i<64;i++)
                block[idct_mmx_perm[i]] = block1[i];
        } else if(fdct_func == ff_simple_idct_mmx ) {
            for(i=0;i<64;i++)
                block[idct_simple_mmx_perm[i]] = block1[i];
        } else {
            for(i=0; i<64; i++)
                block[i]= block1[i];
        }
#if 0 // simulate mismatch control for tested IDCT but not the ref
{ int sum=0;
        for(i=0;i<64;i++)
           sum+=block[i];

        if((sum&1)==0) block[63]^=1;
}
#endif

        fdct_func(block);
        emms(); /* for ff_mmx_idct */

        /* these transforms leave their output scaled by aanscales[];
           undo that before comparing with the reference */
        if (fdct_func == fdct_ifast
#ifndef FAAN_POSTSCALE
            || fdct_func == ff_faandct
#endif
            ) {
            for(i=0; i<64; i++) {
                scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
            }
        }

        fdct_ref(block1);

        blockSumErr=0;
        for(i=0;i<64;i++) {
            v = abs(block[i] - block1[i]);
            if (v > err_inf)
                err_inf = v;
            err2 += v * v;
            sysErr[i] += block[i] - block1[i];
            blockSumErr += v;
            if( abs(block[i])>maxout) maxout=abs(block[i]);
        }
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
#if 0 // print different matrix pairs
        if(blockSumErr){
            printf("\n");
            for(i=0; i<64; i++){
                if((i&7)==0) printf("\n");
                printf("%4d ", block_org[i]);
            }
            for(i=0; i<64; i++){
                if((i&7)==0) printf("\n");
                printf("%4d ", block[i] - block1[i]);
            }
        }
#endif
    }
    for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i]));

#if 1 // dump systematic errors
    for(i=0; i<64; i++){
        if(i%8==0) printf("\n");
        printf("%5d ", (int)sysErr[i]);
    }
    printf("\n");
#endif

    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
           is_idct ? "IDCT" : "DCT",
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
#if 1 //Speed test
    /* speed test */
    for(i=0;i<64;i++)
        block1[i] = 0;
    switch(test){
    case 0:
        for(i=0;i<64;i++)
            block1[i] = (random() % 512) -256;
        if (is_idct){
            fdct(block1);

            for(i=0;i<64;i++)
                block1[i]>>=3;
        }
        break;
    case 1:
    case 2:
        block1[0] = (random() % 512) -256;
        block1[1] = (random() % 512) -256;
        block1[2] = (random() % 512) -256;
        block1[3] = (random() % 512) -256;
        break;
    default:
        break;
    }

    /* NOTE(review): the timing loop below reloads block from block1 without
       any permutation, so the permuted copy made here is overwritten before
       it is used - confirm whether the timed input was meant to be
       permuted. */
    if (fdct_func == ff_mmx_idct ||
        fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
        for(i=0;i<64;i++)
            block[idct_mmx_perm[i]] = block1[i];
    } else if(fdct_func == ff_simple_idct_mmx ) {
        for(i=0;i<64;i++)
            block[idct_simple_mmx_perm[i]] = block1[i];
    } else {
        for(i=0; i<64; i++)
            block[i]= block1[i];
    }

    /* run batches of NB_ITS_SPEED calls until one second has elapsed */
    ti = gettime();
    it1 = 0;
    do {
        for(it=0;it<NB_ITS_SPEED;it++) {
            for(i=0; i<64; i++)
                block[i]= block1[i];
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
// dont memcpy especially not fastmemcpy because it does movntq !!!
            fdct_func(block);
        }
        it1 += NB_ITS_SPEED;
        ti1 = gettime() - ti;
    } while (ti1 < 1000000);
    emms();

    /* ti1 is in microseconds, so calls * 1000 / ti1 is thousands per second */
    printf("%s %s: %0.1f kdct/s\n",
           is_idct ? "IDCT" : "DCT",
           name, (double)it1 * 1000.0 / (double)ti1);
#endif
}

/*
 * 8x8 pixel outputs compared by idct248_error(): img_dest receives the
 * result of the implementation under test, img_dest1 that of idct248_ref().
 */
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));

void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
{
    static int init;
    static double c8[8][8];
    static double c4[4][4];
    double block1[64], block2[64], block3[64];
    double s, sum, v;
    int i, j, k;

    if (!init) {
        init = 1;

        for(i=0;i<8;i++) {
            sum = 0;
            for(j=0;j<8;j++) {
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
                sum += c8[i][j] * c8[i][j];
            }
        }
        
        for(i=0;i<4;i++) {
            sum = 0;
            for(j=0;j<4;j++) {
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
                sum += c4[i][j] * c4[i][j];
            }
        }
    }

    /* butterfly */
    s = 0.5 * sqrt(2.0);
    for(i=0;i<4;i++) {
        for(j=0;j<8;j++) {
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
        }
    }

    /* idct8 on lines */
    for(i=0;i<8;i++) {
        for(j=0;j<8;j++) {
            sum = 0;
            for(k=0;k<8;k++)
                sum += c8[k][j] * block1[8*i+k];
            block2[8*i+j] = sum;
        }
    }

    /* idct4 */
    for(i=0;i<8;i++) {
        for(j=0;j<4;j++) {
            /* top */
            sum = 0;
            for(k=0;k<4;k++)
                sum += c4[k][j] * block2[8*(2*k)+i];
            block3[8*(2*j)+i] = sum;

            /* bottom */
            sum = 0;
            for(k=0;k<4;k++)
                sum += c4[k][j] * block2[8*(2*k+1)+i];
            block3[8*(2*j+1)+i] = sum;
        }
    }

    /* clamp and store the result */
    for(i=0;i<8;i++) {
        for(j=0;j<8;j++) {
            v = block3[8*i+j];
            if (v < 0)
                v = 0;
            else if (v > 255)
                v = 255;
            dest[i * linesize + j] = (int)rint(v);
        }
    }
}

/**
 * Compare a 2-4-8 IDCT "put" implementation against idct248_ref() and time
 * it.
 *
 * NB_ITS random blocks are transformed by both functions; the largest
 * absolute pixel difference is printed. The implementation is then run for
 * at least one second on the last generated block and its throughput is
 * printed.
 *
 * @param name        label printed in the report lines
 * @param idct248_put implementation under test; called with an 8x8
 *                    destination of stride 8 and a 64-coefficient block
 */
void idct248_error(const char *name, 
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
{
    int it, i, err_max, v;
    /* 64-bit like in dct_error(): gettime() returns an int64_t microsecond
       count that does not fit in an int */
    int64_t it1, ti, ti1;

    /* fixed seed: every call starts from the same random sequence */
    srandom(0);

    /* just one test to see if code is correct (precision is less
       important here) */
    err_max = 0;
    for(it=0;it<NB_ITS;it++) {

        /* XXX: use forward transform to generate values */
        for(i=0;i<64;i++)
            block1[i] = (random() % 256) - 128;
        block1[0] += 1024;

        /* each transform gets its own fresh copy of the input */
        for(i=0; i<64; i++)
            block[i]= block1[i];
        idct248_ref(img_dest1, 8, block);

        for(i=0; i<64; i++)
            block[i]= block1[i];
        idct248_put(img_dest, 8, block);

        for(i=0;i<64;i++) {
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
            if (v == 255)
                printf("%d %d\n", img_dest[i], img_dest1[i]);
            if (v > err_max)
                err_max = v;
        }
#if 0
        printf("ref=\n");
        for(i=0;i<8;i++) {
            int j;
            for(j=0;j<8;j++) {
                printf(" %3d", img_dest1[i*8+j]);
            }
            printf("\n");
        }

        printf("out=\n");
        for(i=0;i<8;i++) {
            int j;
            for(j=0;j<8;j++) {
                printf(" %3d", img_dest[i*8+j]);
            }
            printf("\n");
        }
#endif
    }
    printf("%s %s: err_inf=%d\n",
           1 ? "IDCT248" : "DCT248",
           name, err_max);

    /* run batches of NB_ITS_SPEED calls until one second has elapsed */
    ti = gettime();
    it1 = 0;
    do {
        for(it=0;it<NB_ITS_SPEED;it++) {
            for(i=0; i<64; i++)
                block[i]= block1[i];
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
// dont memcpy especially not fastmemcpy because it does movntq !!!
            idct248_put(img_dest, 8, block);
        }
        it1 += NB_ITS_SPEED;
        ti1 = gettime() - ti;
    } while (ti1 < 1000000);
    emms();

    /* ti1 is in microseconds, so calls * 1000 / ti1 is thousands per second */
    printf("%s %s: %0.1f kdct/s\n",
           1 ? "IDCT248" : "DCT248",
           name, (double)it1 * 1000.0 / (double)ti1);
}

/**
 * Print the command line summary on stdout and terminate the program with
 * exit status 1. Does not return.
 */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from mpeg4 std\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n";

    printf("%s", usage);
    exit(1);
}

/**
 * Entry point: parse the options, then run the selected family of tests.
 *
 * Options: -i tests the IDCT implementations instead of the forward DCTs,
 * -4 tests the IDCT248 implementation, -h (or any unknown option) prints the
 * usage text and exits. An optional positional argument selects the input
 * generator (0, 1 or 2; default 1). Anything else is rejected with the usage
 * text instead of being passed through atoi(), which would silently turn
 * garbage into test 0 and let out-of-range numbers run on all-zero blocks.
 *
 * @return 0 after the tests have run
 */
int main(int argc, char **argv)
{
    int test_idct = 0, test_248_dct = 0;
    int c,i;
    int test=1;

    init_fdct();
    idct_mmx_init();

    /* clamp table: 0 below the range, identity inside, 255 above */
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        cropTbl[i] = 0;
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(;;) {
        c = getopt(argc, argv, "ih4");
        if (c == -1)
            break;
        switch(c) {
        case 'i':
            test_idct = 1;
            break;
        case '4':
            test_248_dct = 1;
            break;
        default :
        case 'h':
            help();
            break;
        }
    }

    if (optind < argc) {
        char *end;
        long val = strtol(argv[optind], &end, 10);

        /* no digits, trailing junk or a number outside 0..2: help() exits */
        if (end == argv[optind] || *end != '\0' || val < 0 || val > 2)
            help();
        test = (int)val;
    }

    printf("ffmpeg DCT/IDCT test\n");

    if (test_248_dct) {
        idct248_error("SIMPLE-C", simple_idct248_put);
    } else {
        if (!test_idct) {
            dct_error("REF-DBL", 0, fdct, fdct, test); /* only to verify code ! */
            dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
            dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
            dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
            dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
            dct_error("FAAN", 0, ff_faandct, fdct, test);
        } else {
            dct_error("REF-DBL", 1, idct, idct, test);
            dct_error("INT", 1, j_rev_dct, idct, test);
            dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
            dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
            dct_error("SIMPLE-C", 1, simple_idct, idct, test);
            dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
            dct_error("XVID-MMX", 1, ff_idct_xvid_mmx, idct, test);
            dct_error("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, test);
            /* the comparisons against odivx_idct_c are disabled */
        }
    }
    return 0;
}