changeset 1729:a4a5e7521339 libavcodec
interlaced dct decision cleanup
function moved to DSPContext
MMX & MMX2 optimized
default changed from SSE to SAD (better quality)
VBV buffer size command line option in kByte
author      michael
date        Mon, 05 Jan 2004 22:57:07 +0000
parents     73a3699b8375
children    0e7f1aabd498
files       avcodec.h dsputil.c dsputil.h i386/dsputil_mmx.c motion_est.c mpegvideo.c
diffstat    6 files changed, 434 insertions(+), 150 deletions(-)
--- a/avcodec.h	Mon Jan 05 11:16:05 2004 +0000
+++ b/avcodec.h	Mon Jan 05 22:57:07 2004 +0000
@@ -17,7 +17,7 @@
 #define FFMPEG_VERSION_INT 0x000408
 #define FFMPEG_VERSION "0.4.8"
-#define LIBAVCODEC_BUILD 4698
+#define LIBAVCODEC_BUILD 4699
 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
 #define LIBAVCODEC_VERSION FFMPEG_VERSION
@@ -1196,6 +1196,12 @@
      * - decoding: unused
      */
     int mb_cmp;
+    /**
+     * interlaced dct compare function
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int ildct_cmp;
 #define FF_CMP_SAD  0
 #define FF_CMP_SSE  1
 #define FF_CMP_SATD 2
@@ -1204,6 +1210,8 @@
 #define FF_CMP_BIT  5
 #define FF_CMP_RD   6
 #define FF_CMP_ZERO 7
+#define FF_CMP_VSAD 8
+#define FF_CMP_VSSE 9
 #define FF_CMP_CHROMA 256
     /**
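The new ildct_cmp field is selected the same way as the existing *_cmp fields. A minimal configuration sketch (not part of the changeset; the helper name is hypothetical, the flag and constants come from the headers above):

#include "avcodec.h"

/* Hypothetical helper: enable the interlaced-DCT decision and select the
 * new vertical-SAD metric for it. FF_CMP_VSAD and ildct_cmp are the
 * additions from this changeset. */
static void configure_ildct(AVCodecContext *avctx)
{
    avctx->flags    |= CODEC_FLAG_INTERLACED_DCT; /* the decision only runs with this flag set */
    avctx->ildct_cmp = FF_CMP_VSAD;               /* FF_CMP_VSSE selects the squared variant */
}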
--- a/dsputil.c	Mon Jan 05 11:16:05 2004 +0000
+++ b/dsputil.c	Mon Jan 05 22:57:07 2004 +0000
@@ -2560,6 +2560,53 @@
     }
 }
 
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
+    return 0;
+}
+
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
+    int i;
+
+    memset(cmp, 0, sizeof(void*)*5);
+
+    for(i=0; i<5; i++){
+        switch(type&0xFF){
+        case FF_CMP_SAD:
+            cmp[i]= c->sad[i];
+            break;
+        case FF_CMP_SATD:
+            cmp[i]= c->hadamard8_diff[i];
+            break;
+        case FF_CMP_SSE:
+            cmp[i]= c->sse[i];
+            break;
+        case FF_CMP_DCT:
+            cmp[i]= c->dct_sad[i];
+            break;
+        case FF_CMP_PSNR:
+            cmp[i]= c->quant_psnr[i];
+            break;
+        case FF_CMP_BIT:
+            cmp[i]= c->bit[i];
+            break;
+        case FF_CMP_RD:
+            cmp[i]= c->rd[i];
+            break;
+        case FF_CMP_VSAD:
+            cmp[i]= c->vsad[i];
+            break;
+        case FF_CMP_VSSE:
+            cmp[i]= c->vsse[i];
+            break;
+        case FF_CMP_ZERO:
+            cmp[i]= zero_cmp;
+            break;
+        default:
+            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
+        }
+    }
+}
+
 /**
  * memset(blocks, 0, sizeof(DCTELEM)*6*64)
  */
@@ -2685,17 +2732,19 @@
     return sum;
 }
 
-static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
+static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
     int i;
     int temp[64];
     int sum=0;
-//FIXME OOOPS ignore 0 term instead of mean mess
+
+    assert(h==8);
+
     for(i=0; i<8; i++){
         //FIXME try pointer walks
-        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
-        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
-        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
-        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
 
         BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
         BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
@@ -2726,6 +2775,8 @@
                +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
     }
 
+    sum -= ABS(temp[8*0] + temp[8*4]); // -mean
+
     return sum;
 }
 
@@ -2911,7 +2962,69 @@
     return bits;
 }
 
+static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x+=4){
+            score+= ABS(s[x ] - s[x +stride]) + ABS(s[x+1] - s[x+1+stride])
+                   +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
+        }
+        s+= stride;
+    }
+
+    return score;
+}
+
+static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= ABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+
+#define SQ(a) ((a)*(a))
+static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x+=4){
+            score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride])
+                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
+        }
+        s+= stride;
+    }
+
+    return score;
+}
+
+static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+
 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
 WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
 WARPER8_16_SQ(rd8x8_c, rd16_c)
@@ -3095,13 +3208,12 @@
     c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
     c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
 
-    c->hadamard8_abs = hadamard8_abs_c;
-
 #define SET_CMP_FUNC(name) \
     c->name[0]= name ## 16_c;\
     c->name[1]= name ## 8x8_c;
 
     SET_CMP_FUNC(hadamard8_diff)
+    c->hadamard8_diff[4]= hadamard8_intra16_c;
     SET_CMP_FUNC(dct_sad)
     c->sad[0]= pix_abs16_c;
     c->sad[1]= pix_abs8_c;
@@ -3110,6 +3222,10 @@
     SET_CMP_FUNC(quant_psnr)
     SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
+    c->vsad[0]= vsad16_c;
+    c->vsad[4]= vsad_intra16_c;
+    c->vsse[0]= vsse16_c;
+    c->vsse[4]= vsse_intra16_c;
 
     c->add_bytes= add_bytes_c;
     c->diff_bytes= diff_bytes_c;
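ff_set_cmp() resolves a user-facing FF_CMP_* constant into the five size-indexed slots of the chosen metric; sizes a metric does not implement simply stay NULL. A usage sketch (the wrapper is hypothetical; the real callers below pass s->dsp and one of its comparison arrays):

#include "dsputil.h"

/* Hypothetical wrapper: fill a 5-entry comparison table with the vertical
 * SAD metric added above. Afterwards cmp_tab[0] points at vsad16_c and
 * cmp_tab[4] at vsad_intra16_c; the unimplemented sizes remain NULL. */
static void pick_vsad(DSPContext *dsp, me_cmp_func cmp_tab[5])
{
    ff_set_cmp(dsp, cmp_tab, FF_CMP_VSAD);
}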
--- a/dsputil.h	Mon Jan 05 11:16:05 2004 +0000
+++ b/dsputil.h	Mon Jan 05 22:57:07 2004 +0000
@@ -138,21 +138,22 @@
     int (*pix_norm1)(uint8_t * pix, int line_size);
     // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
 
-    me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */
-    me_cmp_func sse[4];
-    me_cmp_func hadamard8_diff[4];
-    me_cmp_func dct_sad[4];
-    me_cmp_func quant_psnr[4];
-    me_cmp_func bit[4];
-    me_cmp_func rd[4];
-    int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
+    me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[5];
+    me_cmp_func hadamard8_diff[5];
+    me_cmp_func dct_sad[5];
+    me_cmp_func quant_psnr[5];
+    me_cmp_func bit[5];
+    me_cmp_func rd[5];
+    me_cmp_func vsad[5];
+    me_cmp_func vsse[5];
 
     me_cmp_func me_pre_cmp[5];
     me_cmp_func me_cmp[5];
     me_cmp_func me_sub_cmp[5];
     me_cmp_func mb_cmp[5];
+    me_cmp_func ildct_cmp[5]; //only width 16 used
 
-    /* maybe create an array for 16/8/4/2 functions */
     /**
      * Halfpel motion compensation with rounding (a+b+1)>>1.
      * this is an array[4][4] of motion compensation funcions for 4
@@ -293,6 +294,8 @@
  */
 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
 
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+
 #define BYTE_VEC32(c) ((c)*0x01010101UL)
 
 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
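All tables share the me_cmp_func signature, whose first parameter is an opaque context (a MpegEncContext in the encoder; the plain C metrics ignore it). Index 0 is the width-16 variant, index 1 the 8x8 one, and index 4 the intra/width-16-only slot used by ildct_cmp. An illustrative call (the buffers and the NULL context are assumptions of this sketch, not something the library prescribes):

#include "dsputil.h"

/* Sketch: score 16 rows of a 16-pixel-wide block against a reference.
 * vsad16_c ignores the context argument, so NULL passes here; metrics
 * that quantize (e.g. FF_CMP_BIT) need a real MpegEncContext. */
static int score_block(DSPContext *c, uint8_t *cur, uint8_t *ref, int stride)
{
    return c->vsad[0](NULL, cur, ref, stride, 16);
}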
--- a/i386/dsputil_mmx.c	Mon Jan 05 11:16:05 2004 +0000
+++ b/i386/dsputil_mmx.c	Mon Jan 05 22:57:07 2004 +0000
@@ -22,6 +22,9 @@
 #include "../dsputil.h"
 #include "../simple_idct.h"
 
+//#undef NDEBUG
+//#include <assert.h>
+
 extern const uint8_t ff_h263_loop_filter_strength[32];
 
 int mm_flags; /* multimedia extension flags */
@@ -747,6 +750,246 @@
     return tmp;
 }
 
+static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0), %%mm2\n"\
+      "movq 8(%0), %%mm3\n"\
+      "addl %2,%0\n"\
+      "movq %%mm2, " #out0 "\n"\
+      "movq %%mm3, " #out1 "\n"\
+      "psubusb " #in0 ", %%mm2\n"\
+      "psubusb " #in1 ", %%mm3\n"\
+      "psubusb " #out0 ", " #in0 "\n"\
+      "psubusb " #out1 ", " #in1 "\n"\
+      "por %%mm2, " #in0 "\n"\
+      "por %%mm3, " #in1 "\n"\
+      "movq " #in0 ", %%mm2\n"\
+      "movq " #in1 ", %%mm3\n"\
+      "punpcklbw %%mm7, " #in0 "\n"\
+      "punpcklbw %%mm7, " #in1 "\n"\
+      "punpckhbw %%mm7, %%mm2\n"\
+      "punpckhbw %%mm7, %%mm3\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw %%mm3, %%mm2\n"\
+      "paddw %%mm2, " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+
+    asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pxor %%mm7,%%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq 8(%0),%%mm1\n"
+      "addl %2,%0\n"
+      "subl $2, %%ecx\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddw %%mm6,%%mm0\n"
+      "movq %%mm0,%%mm6\n"
+      "psrlq $16, %%mm0\n"
+      "paddw %%mm6,%%mm0\n"
+      "movd %%mm0,%1\n"
+      : "+r" (pix), "=r"(tmp)
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp & 0xFFFF;
+}
+#undef SUM
+
+static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0), " #out0 "\n"\
+      "movq 8(%0), " #out1 "\n"\
+      "addl %2,%0\n"\
+      "psadbw " #out0 ", " #in0 "\n"\
+      "psadbw " #out1 ", " #in1 "\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+    asm volatile (
+      "movl %3,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pxor %%mm7,%%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq 8(%0),%%mm1\n"
+      "addl %2,%0\n"
+      "subl $2, %%ecx\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movd %%mm6,%1\n"
+      : "+r" (pix), "=r"(tmp)
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+#undef SUM
+
+static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix1) & 7) == 0);
+    assert( (((int)pix2) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0),%%mm2\n"\
+      "movq (%1)," #out0 "\n"\
+      "movq 8(%0),%%mm3\n"\
+      "movq 8(%1)," #out1 "\n"\
+      "addl %3,%0\n"\
+      "addl %3,%1\n"\
+      "psubb " #out0 ", %%mm2\n"\
+      "psubb " #out1 ", %%mm3\n"\
+      "pxor %%mm7, %%mm2\n"\
+      "pxor %%mm7, %%mm3\n"\
+      "movq %%mm2, " #out0 "\n"\
+      "movq %%mm3, " #out1 "\n"\
+      "psubusb " #in0 ", %%mm2\n"\
+      "psubusb " #in1 ", %%mm3\n"\
+      "psubusb " #out0 ", " #in0 "\n"\
+      "psubusb " #out1 ", " #in1 "\n"\
+      "por %%mm2, " #in0 "\n"\
+      "por %%mm3, " #in1 "\n"\
+      "movq " #in0 ", %%mm2\n"\
+      "movq " #in1 ", %%mm3\n"\
+      "punpcklbw %%mm7, " #in0 "\n"\
+      "punpcklbw %%mm7, " #in1 "\n"\
+      "punpckhbw %%mm7, %%mm2\n"\
+      "punpckhbw %%mm7, %%mm3\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw %%mm3, %%mm2\n"\
+      "paddw %%mm2, " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+
+    asm volatile (
+      "movl %4,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pcmpeqw %%mm7,%%mm7\n"
+      "psllw $15, %%mm7\n"
+      "packsswb %%mm7, %%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq (%1),%%mm2\n"
+      "movq 8(%0),%%mm1\n"
+      "movq 8(%1),%%mm3\n"
+      "addl %3,%0\n"
+      "addl %3,%1\n"
+      "subl $2, %%ecx\n"
+      "psubb %%mm2, %%mm0\n"
+      "psubb %%mm3, %%mm1\n"
+      "pxor %%mm7, %%mm0\n"
+      "pxor %%mm7, %%mm1\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movq %%mm6,%%mm0\n"
+      "psrlq $32, %%mm6\n"
+      "paddw %%mm6,%%mm0\n"
+      "movq %%mm0,%%mm6\n"
+      "psrlq $16, %%mm0\n"
+      "paddw %%mm6,%%mm0\n"
+      "movd %%mm0,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp & 0x7FFF;
+}
+#undef SUM
+
+static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
+    int tmp;
+
+    assert( (((int)pix1) & 7) == 0);
+    assert( (((int)pix2) & 7) == 0);
+    assert((line_size &7) ==0);
+
+#define SUM(in0, in1, out0, out1) \
+      "movq (%0)," #out0 "\n"\
+      "movq (%1),%%mm2\n"\
+      "movq 8(%0)," #out1 "\n"\
+      "movq 8(%1),%%mm3\n"\
+      "addl %3,%0\n"\
+      "addl %3,%1\n"\
+      "psubb %%mm2, " #out0 "\n"\
+      "psubb %%mm3, " #out1 "\n"\
+      "pxor %%mm7, " #out0 "\n"\
+      "pxor %%mm7, " #out1 "\n"\
+      "psadbw " #out0 ", " #in0 "\n"\
+      "psadbw " #out1 ", " #in1 "\n"\
+      "paddw " #in1 ", " #in0 "\n"\
+      "paddw " #in0 ", %%mm6\n"
+
+    asm volatile (
+      "movl %4,%%ecx\n"
+      "pxor %%mm6,%%mm6\n"
+      "pcmpeqw %%mm7,%%mm7\n"
+      "psllw $15, %%mm7\n"
+      "packsswb %%mm7, %%mm7\n"
+      "movq (%0),%%mm0\n"
+      "movq (%1),%%mm2\n"
+      "movq 8(%0),%%mm1\n"
+      "movq 8(%1),%%mm3\n"
+      "addl %3,%0\n"
+      "addl %3,%1\n"
+      "subl $2, %%ecx\n"
+      "psubb %%mm2, %%mm0\n"
+      "psubb %%mm3, %%mm1\n"
+      "pxor %%mm7, %%mm0\n"
+      "pxor %%mm7, %%mm1\n"
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+      "1:\n"
+
+      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
+
+      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
+
+      "subl $2, %%ecx\n"
+      "jnz 1b\n"
+
+      "movd %%mm6,%2\n"
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
+      : "r" (line_size) , "m" (h)
+      : "%ecx");
+    return tmp;
+}
+#undef SUM
+
 static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
     int i=0;
     asm volatile(
@@ -1874,6 +2117,11 @@
         c->pix_norm1 = pix_norm1_mmx;
         c->sse[0] = sse16_mmx;
+        c->vsad[4]= vsad_intra16_mmx;
+
+        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+            c->vsad[0] = vsad16_mmx;
+        }
 #endif //CONFIG_ENCODERS
 
         c->h263_v_loop_filter= h263_v_loop_filter_mmx;
@@ -1897,6 +2145,7 @@
 #ifdef CONFIG_ENCODERS
             c->hadamard8_diff[0]= hadamard8_diff16_mmx2;
             c->hadamard8_diff[1]= hadamard8_diff_mmx2;
+            c->vsad[4]= vsad_intra16_mmx2;
 #endif //CONFIG_ENCODERS
 
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -1906,6 +2155,7 @@
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+                c->vsad[0] = vsad16_mmx2;
             }
 
 #if 1
--- a/motion_est.c	Mon Jan 05 11:16:05 2004 +0000
+++ b/motion_est.c	Mon Jan 05 22:57:07 2004 +0000
@@ -277,49 +277,6 @@
 #undef INIT
 #undef CMP__DIRECT
 
-
-static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
-    return 0;
-}
-
-static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
-    DSPContext* c= &s->dsp;
-    int i;
-
-    memset(cmp, 0, sizeof(void*)*5);
-
-    for(i=0; i<4; i++){
-        switch(type&0xFF){
-        case FF_CMP_SAD:
-            cmp[i]= c->sad[i];
-            break;
-        case FF_CMP_SATD:
-            cmp[i]= c->hadamard8_diff[i];
-            break;
-        case FF_CMP_SSE:
-            cmp[i]= c->sse[i];
-            break;
-        case FF_CMP_DCT:
-            cmp[i]= c->dct_sad[i];
-            break;
-        case FF_CMP_PSNR:
-            cmp[i]= c->quant_psnr[i];
-            break;
-        case FF_CMP_BIT:
-            cmp[i]= c->bit[i];
-            break;
-        case FF_CMP_RD:
-            cmp[i]= c->rd[i];
-            break;
-        case FF_CMP_ZERO:
-            cmp[i]= zero_cmp;
-            break;
-        default:
-            av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
-        }
-    }
-}
-
 static inline int get_penalty_factor(MpegEncContext *s, int type){
     switch(type&0xFF){
     default:
@@ -340,10 +297,10 @@
 }
 
 void ff_init_me(MpegEncContext *s){
-    set_cmp(s, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
-    set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp);
-    set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
-    set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp);
 
     if(s->flags&CODEC_FLAG_QPEL){
         if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
@@ -1783,6 +1740,10 @@
         }
 
         //FIXME something smarter
         if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
+#if 0
+        if(s->out_format == FMT_MPEG1)
+            type |= CANDIDATE_MB_TYPE_INTRA;
+#endif
     }
 
     s->mb_type[mb_y*s->mb_stride + mb_x]= type;
--- a/mpegvideo.c	Mon Jan 05 11:16:05 2004 +0000
+++ b/mpegvideo.c	Mon Jan 05 22:57:07 2004 +0000
@@ -973,6 +973,8 @@
     s->progressive_frame= 
     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
 
+    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
+
     ff_init_me(s);
 
 #ifdef CONFIG_ENCODERS
@@ -3168,71 +3170,6 @@
         av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", overflow, minlevel, maxlevel);
 }
 
-#if 0
-static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x+=4){
-            score+= ABS(s[x ] - s[x +stride]) + ABS(s[x+1] - s[x+1+stride])
-                   +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
-        }
-        s+= stride;
-    }
-
-    return score;
-}
-
-static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x++){
-            score+= ABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
-        }
-        s1+= stride;
-        s2+= stride;
-    }
-
-    return score;
-}
-#else
-#define SQ(a) ((a)*(a))
-
-static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x+=4){
-            score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride])
-                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
-        }
-        s+= stride;
-    }
-
-    return score;
-}
-
-static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x++){
-            score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
-        }
-        s1+= stride;
-        s2+= stride;
-    }
-
-    return score;
-}
-
-#endif
-
 #endif //CONFIG_ENCODERS
 
 /**
@@ -3352,16 +3289,20 @@
     if(s->flags&CODEC_FLAG_INTERLACED_DCT){
         int progressive_score, interlaced_score;
 
-        progressive_score= pix_vcmp16x8(ptr, wrap_y ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
-        interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y , wrap_y*2);
+        s->interlaced_dct=0;
+        progressive_score= s->dsp.ildct_cmp[4](s, ptr           , NULL, wrap_y, 8)
+                          +s->dsp.ildct_cmp[4](s, ptr + wrap_y*8, NULL, wrap_y, 8) - 400;
+
+        if(progressive_score > 0){
+            interlaced_score = s->dsp.ildct_cmp[4](s, ptr           , NULL, wrap_y*2, 8)
+                              +s->dsp.ildct_cmp[4](s, ptr + wrap_y  , NULL, wrap_y*2, 8);
+            if(progressive_score > interlaced_score){
+                s->interlaced_dct=1;
 
-        if(progressive_score > interlaced_score + 100){
-            s->interlaced_dct=1;
-
-            dct_offset= wrap_y;
-            wrap_y<<=1;
-        }else
-            s->interlaced_dct=0;
+                dct_offset= wrap_y;
+                wrap_y<<=1;
+            }
+        }
     }
 
     s->dsp.get_pixels(s->block[0], ptr , wrap_y);
@@ -3430,19 +3371,24 @@
     if(s->flags&CODEC_FLAG_INTERLACED_DCT){
         int progressive_score, interlaced_score;
 
-        progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  )
-                         + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
-        interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
-                         + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
+
+        s->interlaced_dct=0;
+        progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
+                          +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
 
-        if(progressive_score > interlaced_score + 600){
-            s->interlaced_dct=1;
+        if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
+
+        if(progressive_score>0){
+            interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
+                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
 
-            dct_offset= wrap_y;
-            wrap_y<<=1;
-        }else
-            s->interlaced_dct=0;
+            if(progressive_score > interlaced_score){
+                s->interlaced_dct=1;
+
+                dct_offset= wrap_y;
+                wrap_y<<=1;
+            }
+        }
     }
 
     s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
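Restated, the decision now scores the block both ways with the user-selected ildct_cmp metric: first the two progressive 8-line halves, biased by a fixed 400 (plus another 400 for VSSE in the inter path, since squared differences run larger), and only when the progressive score still looks bad are the two fields scored and compared. A condensed sketch of the control flow (the function and variable names are illustrative; the real code is in the diff above):

#include <stdint.h>

typedef int (*ildct_cmp_fn)(void *ctx, uint8_t *a, uint8_t *b, int stride, int h);

/* Sketch of the intra-path decision: returns 1 if the two fields of a
 * 16x16 luma block look cheaper to transform separately. The -400 bias
 * makes progressive DCT win near-ties, mirroring the code above. */
static int interlaced_dct_decision(ildct_cmp_fn cmp, void *ctx,
                                   uint8_t *ptr, int wrap_y)
{
    int progressive_score =
        cmp(ctx, ptr,            NULL, wrap_y,   8) +  /* top 8 frame lines    */
        cmp(ctx, ptr + wrap_y*8, NULL, wrap_y,   8) -  /* bottom 8 frame lines */
        400;

    if (progressive_score > 0) {
        int interlaced_score =
            cmp(ctx, ptr,          NULL, wrap_y*2, 8) + /* even field */
            cmp(ctx, ptr + wrap_y, NULL, wrap_y*2, 8);  /* odd field  */
        if (progressive_score > interlaced_score)
            return 1; /* fields vary less vertically: use interlaced DCT */
    }
    return 0;
}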