# HG changeset patch # User michael # Date 1072800477 0 # Node ID dea5b29469990b9514420f5b57fb26d2b1d1977b # Parent 027545a2fdbee76eb074ad91f3b738c8d71ae434 interlaced motion estimation interlaced mpeg2 encoding P & B frames rate distored interlaced mb decission alternate scantable support 4mv encoding fixes (thats also why the regression tests change) passing height to most dsp functions interlaced mpeg4 encoding (no direct mode MBs yet) various related cleanups disabled old motion estimaton algorithms (log, full, ...) they will either be fixed or removed diff -r 027545a2fdbe -r dea5b2946999 alpha/dsputil_alpha.c --- a/alpha/dsputil_alpha.c Tue Dec 30 15:29:38 2003 +0000 +++ b/alpha/dsputil_alpha.c Tue Dec 30 16:07:57 2003 +0000 @@ -39,11 +39,11 @@ const uint8_t *restrict pixels, int line_size); void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); #if 0 /* These functions were the base for the optimized assembler routines, @@ -290,11 +290,6 @@ return pix_abs16x16_mvi_asm(a, b, stride); } -static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride) -{ - return pix_abs8x8_mvi(a, b, stride); -} - void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; @@ -347,12 +342,13 @@ c->get_pixels = get_pixels_mvi; c->diff_pixels = diff_pixels_mvi; c->sad[0] = sad16x16_mvi; - c->sad[1] = sad8x8_mvi; - c->pix_abs8x8 = pix_abs8x8_mvi; - c->pix_abs16x16 = pix_abs16x16_mvi_asm; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi; + c->sad[1] = pix_abs8x8_mvi; +// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed + c->pix_abs[0][0] = sad16x16_mvi; + c->pix_abs[1][0] = pix_abs8x8_mvi; + c->pix_abs[0][1] = pix_abs16x16_x2_mvi; + c->pix_abs[0][2] = pix_abs16x16_y2_mvi; + c->pix_abs[0][3] = pix_abs16x16_xy2_mvi; } put_pixels_clamped_axp_p = c->put_pixels_clamped; diff -r 027545a2fdbe -r dea5b2946999 alpha/motion_est_alpha.c --- a/alpha/motion_est_alpha.c Tue Dec 30 15:29:38 2003 +0000 +++ b/alpha/motion_est_alpha.c Tue Dec 30 16:07:57 2003 +0000 @@ -84,10 +84,9 @@ return r1 + r2; } -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 8; if ((size_t) pix2 & 0x7) { /* works only when pix2 is actually unaligned */ @@ -160,10 +159,9 @@ } #endif -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t disalign = (size_t) pix2 & 0x7; switch (disalign) { @@ -234,10 +232,9 @@ return result; } -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; if ((size_t) pix2 & 0x7) { uint64_t t, p2_l, p2_r; @@ -288,10 +285,9 @@ return result; } -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t p1_l, p1_r; uint64_t p2_l, p2_r, p2_x; diff -r 027545a2fdbe -r dea5b2946999 avcodec.h --- a/avcodec.h Tue Dec 30 15:29:38 2003 +0000 +++ b/avcodec.h Tue Dec 30 16:07:57 2003 +0000 @@ -17,7 +17,7 @@ #define FFMPEG_VERSION_INT 0x000408 #define FFMPEG_VERSION "0.4.8" -#define LIBAVCODEC_BUILD 4697 +#define LIBAVCODEC_BUILD 4698 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT #define LIBAVCODEC_VERSION FFMPEG_VERSION @@ -263,7 +263,8 @@ #define CODEC_FLAG_H263P_AIV 0x00000008 ///< H263 Alternative inter vlc #define CODEC_FLAG_OBMC 0x00000001 ///< OBMC #define CODEC_FLAG_LOOP_FILTER 0x00000800 ///< loop filter -#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000 +#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000 +#define CODEC_FLAG_INTERLACED_ME 0x20000000 ///< interlaced motion estimation /* Unsupported options : * Syntax Arithmetic coding (SAC) * Reference Picture Selection diff -r 027545a2fdbe -r dea5b2946999 dsputil.c --- a/dsputil.c Tue Dec 30 15:29:38 2003 +0000 +++ b/dsputil.c Tue Dec 30 16:07:57 2003 +0000 @@ -218,13 +218,13 @@ } } -static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) +static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int s, i; uint32_t *sq = squareTbl + 256; s = 0; - for (i = 0; i < 8; i++) { + for (i = 0; i < h; i++) { s += sq[pix1[0] - pix2[0]]; s += sq[pix1[1] - pix2[1]]; s += sq[pix1[2] - pix2[2]]; @@ -239,13 +239,13 @@ return s; } -static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) +static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int s, i; uint32_t *sq = squareTbl + 256; s = 0; - for (i = 0; i < 16; i++) { + for (i = 0; i < h; i++) { s += sq[pix1[ 0] - pix2[ 0]]; s += sq[pix1[ 1] - pix2[ 1]]; s += sq[pix1[ 2] - pix2[ 2]]; @@ -2331,12 +2331,12 @@ } } -static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size) +static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int s, i; s = 0; - for(i=0;i<16;i++) { + for(i=0;idsp.diff_pixels(temp, src1, src2, stride); s->dsp.fdct(temp); @@ -2752,13 +2748,14 @@ void simple_idct(DCTELEM *block); //FIXME -static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ +static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ MpegEncContext * const s= (MpegEncContext *)c; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8]; DCTELEM * const temp= (DCTELEM*)aligned_temp; DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; int sum=0, i; + assert(h==8); s->mb_intra=0; s->dsp.diff_pixels(temp, src1, src2, stride); @@ -2775,7 +2772,7 @@ return sum; } -static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ +static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ MpegEncContext * const s= (MpegEncContext *)c; const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; @@ -2787,6 +2784,8 @@ uint8_t * length; uint8_t * last_length; + assert(h==8); + for(i=0; i<8; i++){ ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; @@ -2847,12 +2846,12 @@ s->dsp.idct_add(bak, stride, temp); - distoration= s->dsp.sse[1](NULL, bak, src1, stride); + distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); } -static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ +static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ MpegEncContext * const s= (MpegEncContext *)c; const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; @@ -2861,6 +2860,8 @@ const int esc_length= s->ac_esc_length; uint8_t * length; uint8_t * last_length; + + assert(h==8); s->dsp.diff_pixels(temp, src1, src2, stride); @@ -2910,12 +2911,11 @@ return bits; } - -WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c) -WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c) -WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) -WARPER88_1616(rd8x8_c, rd16x16_c) -WARPER88_1616(bit8x8_c, bit16x16_c) +WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) +WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) +WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) +WARPER8_16_SQ(rd8x8_c, rd16_c) +WARPER8_16_SQ(bit8x8_c, bit16_c) /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */ @@ -2989,18 +2989,16 @@ c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; - c->sse[0]= sse16_c; - c->sse[1]= sse8_c; /* TODO [0] 16 [1] 8 */ - c->pix_abs16x16 = pix_abs16x16_c; - c->pix_abs16x16_x2 = pix_abs16x16_x2_c; - c->pix_abs16x16_y2 = pix_abs16x16_y2_c; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c; - c->pix_abs8x8 = pix_abs8x8_c; - c->pix_abs8x8_x2 = pix_abs8x8_x2_c; - c->pix_abs8x8_y2 = pix_abs8x8_y2_c; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c; + c->pix_abs[0][0] = pix_abs16_c; + c->pix_abs[0][1] = pix_abs16_x2_c; + c->pix_abs[0][2] = pix_abs16_y2_c; + c->pix_abs[0][3] = pix_abs16_xy2_c; + c->pix_abs[1][0] = pix_abs8_c; + c->pix_abs[1][1] = pix_abs8_x2_c; + c->pix_abs[1][2] = pix_abs8_y2_c; + c->pix_abs[1][3] = pix_abs8_xy2_c; #define dspfunc(PFX, IDX, NUM) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ @@ -3097,24 +3095,21 @@ c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; - c->hadamard8_diff[0]= hadamard8_diff16_c; - c->hadamard8_diff[1]= hadamard8_diff_c; c->hadamard8_abs = hadamard8_abs_c; - - c->dct_sad[0]= dct_sad16x16_c; - c->dct_sad[1]= dct_sad8x8_c; + +#define SET_CMP_FUNC(name) \ + c->name[0]= name ## 16_c;\ + c->name[1]= name ## 8x8_c; - c->sad[0]= sad16x16_c; - c->sad[1]= sad8x8_c; - - c->quant_psnr[0]= quant_psnr16x16_c; - c->quant_psnr[1]= quant_psnr8x8_c; - - c->rd[0]= rd16x16_c; - c->rd[1]= rd8x8_c; - - c->bit[0]= bit16x16_c; - c->bit[1]= bit8x8_c; + SET_CMP_FUNC(hadamard8_diff) + SET_CMP_FUNC(dct_sad) + c->sad[0]= pix_abs16_c; + c->sad[1]= pix_abs8_c; + c->sse[0]= sse16_c; + c->sse[1]= sse8_c; + SET_CMP_FUNC(quant_psnr) + SET_CMP_FUNC(rd) + SET_CMP_FUNC(bit) c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; diff -r 027545a2fdbe -r dea5b2946999 dsputil.h --- a/dsputil.h Tue Dec 30 15:29:38 2003 +0000 +++ b/dsputil.h Tue Dec 30 16:07:57 2003 +0000 @@ -110,9 +110,7 @@ /* motion estimation */ -typedef int (*op_pixels_abs_func)(uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/; - -typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/; +typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; /** @@ -136,19 +134,21 @@ void (*clear_blocks)(DCTELEM *blocks/*align 16*/); int (*pix_sum)(uint8_t * pix, int line_size); int (*pix_norm1)(uint8_t * pix, int line_size); - me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */ - me_cmp_func sse[2]; - me_cmp_func hadamard8_diff[2]; - me_cmp_func dct_sad[2]; - me_cmp_func quant_psnr[2]; - me_cmp_func bit[2]; - me_cmp_func rd[2]; +// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 + + me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */ + me_cmp_func sse[4]; + me_cmp_func hadamard8_diff[4]; + me_cmp_func dct_sad[4]; + me_cmp_func quant_psnr[4]; + me_cmp_func bit[4]; + me_cmp_func rd[4]; int (*hadamard8_abs )(uint8_t *src, int stride, int mean); - me_cmp_func me_pre_cmp[11]; - me_cmp_func me_cmp[11]; - me_cmp_func me_sub_cmp[11]; - me_cmp_func mb_cmp[11]; + me_cmp_func me_pre_cmp[5]; + me_cmp_func me_cmp[5]; + me_cmp_func me_sub_cmp[5]; + me_cmp_func mb_cmp[5]; /* maybe create an array for 16/8/4/2 functions */ /** @@ -226,14 +226,7 @@ qpel_mc_func put_h264_qpel_pixels_tab[3][16]; qpel_mc_func avg_h264_qpel_pixels_tab[3][16]; - op_pixels_abs_func pix_abs16x16; - op_pixels_abs_func pix_abs16x16_x2; - op_pixels_abs_func pix_abs16x16_y2; - op_pixels_abs_func pix_abs16x16_xy2; - op_pixels_abs_func pix_abs8x8; - op_pixels_abs_func pix_abs8x8_x2; - op_pixels_abs_func pix_abs8x8_y2; - op_pixels_abs_func pix_abs8x8_xy2; + me_cmp_func pix_abs[2][4]; /* huffyuv specific */ void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); @@ -484,12 +477,24 @@ const FFTSample *input, FFTSample *tmp); void ff_mdct_end(MDCTContext *s); -#define WARPER88_1616(name8, name16)\ -static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\ - return name8(s, dst , src , stride)\ - +name8(s, dst+8 , src+8 , stride)\ - +name8(s, dst +8*stride, src +8*stride, stride)\ - +name8(s, dst+8+8*stride, src+8+8*stride, stride);\ +#define WARPER8_16(name8, name16)\ +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ + return name8(s, dst , src , stride, h)\ + +name8(s, dst+8 , src+8 , stride, h);\ +} + +#define WARPER8_16_SQ(name8, name16)\ +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ + int score=0;\ + score +=name8(s, dst , src , stride, 8);\ + score +=name8(s, dst+8 , src+8 , stride, 8);\ + if(h==16){\ + dst += 8*stride;\ + src += 8*stride;\ + score +=name8(s, dst , src , stride, 8);\ + score +=name8(s, dst+8 , src+8 , stride, 8);\ + }\ + return score;\ } #ifndef HAVE_LRINTF diff -r 027545a2fdbe -r dea5b2946999 error_resilience.c --- a/error_resilience.c Tue Dec 30 15:29:38 2003 +0000 +++ b/error_resilience.c Tue Dec 30 16:07:57 2003 +0000 @@ -582,8 +582,8 @@ uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize; - is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); - is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); + is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16); + is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16); }else{ if(IS_INTRA(s->current_picture.mb_type[mb_xy])) is_intra_likely++; diff -r 027545a2fdbe -r dea5b2946999 h263.c --- a/h263.c Tue Dec 30 15:29:38 2003 +0000 +++ b/h263.c Tue Dec 30 16:07:57 2003 +0000 @@ -479,9 +479,9 @@ for(i=1; imb_num; i++){ int mb_xy= s->mb_index2xy[i]; - if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_INTER4V)){ - s->mb_type[mb_xy]&= ~MB_TYPE_INTER4V; - s->mb_type[mb_xy]|= MB_TYPE_INTER; + if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){ + s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V; + s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER; } } @@ -508,9 +508,9 @@ for(i=1; imb_num; i++){ int mb_xy= s->mb_index2xy[i]; - if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_DIRECT)){ - s->mb_type[mb_xy]&= ~MB_TYPE_DIRECT; - s->mb_type[mb_xy]|= MB_TYPE_BIDIR; + if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){ + s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT; + s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR; } } } @@ -523,7 +523,7 @@ */ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){ const int mb_index= s->mb_x + s->mb_y*s->mb_stride; - const int colocated_mb_type= s->next_picture.mb_type[mb_index]; //FIXME or next? + const int colocated_mb_type= s->next_picture.mb_type[mb_index]; int xy= s->block_index[0]; uint16_t time_pp= s->pp_time; uint16_t time_pb= s->pb_time; @@ -547,18 +547,18 @@ s->mv_type = MV_TYPE_FIELD; for(i=0; i<2; i++){ if(s->top_field_first){ - time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i; - time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i; + time_pp= s->pp_field_time - s->p_field_select_table[i][mb_index] + i; + time_pb= s->pb_field_time - s->p_field_select_table[i][mb_index] + i; }else{ - time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i; - time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i; + time_pp= s->pp_field_time + s->p_field_select_table[i][mb_index] - i; + time_pb= s->pb_field_time + s->p_field_select_table[i][mb_index] - i; } - s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx; - s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my; - s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0] - : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp; - s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] - : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp; + s->mv[0][i][0] = s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx; + s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0] + : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp; + s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1] + : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp; } return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED; }else{ @@ -598,9 +598,9 @@ motion_y = s->mv[0][0][1] + s->mv[0][1][1]; motion_x = (motion_x>>1) | (motion_x&1); for(i=0; i<2; i++){ - s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0]; - s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1]; - s->field_select_table[mb_xy][i]= s->field_select[0][i]; + s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0]; + s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1]; + s->p_field_select_table[i][mb_xy]= s->field_select[0][i]; } } @@ -744,12 +744,14 @@ if(s->pict_type==B_TYPE){ static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */ int mb_type= mb_type_table[s->mv_dir]; - + if(s->mb_x==0){ - s->last_mv[0][0][0]= - s->last_mv[0][0][1]= - s->last_mv[1][0][0]= - s->last_mv[1][0][1]= 0; + for(i=0; i<2; i++){ + s->last_mv[i][0][0]= + s->last_mv[i][0][1]= + s->last_mv[i][1][0]= + s->last_mv[i][1][1]= 0; + } } assert(s->dquant>=-2 && s->dquant<=2); @@ -803,50 +805,64 @@ if(cbp) put_bits(&s->pb, 1, s->interlaced_dct); if(mb_type) // not diect mode - put_bits(&s->pb, 1, 0); // no interlaced ME yet + put_bits(&s->pb, 1, s->mv_type == MV_TYPE_FIELD); } if(interleaved_stats){ s->misc_bits+= get_bits_diff(s); } - switch(mb_type) - { - case 0: /* direct */ + if(mb_type == 0){ + assert(s->mv_dir & MV_DIRECT); h263_encode_motion(s, motion_x, 1); h263_encode_motion(s, motion_y, 1); s->b_count++; s->f_count++; - break; - case 1: /* bidir */ - h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); - h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); - h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); - h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); - s->last_mv[0][0][0]= s->mv[0][0][0]; - s->last_mv[0][0][1]= s->mv[0][0][1]; - s->last_mv[1][0][0]= s->mv[1][0][0]; - s->last_mv[1][0][1]= s->mv[1][0][1]; - s->b_count++; - s->f_count++; - break; - case 2: /* backward */ - h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); - h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); - s->last_mv[1][0][0]= motion_x; - s->last_mv[1][0][1]= motion_y; - s->b_count++; - break; - case 3: /* forward */ - h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); - h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); - s->last_mv[0][0][0]= motion_x; - s->last_mv[0][0][1]= motion_y; - s->f_count++; - break; - default: - av_log(s->avctx, AV_LOG_ERROR, "unknown mb type\n"); - return; + }else{ + assert(mb_type > 0 && mb_type < 4); + if(s->mv_type != MV_TYPE_FIELD){ + if(s->mv_dir & MV_DIR_FORWARD){ + h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); + h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); + s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0]; + s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1]; + s->f_count++; + } + if(s->mv_dir & MV_DIR_BACKWARD){ + h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); + h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); + s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0]; + s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1]; + s->b_count++; + } + }else{ + if(s->mv_dir & MV_DIR_FORWARD){ + put_bits(&s->pb, 1, s->field_select[0][0]); + put_bits(&s->pb, 1, s->field_select[0][1]); + } + if(s->mv_dir & MV_DIR_BACKWARD){ + put_bits(&s->pb, 1, s->field_select[1][0]); + put_bits(&s->pb, 1, s->field_select[1][1]); + } + if(s->mv_dir & MV_DIR_FORWARD){ + for(i=0; i<2; i++){ + h263_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code); + h263_encode_motion(s, s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0]; + s->last_mv[0][i][1]= s->mv[0][i][1]*2; + } + s->f_count++; + } + if(s->mv_dir & MV_DIR_BACKWARD){ + for(i=0; i<2; i++){ + h263_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0] , s->b_code); + h263_encode_motion(s, s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code); + s->last_mv[1][i][0]= s->mv[1][i][0]; + s->last_mv[1][i][1]= s->mv[1][i][1]*2; + } + s->b_count++; + } + } } if(interleaved_stats){ @@ -861,6 +877,7 @@ if(interleaved_stats){ s->p_tex_bits+= get_bits_diff(s); } + }else{ /* s->pict_type==B_TYPE */ cbp= get_p_cbp(s, block, motion_x, motion_y); @@ -889,7 +906,7 @@ if(pic==NULL || pic->pict_type!=B_TYPE) break; b_pic= pic->data[0] + offset + 16; //FIXME +16 - diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize); + diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16); if(diff>s->qscale*70){ //FIXME check that 70 is optimal s->mb_skiped=0; break; @@ -929,7 +946,7 @@ if(!s->progressive_sequence){ if(cbp) put_bits(pb2, 1, s->interlaced_dct); - put_bits(pb2, 1, 0); // no interlaced ME yet + put_bits(pb2, 1, 0); } if(interleaved_stats){ @@ -941,7 +958,38 @@ h263_encode_motion(s, motion_x - pred_x, s->f_code); h263_encode_motion(s, motion_y - pred_y, s->f_code); + }else if(s->mv_type==MV_TYPE_FIELD){ + if(s->dquant) cbpc+= 8; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + + put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(pb2, 2, dquant_code[s->dquant+2]); + + assert(!s->progressive_sequence); + if(cbp) + put_bits(pb2, 1, s->interlaced_dct); + put_bits(pb2, 1, 1); + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + /* motion vectors: 16x8 interlaced mode */ + h263_pred_motion(s, 0, &pred_x, &pred_y); + pred_y /=2; + + put_bits(&s->pb, 1, s->field_select[0][0]); + put_bits(&s->pb, 1, s->field_select[0][1]); + + h263_encode_motion(s, s->mv[0][0][0] - pred_x, s->f_code); + h263_encode_motion(s, s->mv[0][0][1] - pred_y, s->f_code); + h263_encode_motion(s, s->mv[0][1][0] - pred_x, s->f_code); + h263_encode_motion(s, s->mv[0][1][1] - pred_y, s->f_code); }else{ + assert(s->mv_type==MV_TYPE_8X8); put_bits(&s->pb, inter_MCBPC_bits[cbpc+16], inter_MCBPC_code[cbpc+16]); diff -r 027545a2fdbe -r dea5b2946999 h263data.h --- a/h263data.h Tue Dec 30 15:29:38 2003 +0000 +++ b/h263data.h Tue Dec 30 16:07:57 2003 +0000 @@ -61,8 +61,8 @@ MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_16x16, MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16, 0, //stuffing - MB_TYPE_INTRA | MB_TYPE_CBP, - MB_TYPE_INTRA | MB_TYPE_CBP | MB_TYPE_QUANT, + MB_TYPE_INTRA4x4 | MB_TYPE_CBP, + MB_TYPE_INTRA4x4 | MB_TYPE_CBP | MB_TYPE_QUANT, }; const uint8_t cbpc_b_tab[4][2] = { diff -r 027545a2fdbe -r dea5b2946999 i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Tue Dec 30 15:29:38 2003 +0000 +++ b/i386/dsputil_mmx.c Tue Dec 30 16:07:57 2003 +0000 @@ -687,10 +687,10 @@ return tmp; } -static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { +static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int tmp; asm volatile ( - "movl $16,%%ecx\n" + "movl %4,%%ecx\n" "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ "1:\n" @@ -741,7 +741,9 @@ "psrlq $32, %%mm7\n" /* shift hi dword to lo */ "paddd %%mm7,%%mm1\n" "movd %%mm1,%2\n" - : "+r" (pix1), "+r" (pix2), "=r"(tmp) : "r" (line_size) : "%ecx"); + : "+r" (pix1), "+r" (pix2), "=r"(tmp) + : "r" (line_size) , "m" (h) + : "%ecx"); return tmp; } @@ -866,9 +868,11 @@ "movq "#c", "#o"+32(%1) \n\t"\ "movq "#d", "#o"+48(%1) \n\t"\ -static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){ +static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ uint64_t temp[16] __align8; int sum=0; + + assert(h==8); diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); @@ -951,9 +955,11 @@ return sum&0xFFFF; } -static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride){ +static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ uint64_t temp[16] __align8; int sum=0; + + assert(h==8); diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); @@ -1037,8 +1043,8 @@ } -WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx) -WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) +WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx) +WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) #endif //CONFIG_ENCODERS #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) diff -r 027545a2fdbe -r dea5b2946999 i386/motion_est_mmx.c --- a/i386/motion_est_mmx.c Tue Dec 30 15:29:38 2003 +0000 +++ b/i386/motion_est_mmx.c Tue Dec 30 16:07:57 2003 +0000 @@ -28,9 +28,9 @@ static __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL; -static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - int len= -(stride<pix_abs16x16 = pix_abs16x16_mmx; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; - c->pix_abs8x8 = pix_abs8x8_mmx; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; + c->pix_abs[0][0] = sad16_mmx; + c->pix_abs[0][1] = sad16_x2_mmx; + c->pix_abs[0][2] = sad16_y2_mmx; + c->pix_abs[0][3] = sad16_xy2_mmx; + c->pix_abs[1][0] = sad8_mmx; + c->pix_abs[1][1] = sad8_x2_mmx; + c->pix_abs[1][2] = sad8_y2_mmx; + c->pix_abs[1][3] = sad8_xy2_mmx; - c->sad[0]= sad16x16_mmx; - c->sad[1]= sad8x8_mmx; + c->sad[0]= sad16_mmx; + c->sad[1]= sad8_mmx; } if (mm_flags & MM_MMXEXT) { - c->pix_abs16x16 = pix_abs16x16_mmx2; - c->pix_abs8x8 = pix_abs8x8_mmx2; + c->pix_abs[0][0] = sad16_mmx2; + c->pix_abs[1][0] = sad8_mmx2; - c->sad[0]= sad16x16_mmx2; - c->sad[1]= sad8x8_mmx2; + c->sad[0]= sad16_mmx2; + c->sad[1]= sad8_mmx2; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; + c->pix_abs[0][1] = sad16_x2_mmx2; + c->pix_abs[0][2] = sad16_y2_mmx2; + c->pix_abs[0][3] = sad16_xy2_mmx2; + c->pix_abs[1][1] = sad8_x2_mmx2; + c->pix_abs[1][2] = sad8_y2_mmx2; + c->pix_abs[1][3] = sad8_xy2_mmx2; } } } diff -r 027545a2fdbe -r dea5b2946999 motion_est.c --- a/motion_est.c Tue Dec 30 15:29:38 2003 +0000 +++ b/motion_est.c Tue Dec 30 16:07:57 2003 +0000 @@ -46,9 +46,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *picture, - int n, int size, uint8_t * const mv_penalty); + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty); static inline int update_map_generation(MpegEncContext * s) { @@ -78,20 +78,21 @@ #define RENAME(a) simple_ ## a #define CMP(d, x, y, size)\ -d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride); +d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h); #define CMP_HPEL(d, dx, dy, x, y, size)\ {\ const int dxy= (dx) + 2*(dy);\ - hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ } + #define CMP_QPEL(d, dx, dy, x, y, size)\ {\ const int dxy= (dx) + 4*(dy);\ qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ } #include "motion_est_template.c" @@ -105,29 +106,29 @@ #define RENAME(a) simple_chroma_ ## a #define CMP(d, x, y, size)\ -d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\ +d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);\ if(chroma_cmp){\ int dxy= ((x)&1) + 2*((y)&1);\ int c= ((x)>>1) + ((y)>>1)*uvstride;\ \ - chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ - d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\ - chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ - d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\ + chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\ + d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride, h>>1);\ + chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\ + d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride, h>>1);\ } #define CMP_HPEL(d, dx, dy, x, y, size)\ {\ const int dxy= (dx) + 2*(dy);\ - hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ if(chroma_cmp_sub){\ int cxy= (dxy) | ((x)&1) | (2*((y)&1));\ int c= ((x)>>1) + ((y)>>1)*uvstride;\ - chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ - chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\ }\ } @@ -135,7 +136,7 @@ {\ const int dxy= (dx) + 4*(dy);\ qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ if(chroma_cmp_sub){\ int cxy, c;\ int cx= (4*(x) + (dx))/2;\ @@ -144,10 +145,10 @@ cy= (cy>>1)|(cy&1);\ cxy= (cx&1) + 2*(cy&1);\ c= ((cx)>>1) + ((cy)>>1)*uvstride;\ - chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ - chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\ }\ } @@ -178,7 +179,7 @@ \ uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\ - hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\ + hpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 8);\ }\ }else{\ int fx = s->me.direct_basis_mv[0][0] + hx;\ @@ -198,9 +199,9 @@ assert((by>>1) + 16*s->mb_y <= s->height);\ \ hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\ - hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\ + hpel_avg[0][bxy](s->me.scratchpad, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 16);\ }\ - d = cmp_func(s, s->me.scratchpad, src_y, stride);\ + d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\ }else\ d= 256*256*256*32; @@ -238,7 +239,7 @@ \ uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\ - qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\ + qpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>2) + (by>>2)*(stride), stride);\ }\ }else{\ int fx = s->me.direct_basis_mv[0][0] + qx;\ @@ -252,12 +253,12 @@ qpel_put[1][fxy](s->me.scratchpad + 8 , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 , stride);\ qpel_put[1][fxy](s->me.scratchpad + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8*stride, stride);\ qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\ - qpel_avg[1][bxy](s->me.scratchpad , (ref2_y) + (bx>>2) + (by>>2)*(stride) , stride);\ - qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\ - qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\ - qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\ + qpel_avg[1][bxy](s->me.scratchpad , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) , stride);\ + qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\ + qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\ + qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\ }\ - d = cmp_func(s, s->me.scratchpad, src_y, stride);\ + d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\ }else\ d= 256*256*256*32; @@ -277,7 +278,7 @@ #undef CMP__DIRECT -static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){ +static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ return 0; } @@ -285,44 +286,37 @@ DSPContext* c= &s->dsp; int i; - memset(cmp, 0, sizeof(void*)*11); - - switch(type&0xFF){ - case FF_CMP_SAD: - cmp[0]= c->sad[0]; - cmp[1]= c->sad[1]; - break; - case FF_CMP_SATD: - cmp[0]= c->hadamard8_diff[0]; - cmp[1]= c->hadamard8_diff[1]; - break; - case FF_CMP_SSE: - cmp[0]= c->sse[0]; - cmp[1]= c->sse[1]; - break; - case FF_CMP_DCT: - cmp[0]= c->dct_sad[0]; - cmp[1]= c->dct_sad[1]; - break; - case FF_CMP_PSNR: - cmp[0]= c->quant_psnr[0]; - cmp[1]= c->quant_psnr[1]; - break; - case FF_CMP_BIT: - cmp[0]= c->bit[0]; - cmp[1]= c->bit[1]; - break; - case FF_CMP_RD: - cmp[0]= c->rd[0]; - cmp[1]= c->rd[1]; - break; - case FF_CMP_ZERO: - for(i=0; i<7; i++){ + memset(cmp, 0, sizeof(void*)*5); + + for(i=0; i<4; i++){ + switch(type&0xFF){ + case FF_CMP_SAD: + cmp[i]= c->sad[i]; + break; + case FF_CMP_SATD: + cmp[i]= c->hadamard8_diff[i]; + break; + case FF_CMP_SSE: + cmp[i]= c->sse[i]; + break; + case FF_CMP_DCT: + cmp[i]= c->dct_sad[i]; + break; + case FF_CMP_PSNR: + cmp[i]= c->quant_psnr[i]; + break; + case FF_CMP_BIT: + cmp[i]= c->bit[i]; + break; + case FF_CMP_RD: + cmp[i]= c->rd[i]; + break; + case FF_CMP_ZERO: cmp[i]= zero_cmp; + break; + default: + av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n"); } - break; - default: - av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n"); } } @@ -362,7 +356,7 @@ else if( s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx-> me_cmp == FF_CMP_SAD && s->avctx-> mb_cmp == FF_CMP_SAD) - s->me.sub_motion_search= sad_hpel_motion_search; + s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles else s->me.sub_motion_search= simple_hpel_motion_search; } @@ -370,9 +364,11 @@ if(s->avctx->me_cmp&FF_CMP_CHROMA){ s->me.motion_search[0]= simple_chroma_epzs_motion_search; s->me.motion_search[1]= simple_chroma_epzs_motion_search4; + s->me.motion_search[4]= simple_chroma_epzs_motion_search2; }else{ s->me.motion_search[0]= simple_epzs_motion_search; s->me.motion_search[1]= simple_epzs_motion_search4; + s->me.motion_search[4]= simple_epzs_motion_search2; } if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){ @@ -453,8 +449,8 @@ my = 0; for (y = y1; y <= y2; y++) { for (x = x1; x <= x2; x++) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, - s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, + s->linesize, 16); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < @@ -518,7 +514,7 @@ do { for (y = y1; y <= y2; y += range) { for (x = x1; x <= x2; x += range) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dmin = d; mx = x; @@ -598,7 +594,7 @@ lastx = x; for (x = x1; x <= x2; x += range) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminx = d; mx = x; @@ -607,7 +603,7 @@ x = lastx; for (y = y1; y <= y2; y += range) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminy = d; my = y; @@ -651,35 +647,25 @@ #define CHECK_SAD_HALF_MV(suffix, x, y) \ {\ - d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ + d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\ d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\ COPY3_IF_LT(dminh, d, dx, x, dy, y)\ } static inline int sad_hpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty) { - uint8_t *ref_picture= picture->data[0]; uint32_t *score_map= s->me.score_map; const int penalty_factor= s->me.sub_penalty_factor; - int mx, my, xx, yy, dminh; + int mx, my, dminh; uint8_t *pix, *ptr; - op_pixels_abs_func pix_abs_x2; - op_pixels_abs_func pix_abs_y2; - op_pixels_abs_func pix_abs_xy2; - - if(size==0){ - pix_abs_x2 = s->dsp.pix_abs16x16_x2; - pix_abs_y2 = s->dsp.pix_abs16x16_y2; - pix_abs_xy2= s->dsp.pix_abs16x16_xy2; - }else{ - pix_abs_x2 = s->dsp.pix_abs8x8_x2; - pix_abs_y2 = s->dsp.pix_abs8x8_y2; - pix_abs_xy2= s->dsp.pix_abs8x8_xy2; - } + const int xmin= s->me.xmin; + const int ymin= s->me.ymin; + const int xmax= s->me.xmax; + const int ymax= s->me.ymax; if(s->me.skip){ // printf("S"); @@ -689,13 +675,11 @@ } // printf("N"); - xx = 16 * s->mb_x + 8*(n&1); - yy = 16 * s->mb_y + 8*(n>>1); - pix = s->new_picture.data[0] + (yy * s->linesize) + xx; + pix = src_data[0]; mx = *mx_ptr; my = *my_ptr; - ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx); + ptr = ref_data[0] + (my * stride) + mx; dminh = dmin; @@ -715,16 +699,16 @@ pen_x= pred_x + mx; pen_y= pred_y + my; - ptr-= s->linesize; + ptr-= stride; if(t<=b){ CHECK_SAD_HALF_MV(y2 , 0, -1) if(l<=r){ CHECK_SAD_HALF_MV(xy2, -1, -1) if(t+r<=b+l){ CHECK_SAD_HALF_MV(xy2, +1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, -1, +1) } CHECK_SAD_HALF_MV(x2 , -1, 0) @@ -732,9 +716,9 @@ CHECK_SAD_HALF_MV(xy2, +1, -1) if(t+l<=b+r){ CHECK_SAD_HALF_MV(xy2, -1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, +1, +1) } CHECK_SAD_HALF_MV(x2 , +1, 0) @@ -743,9 +727,9 @@ if(l<=r){ if(t+l<=b+r){ CHECK_SAD_HALF_MV(xy2, -1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, +1, +1) } CHECK_SAD_HALF_MV(x2 , -1, 0) @@ -753,9 +737,9 @@ }else{ if(t+r<=b+l){ CHECK_SAD_HALF_MV(xy2, +1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, -1, +1) } CHECK_SAD_HALF_MV(x2 , +1, 0) @@ -802,35 +786,41 @@ /** * get fullpel ME search limits. - * @param range the approximate search range for the old ME code, unused for EPZS and newer */ -static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax) +static inline void get_limits(MpegEncContext *s, int x, int y) { - if(s->avctx->me_range) *range= s->avctx->me_range >> 1; - else *range= 16; - +/* + if(s->avctx->me_range) s->me.range= s->avctx->me_range >> 1; + else s->me.range= 16; +*/ if (s->unrestricted_mv) { - *xmin = -16; - *ymin = -16; - *xmax = s->mb_width*16; - *ymax = s->mb_height*16; + s->me.xmin = - x - 16; + s->me.ymin = - y - 16; + s->me.xmax = - x + s->mb_width *16; + s->me.ymax = - y + s->mb_height*16; } else { - *xmin = 0; - *ymin = 0; - *xmax = s->mb_width*16 - 16; - *ymax = s->mb_height*16 - 16; + s->me.xmin = - x; + s->me.ymin = - y; + s->me.xmax = - x + s->mb_width *16 - 16; + s->me.ymax = - y + s->mb_height*16 - 16; } - - //FIXME try to limit x/y min/max if me_range is set } -static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) +static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) { + const int size= 1; + const int h=8; int block; int P[10][2]; int dmin_sum=0, mx4_sum=0, my4_sum=0; uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; int same=1; + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + const int xmin= s->me.xmin; + const int ymin= s->me.ymin; + const int xmax= s->me.xmax; + const int ymax= s->me.ymax; for(block=0; block<4; block++){ int mx4, my4; @@ -839,23 +829,23 @@ static const int off[4]= {2, 1, 1, -1}; const int mot_stride = s->block_wrap[0]; const int mot_xy = s->block_index[block]; -// const int block_x= (block&1); -// const int block_y= (block>>1); -#if 1 // this saves us a bit of cliping work and shouldnt affect compression in a negative way - const int rel_xmin4= xmin; - const int rel_xmax4= xmax; - const int rel_ymin4= ymin; - const int rel_ymax4= ymax; -#else - const int rel_xmin4= xmin - block_x*8; - const int rel_xmax4= xmax - block_x*8 + 8; - const int rel_ymin4= ymin - block_y*8; - const int rel_ymax4= ymax - block_y*8 + 8; -#endif + const int block_x= (block&1); + const int block_y= (block>>1); + uint8_t *src_data[3]= { + s->new_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma? + s->new_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y), + s->new_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y) + }; + uint8_t *ref_data[3]= { + s->last_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma? + s->last_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y), + s->last_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y) + }; + P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; - if(P_LEFT[0] > (rel_xmax4< (s->me.xmax<me.xmax<mb_y == 0 && block<2) { @@ -866,10 +856,10 @@ P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0]; P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1]; - if(P_TOP[1] > (rel_ymax4< (rel_xmax4< (rel_ymax4< (s->me.ymax<me.ymax<me.xmin<me.xmin< (s->me.xmax<me.xmax< (s->me.ymax<me.ymax<me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, - &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); + dmin4 = s->me.motion_search[1](s, &mx4, &my4, P, pred_x4, pred_y4, + src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); - dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, - pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty); + dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, + pred_x4, pred_y4, src_data, ref_data, stride, uvstride, size, h, mv_penalty); if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ int dxy; - const int offset= ((block&1) + (block>>1)*s->linesize)*8; + const int offset= ((block&1) + (block>>1)*stride)*8; uint8_t *dest_y = s->me.scratchpad + offset; if(s->quarter_sample){ - uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset; + uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride + offset; dxy = ((my4 & 3) << 2) | (mx4 & 3); if(s->no_rounding) s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize); else - s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , s->linesize); + s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); }else{ - uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset; + uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride + offset; dxy = ((my4 & 1) << 1) | (mx4 & 1); if(s->no_rounding) - s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , s->linesize, 8); + s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , stride, h); else - s->dsp.put_pixels_tab [1][dxy](dest_y , ref , s->linesize, 8); + s->dsp.put_pixels_tab [1][dxy](dest_y , ref , stride, h); } dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor; }else @@ -937,7 +927,7 @@ return INT_MAX; if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ - dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize); + dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, s->me.scratchpad, stride, 16); } if(s->avctx->mb_cmp&FF_CMP_CHROMA){ @@ -959,8 +949,8 @@ s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8); } - dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize); - dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize); + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize, 8); + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8); } switch(s->avctx->mb_cmp&0xFF){ @@ -973,13 +963,134 @@ } } +static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint8_t *frame_ref_data[3], + int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int f_code, int mx, int my) +{ + const int size=0; + const int h=8; + int block; + int P[10][2]; + uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; + int same=1; + const int stride= 2*s->linesize; + const int uvstride= 2*s->uvlinesize; + int dmin_sum= 0; + const int mot_stride= s->mb_stride; + const int xy= s->mb_x + s->mb_y*mot_stride; + + s->me.ymin>>=1; + s->me.ymax>>=1; + + for(block=0; block<2; block++){ + int field_select; + int best_dmin= INT_MAX; + int best_field= -1; + + uint8_t *src_data[3]= { + frame_src_data[0] + s-> linesize*block, + frame_src_data[1] + s->uvlinesize*block, + frame_src_data[2] + s->uvlinesize*block + }; + + for(field_select=0; field_select<2; field_select++){ + int dmin, mx_i, my_i, pred_x, pred_y; + uint8_t *ref_data[3]= { + frame_ref_data[0] + s-> linesize*field_select, + frame_ref_data[1] + s->uvlinesize*field_select, + frame_ref_data[2] + s->uvlinesize*field_select + }; + int16_t (*mv_table)[2]= mv_tables[block][field_select]; + + P_LEFT[0] = mv_table[xy - 1][0]; + P_LEFT[1] = mv_table[xy - 1][1]; + if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1); + + pred_x= P_LEFT[0]; + pred_y= P_LEFT[1]; + + if(s->mb_y){ + P_TOP[0] = mv_table[xy - mot_stride][0]; + P_TOP[1] = mv_table[xy - mot_stride][1]; + P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0]; + P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1]; + if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1); + if(P_TOPRIGHT[0] < (s->me.xmin<<1)) P_TOPRIGHT[0]= (s->me.xmin<<1); + if(P_TOPRIGHT[0] > (s->me.xmax<<1)) P_TOPRIGHT[0]= (s->me.xmax<<1); + if(P_TOPRIGHT[1] > (s->me.ymax<<1)) P_TOPRIGHT[1]= (s->me.ymax<<1); + + P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); + P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); + } + P_MV1[0]= mx; //FIXME not correct if block != field_select + P_MV1[1]= my / 2; + + dmin = s->me.motion_search[4](s, &mx_i, &my_i, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, mv_table, (1<<16)>>1, mv_penalty); + + dmin= s->me.sub_motion_search(s, &mx_i, &my_i, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, size, h, mv_penalty); + + mv_table[xy][0]= mx_i; + mv_table[xy][1]= my_i; + + if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ + int dxy; + + //FIXME chroma ME + uint8_t *ref= ref_data[0] + (mx_i>>1) + (my_i>>1)*stride; + dxy = ((my_i & 1) << 1) | (mx_i & 1); + + if(s->no_rounding){ + s->dsp.put_no_rnd_pixels_tab[size][dxy](s->me.scratchpad, ref , stride, h); + }else{ + s->dsp.put_pixels_tab [size][dxy](s->me.scratchpad, ref , stride, h); + } + dmin= s->dsp.mb_cmp[size](s, src_data[0], s->me.scratchpad, stride, h); + dmin+= (mv_penalty[mx_i-pred_x] + mv_penalty[my_i-pred_y] + 1)*s->me.mb_penalty_factor; + }else + dmin+= s->me.mb_penalty_factor; //field_select bits + + dmin += field_select != block; //slightly prefer same field + + if(dmin < best_dmin){ + best_dmin= dmin; + best_field= field_select; + } + } + { + int16_t (*mv_table)[2]= mv_tables[block][best_field]; + + if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all + if(mv_table[xy][1]&1) same=0; + if(mv_table[xy][1]*2 != my) same=0; + if(best_field != block) same=0; + } + + field_select_tables[block][xy]= best_field; + dmin_sum += best_dmin; + } + + s->me.ymin<<=1; + s->me.ymax<<=1; + + if(same) + return INT_MAX; + + switch(s->avctx->mb_cmp&0xFF){ + /*case FF_CMP_SSE: + return dmin_sum+ 32*s->qscale*s->qscale;*/ + case FF_CMP_RD: + return dmin_sum; + default: + return dmin_sum+ 11*s->me.mb_penalty_factor; + } +} + void ff_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { uint8_t *pix, *ppix; - int sum, varc, vard, mx, my, range, dmin, xx, yy; - int xmin, ymin, xmax, ymax; - int rel_xmin, rel_ymin, rel_xmax, rel_ymax; + int sum, varc, vard, mx, my, dmin, xx, yy; int pred_x=0, pred_y=0; int P[10][2]; const int shift= 1+s->quarter_sample; @@ -987,18 +1098,26 @@ uint8_t *ref_picture= s->last_picture.data[0]; Picture * const pic= &s->current_picture; uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; - + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + uint8_t *src_data[3]= { + s->new_picture.data[0] + 16*(mb_x + stride*mb_y), + s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; + uint8_t *ref_data[3]= { + s->last_picture.data[0] + 16*(mb_x + stride*mb_y), + s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; + assert(s->quarter_sample==0 || s->quarter_sample==1); s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); - get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + get_limits(s, 16*mb_x, 16*mb_y); s->me.skip=0; switch(s->me_method) { @@ -1009,21 +1128,23 @@ my-= mb_y*16; dmin = 0; break; +#if 0 case ME_FULL: - dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); + dmin = full_motion_search(s, &mx, &my, range, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_LOG: - dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_PHODS: - dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; +#endif case ME_X1: case ME_EPZS: { @@ -1033,16 +1154,16 @@ P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; - if(P_LEFT[0] > (rel_xmax< (s->me.xmax<me.xmax<current_picture.motion_val[0][mot_xy - mot_stride ][0]; P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0]; P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1]; - if(P_TOP[1] > (rel_ymax< (rel_ymax< (s->me.ymax<me.ymax<me.xmin<me.xmin< (s->me.ymax<me.ymax<me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); + dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); break; } @@ -1070,14 +1191,14 @@ xx = mb_x * 16; yy = mb_y * 16; - pix = s->new_picture.data[0] + (yy * s->linesize) + xx; + pix = src_data[0]; /* At this point (mx,my) are full-pell and the relative displacement */ - ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx); + ppix = ref_data[0] + (my * s->linesize) + mx; sum = s->dsp.pix_sum(pix, s->linesize); varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; - vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8; + vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8; //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); pic->mb_var [s->mb_stride * mb_y + mb_x] = varc; @@ -1099,47 +1220,59 @@ s->scene_change_score+= s->qscale; if (vard*2 + 200 > varc) - mb_type|= MB_TYPE_INTRA; + mb_type|= CANDIDATE_MB_TYPE_INTRA; if (varc*2 + 200 > vard){ - mb_type|= MB_TYPE_INTER; - s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); + mb_type|= CANDIDATE_MB_TYPE_INTER; + s->me.sub_motion_search(s, &mx, &my, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->flags&CODEC_FLAG_MV0) if(mx || my) - mb_type |= MB_TYPE_SKIPED; //FIXME check difference + mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference }else{ mx <<=shift; my <<=shift; } if((s->flags&CODEC_FLAG_4MV) && !s->me.skip && varc>50 && vard>10){ - if(h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift) < INT_MAX) - mb_type|=MB_TYPE_INTER4V; + if(h263_mv4_search(s, mx, my, shift) < INT_MAX) + mb_type|=CANDIDATE_MB_TYPE_INTER4V; set_p_mv_tables(s, mx, my, 0); }else set_p_mv_tables(s, mx, my, 1); + if((s->flags&CODEC_FLAG_INTERLACED_ME) + && !s->me.skip){ //FIXME varc/d checks + if(interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my) < INT_MAX) + mb_type |= CANDIDATE_MB_TYPE_INTER_I; + } }else{ int intra_score, i; - mb_type= MB_TYPE_INTER; + mb_type= CANDIDATE_MB_TYPE_INTER; - dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); - + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty); + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty); if((s->flags&CODEC_FLAG_4MV) && !s->me.skip && varc>50 && vard>10){ - int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); + int dmin4= h263_mv4_search(s, mx, my, shift); if(dmin4 < dmin){ - mb_type= MB_TYPE_INTER4V; + mb_type= CANDIDATE_MB_TYPE_INTER4V; dmin=dmin4; } } + if((s->flags&CODEC_FLAG_INTERLACED_ME) + && !s->me.skip){ //FIXME varc/d checks + int dmin_i= interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my); + if(dmin_i < dmin){ + mb_type = CANDIDATE_MB_TYPE_INTER_I; + dmin= dmin_i; + } + } // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; - set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); + set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V); /* get intra luma score */ if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){ @@ -1155,7 +1288,7 @@ *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean; } - intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize); + intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize, 16); } #if 0 //FIXME /* get chroma score */ @@ -1184,8 +1317,8 @@ intra_score += s->me.mb_penalty_factor*16; if(intra_score < dmin){ - mb_type= MB_TYPE_INTRA; - s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_INTRA; //FIXME cleanup + mb_type= CANDIDATE_MB_TYPE_INTRA; + s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup }else s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0; @@ -1202,30 +1335,36 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { - int mx, my, range, dmin; - int xmin, ymin, xmax, ymax; - int rel_xmin, rel_ymin, rel_xmax, rel_ymax; + int mx, my, dmin; int pred_x=0, pred_y=0; int P[10][2]; const int shift= 1+s->quarter_sample; uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; const int xy= mb_x + mb_y*s->mb_stride; + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + uint8_t *src_data[3]= { + s->new_picture.data[0] + 16*(mb_x + stride*mb_y), + s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; + uint8_t *ref_data[3]= { + s->last_picture.data[0] + 16*(mb_x + stride*mb_y), + s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; assert(s->quarter_sample==0 || s->quarter_sample==1); s->me.pre_penalty_factor = get_penalty_factor(s, s->avctx->me_pre_cmp); - get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + get_limits(s, 16*mb_x, 16*mb_y); s->me.skip=0; P_LEFT[0] = s->p_mv_table[xy + 1][0]; P_LEFT[1] = s->p_mv_table[xy + 1][1]; - if(P_LEFT[0] < (rel_xmin<me.xmin<me.xmin<mb_height-1) { @@ -1238,9 +1377,9 @@ P_TOP[1] = s->p_mv_table[xy + s->mb_stride ][1]; P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0]; P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1]; - if(P_TOP[1] < (rel_ymin< (rel_xmax<me.ymin<me.ymin< (s->me.xmax<me.xmax<me.ymin<me.ymin<me.pre_motion_search(s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); + dmin = s->me.pre_motion_search(s, &mx, &my, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); s->p_mv_table[xy][0] = mx<p_mv_table[xy][1] = my<quarter_sample; const int mot_stride = s->mb_stride; const int mot_xy = mb_y*mot_stride + mb_x; - uint8_t * const ref_picture= picture->data[0]; + uint8_t * const ref_picture= ref_data[0] - 16*s->mb_x - 16*s->mb_y*s->linesize; //FIXME ugly uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; int mv_scale; @@ -1276,11 +1414,7 @@ s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); - get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + get_limits(s, 16*mb_x, 16*mb_y); switch(s->me_method) { case ME_ZERO: @@ -1290,28 +1424,30 @@ mx-= mb_x*16; my-= mb_y*16; break; +#if 0 case ME_FULL: - dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); + dmin = full_motion_search(s, &mx, &my, range, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_LOG: - dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_PHODS: - dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; +#endif case ME_X1: case ME_EPZS: { P_LEFT[0] = mv_table[mot_xy - 1][0]; P_LEFT[1] = mv_table[mot_xy - 1][1]; - if(P_LEFT[0] > (rel_xmax< (s->me.xmax<me.xmax< (rel_ymax< (rel_ymax< (s->me.ymax<me.ymax<me.xmin<me.xmin< (s->me.ymax<me.ymax<pb_time - s->pp_time)<<16) / (s->pp_time<me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - picture, s->p_mv_table, mv_scale, mv_penalty); + dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, s->p_mv_table, mv_scale, mv_penalty); break; } - dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, picture, 0, 0, mv_penalty); + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty); + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty); //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; @@ -1356,16 +1492,18 @@ return dmin; } -static inline int check_bidir_mv(MpegEncContext * s, - int mb_x, int mb_y, +static inline int check_bidir_mv(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], + int stride, int uvstride, int motion_fx, int motion_fy, int motion_bx, int motion_by, int pred_fx, int pred_fy, - int pred_bx, int pred_by) + int pred_bx, int pred_by, + int size, int h) { //FIXME optimize? //FIXME move into template? //FIXME better f_code prediction (max mv & distance) + //FIXME pointers uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame uint8_t *dest_y = s->me.scratchpad; uint8_t *ptr; @@ -1375,45 +1513,37 @@ if(s->quarter_sample){ dxy = ((motion_fy & 3) << 2) | (motion_fx & 3); - src_x = mb_x * 16 + (motion_fx >> 2); - src_y = mb_y * 16 + (motion_fy >> 2); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_fx >> 2; + src_y = motion_fy >> 2; - ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize); + ptr = ref_data[0] + (src_y * stride) + src_x; + s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , stride); dxy = ((motion_by & 3) << 2) | (motion_bx & 3); - src_x = mb_x * 16 + (motion_bx >> 2); - src_y = mb_y * 16 + (motion_by >> 2); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_bx >> 2; + src_y = motion_by >> 2; - ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize); + ptr = ref_data[3] + (src_y * stride) + src_x; + s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride); }else{ dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); - src_x = mb_x * 16 + (motion_fx >> 1); - src_y = mb_y * 16 + (motion_fy >> 1); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_fx >> 1; + src_y = motion_fy >> 1; - ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); + ptr = ref_data[0] + (src_y * stride) + src_x; + s->dsp.put_pixels_tab[size][dxy](dest_y , ptr , stride, h); dxy = ((motion_by & 1) << 1) | (motion_bx & 1); - src_x = mb_x * 16 + (motion_bx >> 1); - src_y = mb_y * 16 + (motion_by >> 1); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_bx >> 1; + src_y = motion_by >> 1; - ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); + ptr = ref_data[3] + (src_y * stride) + src_x; + s->dsp.avg_pixels_tab[size][dxy](dest_y , ptr , stride, h); } fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor - + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); + + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic if(s->avctx->mb_cmp&FF_CMP_CHROMA){ } @@ -1423,7 +1553,8 @@ } /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/ -static inline int bidir_refine(MpegEncContext * s, +static inline int bidir_refine(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], + int stride, int uvstride, int mb_x, int mb_y) { const int mot_stride = s->mb_stride; @@ -1440,16 +1571,18 @@ //FIXME do refinement and add flag - fbmin= check_bidir_mv(s, mb_x, mb_y, + fbmin= check_bidir_mv(s, src_data, ref_data, stride, uvstride, motion_fx, motion_fy, motion_bx, motion_by, pred_fx, pred_fy, - pred_bx, pred_by); + pred_bx, pred_by, + 0, 16); return fbmin; } -static inline int direct_search(MpegEncContext * s, +static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], + int stride, int uvstride, int mb_x, int mb_y) { int P[10][2]; @@ -1508,6 +1641,11 @@ return 256*256*256*64; } + + s->me.xmin= xmin; + s->me.ymin= ymin; + s->me.xmax= xmax; + s->me.ymax= ymax; P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<flags&CODEC_FLAG_QPEL){ - dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, - &s->last_picture, mv_table, 1<<14, mv_penalty); - dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, - 0, 0, &s->last_picture, 0, 0, mv_penalty); + dmin = simple_direct_qpel_epzs_motion_search(s, &mx, &my, P, 0, 0, + src_data, ref_data, stride, uvstride, mv_table, 1<<14, mv_penalty); + dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, + 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); + dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty); }else{ - dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, - &s->last_picture, mv_table, 1<<15, mv_penalty); - dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, - 0, 0, &s->last_picture, 0, 0, mv_penalty); + dmin = simple_direct_hpel_epzs_motion_search(s, &mx, &my, P, 0, 0, + src_data, ref_data, stride, uvstride, mv_table, 1<<15, mv_penalty); + dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, + 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); + dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty); } + + get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed s->b_direct_mv_table[mot_xy][0]= mx; s->b_direct_mv_table[mot_xy][1]= my; @@ -1551,40 +1691,80 @@ int mb_x, int mb_y) { const int penalty_factor= s->me.mb_penalty_factor; - int fmin, bmin, dmin, fbmin; + int fmin, bmin, dmin, fbmin, bimin, fimin; int type=0; + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + uint8_t *src_data[3]= { + s->new_picture.data[0] + 16*(s->mb_x + stride*s->mb_y), + s->new_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y), + s->new_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y) + }; + uint8_t *ref_data[6]= { + s->last_picture.data[0] + 16*(s->mb_x + stride*s->mb_y), + s->last_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y), + s->last_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y), + s->next_picture.data[0] + 16*(s->mb_x + stride*s->mb_y), + s->next_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y), + s->next_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y) + }; s->me.skip=0; if (s->codec_id == CODEC_ID_MPEG4) - dmin= direct_search(s, mb_x, mb_y); + dmin= direct_search(s, src_data, ref_data, stride, uvstride, mb_x, mb_y); else dmin= INT_MAX; - +//FIXME penalty stuff for non mpeg4 s->me.skip=0; - fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor; + fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, src_data, + ref_data, stride, uvstride, s->f_code) + 3*penalty_factor; s->me.skip=0; - bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor; + bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, src_data, + ref_data+3, stride, uvstride, s->b_code) + 2*penalty_factor; //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); s->me.skip=0; - fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor; + fbmin= bidir_refine(s, src_data, ref_data, stride, uvstride, mb_x, mb_y) + penalty_factor; //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin); + + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + const int xy = mb_y*s->mb_stride + mb_x; + +//FIXME mb type penalty + s->me.skip=0; + fimin= interlaced_search(s, src_data, ref_data , + s->b_field_mv_table[0], s->b_field_select_table[0], s->f_code, + s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); + bimin= interlaced_search(s, src_data, ref_data+3, + s->b_field_mv_table[1], s->b_field_select_table[1], s->b_code, + s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1]); + }else + fimin= bimin= INT_MAX; + { int score= fmin; - type = MB_TYPE_FORWARD; + type = CANDIDATE_MB_TYPE_FORWARD; if (dmin <= score){ score = dmin; - type = MB_TYPE_DIRECT; + type = CANDIDATE_MB_TYPE_DIRECT; } if(bmin>16; @@ -1593,8 +1773,16 @@ } if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){ - type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter - if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB + type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT; + if(fimin < INT_MAX) + type |= CANDIDATE_MB_TYPE_FORWARD_I; + if(bimin < INT_MAX) + type |= CANDIDATE_MB_TYPE_BACKWARD_I; + if(fimin < INT_MAX && bimin < INT_MAX){ + type |= CANDIDATE_MB_TYPE_BIDIR_I; + } + //FIXME something smarter + if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB } s->mb_type[mb_y*s->mb_stride + mb_x]= type; @@ -1661,24 +1849,6 @@ if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range; - /* clip / convert to intra 16x16 type MVs */ - for(y=0; ymb_height; y++){ - int x; - int xy= y*s->mb_stride; - for(x=0; xmb_width; x++){ - if(s->mb_type[xy]&MB_TYPE_INTER){ - if( s->p_mv_table[xy][0] >=range || s->p_mv_table[xy][0] <-range - || s->p_mv_table[xy][1] >=range || s->p_mv_table[xy][1] <-range){ - s->mb_type[xy] &= ~MB_TYPE_INTER; - s->mb_type[xy] |= MB_TYPE_INTRA; - s->current_picture.mb_type[xy]= MB_TYPE_INTRA; - s->p_mv_table[xy][0] = 0; - s->p_mv_table[xy][1] = 0; - } - } - xy++; - } - } //printf("%d no:%d %d//\n", clip, noclip, f_code); if(s->flags&CODEC_FLAG_4MV){ const int wrap= 2+ s->mb_width*2; @@ -1690,7 +1860,7 @@ int x; for(x=0; xmb_width; x++){ - if(s->mb_type[i]&MB_TYPE_INTER4V){ + if(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V){ int block; for(block=0; block<4; block++){ int off= (block& 1) + (block>>1)*wrap; @@ -1699,9 +1869,9 @@ if( mx >=range || mx <-range || my >=range || my <-range){ - s->mb_type[i] &= ~MB_TYPE_INTER4V; - s->mb_type[i] |= MB_TYPE_INTRA; - s->current_picture.mb_type[i]= MB_TYPE_INTRA; + s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V; + s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA; + s->current_picture.mb_type[i]= CANDIDATE_MB_TYPE_INTRA; } } } @@ -1712,30 +1882,45 @@ } } -void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type) +/** + * + * @param truncate 1 for truncation, 0 for using intra + */ +void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select, + int16_t (*mv_table)[2], int f_code, int type, int truncate) { - int y; + int y, h_range, v_range; // RAL: 8 in MPEG-1, 16 in MPEG-4 int range = (((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code); - + + if(s->msmpeg4_version) range= 16; if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range; + h_range= range; + v_range= field_select_table ? range>>1 : range; + /* clip / convert to intra 16x16 type MVs */ for(y=0; ymb_height; y++){ int x; int xy= y*s->mb_stride; for(x=0; xmb_width; x++){ if (s->mb_type[xy] & type){ // RAL: "type" test added... - if( mv_table[xy][0] >=range || mv_table[xy][0] <-range - || mv_table[xy][1] >=range || mv_table[xy][1] <-range){ + if(field_select_table==NULL || field_select_table[xy] == field_select){ + if( mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range + || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){ - if(s->codec_id == CODEC_ID_MPEG1VIDEO && 0){ - }else{ - if (mv_table[xy][0] > range-1) mv_table[xy][0]= range-1; - else if(mv_table[xy][0] < -range ) mv_table[xy][0]= -range; - if (mv_table[xy][1] > range-1) mv_table[xy][1]= range-1; - else if(mv_table[xy][1] < -range ) mv_table[xy][1]= -range; + if(truncate){ + if (mv_table[xy][0] > h_range-1) mv_table[xy][0]= h_range-1; + else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range; + if (mv_table[xy][1] > v_range-1) mv_table[xy][1]= v_range-1; + else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range; + }else{ + s->mb_type[xy] &= ~type; + s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA; + mv_table[xy][0]= + mv_table[xy][1]= 0; + } } } } diff -r 027545a2fdbe -r dea5b2946999 motion_est_template.c --- a/motion_est_template.c Tue Dec 30 15:29:38 2003 +0000 +++ b/motion_est_template.c Tue Dec 30 16:07:57 2003 +0000 @@ -22,29 +22,31 @@ * @file motion_est_template.c * Motion estimation template. */ - +//FIXME ref2_y next_pic? //lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) //Note, the last line is there to kill these ugly unused var warnings -#define LOAD_COMMON(x, y)\ +#define LOAD_COMMON\ uint32_t * const score_map= s->me.score_map;\ - const int stride= s->linesize;\ - const int uvstride= s->uvlinesize;\ const int time_pp= s->pp_time;\ const int time_pb= s->pb_time;\ - uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\ - uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\ - uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\ + const int xmin= s->me.xmin;\ + const int ymin= s->me.ymin;\ + const int xmax= s->me.xmax;\ + const int ymax= s->me.ymax;\ + uint8_t * const src_y= src_data[0];\ + uint8_t * const src_u= src_data[1];\ + uint8_t * const src_v= src_data[2];\ + uint8_t * const ref_y= ref_data[0];\ + uint8_t * const ref_u= ref_data[1];\ + uint8_t * const ref_v= ref_data[2];\ op_pixels_func (*hpel_put)[4];\ op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\ op_pixels_func (*chroma_hpel_put)[4];\ qpel_mc_func (*qpel_put)[16];\ qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\ const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\ - + (size_t)ref2_y + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map;\ + + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map\ + + xmin + xmax + ymin + ymax;\ if(s->no_rounding /*FIXME b_type*/){\ hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\ chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\ @@ -70,9 +72,8 @@ #if 0 static int RENAME(hpel_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *ref_data[3], + int size, uint8_t * const mv_penalty) { const int xx = 16 * s->mb_x + 8*(n&1); const int yy = 16 * s->mb_y + 8*(n>>1); @@ -80,7 +81,7 @@ const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; - LOAD_COMMON(xx, yy); + LOAD_COMMON // INIT; //FIXME factorize @@ -139,19 +140,17 @@ #else static int RENAME(hpel_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty) { - const int xx = 16 * s->mb_x + 8*(n&1); - const int yy = 16 * s->mb_y + 8*(n>>1); const int mx = *mx_ptr; const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; me_cmp_func cmp_sub, chroma_cmp_sub; int bx=2*mx, by=2*my; - LOAD_COMMON(xx, yy); + LOAD_COMMON //FIXME factorize @@ -247,20 +246,18 @@ } #endif -static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, +static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, uint8_t * const mv_penalty) { // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; const int size= 0; - const int xx = 16 * s->mb_x; - const int yy = 16 * s->mb_y; + const int h= 16; const int penalty_factor= s->me.mb_penalty_factor; - const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these - const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit me_cmp_func cmp_sub, chroma_cmp_sub; int d; - LOAD_COMMON(xx, yy); + LOAD_COMMON //FIXME factorize @@ -295,12 +292,10 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty) { - const int xx = 16 * s->mb_x + 8*(n&1); - const int yy = 16 * s->mb_y + 8*(n>>1); const int mx = *mx_ptr; const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; @@ -310,7 +305,7 @@ me_cmp_func cmp, chroma_cmp; me_cmp_func cmp_sub, chroma_cmp_sub; - LOAD_COMMON(xx, yy); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME @@ -514,19 +509,17 @@ return dmin; } -static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, +static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, uint8_t * const mv_penalty) { const int size= 0; - const int xx = 16 * s->mb_x; - const int yy = 16 * s->mb_y; + const int h= 16; const int penalty_factor= s->me.mb_penalty_factor; - const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these - const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit me_cmp_func cmp_sub, chroma_cmp_sub; int d; - LOAD_COMMON(xx, yy); + LOAD_COMMON //FIXME factorize @@ -597,15 +590,16 @@ static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; int next_dir=-1; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -639,15 +633,16 @@ } static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; int dia_size; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -730,17 +725,18 @@ #define MAX_SAB_SIZE 16 static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; Minima minima[MAX_SAB_SIZE]; const int minima_count= ABS(s->me.dia_size); int i, j; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -810,15 +806,16 @@ } static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; int dia_size; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -886,10 +883,10 @@ return dmin; } -static int RENAME(epzs_motion_search)(MpegEncContext * s, int block, +static int RENAME(epzs_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty) { int best[2]={0, 0}; @@ -899,10 +896,11 @@ int map_generation; const int penalty_factor= s->me.penalty_factor; const int size=0; - const int ref_mv_stride= s->mb_stride; - const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; + const int h=16; + const int ref_mv_stride= s->mb_stride; //pass as arg FIXME + const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME me_cmp_func cmp, chroma_cmp; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -973,21 +971,21 @@ //check(best[0],best[1],0, b0) if(s->me.dia_size==-1) - dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else if(s->me.dia_size<-1) - dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else if(s->me.dia_size<2) - dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else - dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); //check(best[0],best[1],0, b1) *mx_ptr= best[0]; @@ -998,10 +996,11 @@ } #ifndef CMP_DIRECT /* no 4mv search needed in direct mode */ -static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block, +static int RENAME(epzs_motion_search4)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty) { int best[2]={0, 0}; @@ -1011,10 +1010,11 @@ int map_generation; const int penalty_factor= s->me.penalty_factor; const int size=1; + const int h=8; const int ref_mv_stride= s->mb_stride; const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; me_cmp_func cmp, chroma_cmp; - LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -1024,7 +1024,7 @@ dmin = 1000000; //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); /* first line */ - if (s->mb_y == 0 && block<2) { + if (s->mb_y == 0/* && block<2*/) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) @@ -1049,21 +1049,100 @@ } if(s->me.dia_size==-1) - dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else if(s->me.dia_size<-1) - dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else if(s->me.dia_size<2) - dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else - dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + + + *mx_ptr= best[0]; + *my_ptr= best[1]; + +// printf("%d %d %d \n", best[0], best[1], dmin); + return dmin; +} + +//try to merge with above FIXME (needs PSNR test) +static int RENAME(epzs_motion_search2)(MpegEncContext * s, + int *mx_ptr, int *my_ptr, + int P[10][2], int pred_x, int pred_y, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], + int ref_mv_scale, uint8_t * const mv_penalty) +{ + int best[2]={0, 0}; + int d, dmin; + const int shift= 1+s->quarter_sample; + uint32_t *map= s->me.map; + int map_generation; + const int penalty_factor= s->me.penalty_factor; + const int size=0; //FIXME pass as arg + const int h=8; + const int ref_mv_stride= s->mb_stride; + const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; + me_cmp_func cmp, chroma_cmp; + LOAD_COMMON + + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; + + map_generation= update_map_generation(s); + + dmin = 1000000; +//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); + /* first line */ + if (s->mb_y == 0) { + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) + CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) + }else{ + CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) + //FIXME try some early stop + if(dmin>64*2){ + CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift) + CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) + CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) + } + } + if(dmin>64*4){ + CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + } + + if(s->me.dia_size==-1) + dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + else if(s->me.dia_size<-1) + dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + else if(s->me.dia_size<2) + dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + else + dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + *mx_ptr= best[0]; *my_ptr= best[1]; diff -r 027545a2fdbe -r dea5b2946999 mpeg12.c --- a/mpeg12.c Tue Dec 30 15:29:38 2003 +0000 +++ b/mpeg12.c Tue Dec 30 16:07:57 2003 +0000 @@ -29,6 +29,9 @@ #include "mpeg12data.h" +//#undef NDEBUG +//#include + /* Start codes. */ #define SEQ_END_CODE 0x000001b7 @@ -476,12 +479,12 @@ } static inline void put_mb_modes(MpegEncContext *s, int n, int bits, - int has_mv) + int has_mv, int field_motion) { put_bits(&s->pb, n, bits); if (!s->frame_pred_frame_dct) { if (has_mv) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ + put_bits(&s->pb, 2, 2 - field_motion); /* motion_type: frame/field */ put_bits(&s->pb, 1, s->interlaced_dct); } } @@ -501,9 +504,9 @@ if (s->block_last_index[i] >= 0) cbp |= 1 << (5 - i); } - + if (cbp == 0 && !first_mb && (mb_x != s->mb_width - 1 || (mb_y != s->mb_height - 1 && s->codec_id == CODEC_ID_MPEG1VIDEO)) && - ((s->pict_type == P_TYPE && (motion_x | motion_y) == 0) || + ((s->pict_type == P_TYPE && s->mv_type == MV_TYPE_16X16 && (motion_x | motion_y) == 0) || (s->pict_type == B_TYPE && s->mv_dir == s->last_mv_dir && (((s->mv_dir & MV_DIR_FORWARD) ? ((s->mv[0][0][0] - s->last_mv[0][0][0])|(s->mv[0][0][1] - s->last_mv[0][0][1])) : 0) | ((s->mv_dir & MV_DIR_BACKWARD) ? ((s->mv[1][0][0] - s->last_mv[1][0][0])|(s->mv[1][0][1] - s->last_mv[1][0][1])) : 0)) == 0))) { s->mb_skip_run++; @@ -511,6 +514,10 @@ s->skip_count++; s->misc_bits++; s->last_bits++; + if(s->pict_type == P_TYPE){ + s->last_mv[0][1][0]= s->last_mv[0][0][0]= + s->last_mv[0][1][1]= s->last_mv[0][0][1]= 0; + } } else { if(first_mb){ assert(s->mb_skip_run == 0); @@ -521,150 +528,167 @@ if (s->pict_type == I_TYPE) { if(s->dquant && cbp){ - put_mb_modes(s, 2, 1, 0); /* macroblock_type : macroblock_quant = 1 */ + put_mb_modes(s, 2, 1, 0, 0); /* macroblock_type : macroblock_quant = 1 */ put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 1, 1, 0); /* macroblock_type : macroblock_quant = 0 */ + put_mb_modes(s, 1, 1, 0, 0); /* macroblock_type : macroblock_quant = 0 */ s->qscale -= s->dquant; } s->misc_bits+= get_bits_diff(s); s->i_count++; } else if (s->mb_intra) { if(s->dquant && cbp){ - put_mb_modes(s, 6, 0x01, 0); + put_mb_modes(s, 6, 0x01, 0, 0); put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 5, 0x03, 0); + put_mb_modes(s, 5, 0x03, 0, 0); s->qscale -= s->dquant; } s->misc_bits+= get_bits_diff(s); s->i_count++; - s->last_mv[0][0][0] = - s->last_mv[0][0][1] = 0; + memset(s->last_mv, 0, sizeof(s->last_mv)); } else if (s->pict_type == P_TYPE) { + if(s->mv_type == MV_TYPE_16X16){ if (cbp != 0) { - if (motion_x == 0 && motion_y == 0) { + if ((motion_x|motion_y) == 0) { if(s->dquant){ - put_mb_modes(s, 5, 1, 0); /* macroblock_pattern & quant */ + put_mb_modes(s, 5, 1, 0, 0); /* macroblock_pattern & quant */ put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 2, 1, 0); /* macroblock_pattern only */ + put_mb_modes(s, 2, 1, 0, 0); /* macroblock_pattern only */ } s->misc_bits+= get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); } else { if(s->dquant){ - put_mb_modes(s, 5, 2, 1); /* motion + cbp */ + put_mb_modes(s, 5, 2, 1, 0); /* motion + cbp */ put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 1, 1, 1); /* motion + cbp */ + put_mb_modes(s, 1, 1, 1, 0); /* motion + cbp */ } s->misc_bits+= get_bits_diff(s); mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); // RAL: f_code parameter added mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); // RAL: f_code parameter added s->mv_bits+= get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); } } else { put_bits(&s->pb, 3, 1); /* motion only */ if (!s->frame_pred_frame_dct) put_bits(&s->pb, 2, 2); /* motion_type: frame */ + s->misc_bits+= get_bits_diff(s); mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); // RAL: f_code parameter added mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); // RAL: f_code parameter added s->qscale -= s->dquant; s->mv_bits+= get_bits_diff(s); } - s->f_count++; - } else - { // RAL: All the following bloc added for B frames: - if (cbp != 0) - { // With coded bloc pattern - if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD)) - { // Bi-directional motion - if (s->dquant) { - put_mb_modes(s, 5, 2, 1); - put_bits(&s->pb, 5, s->qscale); - } else { - put_mb_modes(s, 2, 3, 1); - } - s->misc_bits += get_bits_diff(s); - mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); - mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); - s->b_count++; - s->f_count++; - s->mv_bits += get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); - } - else if (s->mv_dir == MV_DIR_BACKWARD) - { // Backward motion - if (s->dquant) { - put_mb_modes(s, 6, 2, 1); - put_bits(&s->pb, 5, s->qscale); - } else { - put_mb_modes(s, 3, 3, 1); - } - s->misc_bits += get_bits_diff(s); - mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); - s->b_count++; - s->mv_bits += get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); - } - else if (s->mv_dir == MV_DIR_FORWARD) - { // Forward motion - if (s->dquant) { - put_mb_modes(s, 6, 3, 1); - put_bits(&s->pb, 5, s->qscale); - } else { - put_mb_modes(s, 4, 3, 1); - } - s->misc_bits += get_bits_diff(s); - mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); - s->f_count++; - s->mv_bits += get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); - } + s->last_mv[0][1][0]= s->last_mv[0][0][0]= motion_x; + s->last_mv[0][1][1]= s->last_mv[0][0][1]= motion_y; + }else{ + assert(!s->frame_pred_frame_dct && s->mv_type == MV_TYPE_FIELD); + + if (cbp) { + if(s->dquant){ + put_mb_modes(s, 5, 2, 1, 1); /* motion + cbp */ + put_bits(&s->pb, 5, s->qscale); + }else{ + put_mb_modes(s, 1, 1, 1, 1); /* motion + cbp */ + } + } else { + put_bits(&s->pb, 3, 1); /* motion only */ + put_bits(&s->pb, 2, 1); /* motion_type: field */ + s->qscale -= s->dquant; + } + s->misc_bits+= get_bits_diff(s); + for(i=0; i<2; i++){ + put_bits(&s->pb, 1, s->field_select[0][i]); + mpeg1_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code); + mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0]; + s->last_mv[0][i][1]= 2*s->mv[0][i][1]; + } + s->mv_bits+= get_bits_diff(s); + } + if(cbp) + put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + s->f_count++; + } else{ + static const int mb_type_len[4]={0,3,4,2}; //bak,for,bi + + if(s->mv_type == MV_TYPE_16X16){ + if (cbp){ // With coded bloc pattern + if (s->dquant) { + if(s->mv_dir == MV_DIR_FORWARD) + put_mb_modes(s, 6, 3, 1, 0); + else + put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 0); + put_bits(&s->pb, 5, s->qscale); + } else { + put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 0); } - else - { // No coded bloc pattern - if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD)) - { // Bi-directional motion - put_bits(&s->pb, 2, 2); /* backward & forward motion */ - if (!s->frame_pred_frame_dct) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ - mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); - mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); - s->b_count++; - s->f_count++; - } - else if (s->mv_dir == MV_DIR_BACKWARD) - { // Backward motion - put_bits(&s->pb, 3, 2); /* backward motion only */ - if (!s->frame_pred_frame_dct) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ - mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); - s->b_count++; - } - else if (s->mv_dir == MV_DIR_FORWARD) - { // Forward motion - put_bits(&s->pb, 4, 2); /* forward motion only */ - if (!s->frame_pred_frame_dct) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ - mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); - s->f_count++; - } + }else{ // No coded bloc pattern + put_bits(&s->pb, mb_type_len[s->mv_dir], 2); + if (!s->frame_pred_frame_dct) + put_bits(&s->pb, 2, 2); /* motion_type: frame */ s->qscale -= s->dquant; - s->mv_bits += get_bits_diff(s); + } + s->misc_bits += get_bits_diff(s); + if (s->mv_dir&MV_DIR_FORWARD){ + mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); + mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); + s->last_mv[0][0][0]=s->last_mv[0][1][0]= s->mv[0][0][0]; + s->last_mv[0][0][1]=s->last_mv[0][1][1]= s->mv[0][0][1]; + s->f_count++; + } + if (s->mv_dir&MV_DIR_BACKWARD){ + mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); + mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); + s->last_mv[1][0][0]=s->last_mv[1][1][0]= s->mv[1][0][0]; + s->last_mv[1][0][1]=s->last_mv[1][1][1]= s->mv[1][0][1]; + s->b_count++; + } + }else{ + assert(s->mv_type == MV_TYPE_FIELD); + assert(!s->frame_pred_frame_dct); + if (cbp){ // With coded bloc pattern + if (s->dquant) { + if(s->mv_dir == MV_DIR_FORWARD) + put_mb_modes(s, 6, 3, 1, 1); + else + put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 1); + put_bits(&s->pb, 5, s->qscale); + } else { + put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 1); } - // End of bloc from RAL + }else{ // No coded bloc pattern + put_bits(&s->pb, mb_type_len[s->mv_dir], 2); + put_bits(&s->pb, 2, 1); /* motion_type: field */ + s->qscale -= s->dquant; + } + s->misc_bits += get_bits_diff(s); + if (s->mv_dir&MV_DIR_FORWARD){ + for(i=0; i<2; i++){ + put_bits(&s->pb, 1, s->field_select[0][i]); + mpeg1_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code); + mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0]; + s->last_mv[0][i][1]= 2*s->mv[0][i][1]; + } + s->f_count++; + } + if (s->mv_dir&MV_DIR_BACKWARD){ + for(i=0; i<2; i++){ + put_bits(&s->pb, 1, s->field_select[1][i]); + mpeg1_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0] , s->b_code); + mpeg1_encode_motion(s, s->mv[1][i][1] - (s->last_mv[1][i][1]>>1), s->b_code); + s->last_mv[1][i][0]= s->mv[1][i][0]; + s->last_mv[1][i][1]= 2*s->mv[1][i][1]; + } + s->b_count++; + } } + s->mv_bits += get_bits_diff(s); + if(cbp) + put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + } for(i=0;i<6;i++) { if (cbp & (1 << (5 - i))) { mpeg1_encode_block(s, block[i], i); @@ -676,18 +700,6 @@ else s->p_tex_bits+= get_bits_diff(s); } - - // RAL: By this: - if (s->mv_dir & MV_DIR_FORWARD) - { - s->last_mv[0][0][0]= s->mv[0][0][0]; - s->last_mv[0][0][1]= s->mv[0][0][1]; - } - if (s->mv_dir & MV_DIR_BACKWARD) - { - s->last_mv[1][0][0]= s->mv[1][0][0]; - s->last_mv[1][0][1]= s->mv[1][0][1]; - } } // RAL: Parameter added: f_or_b_code @@ -1952,7 +1964,7 @@ s->repeat_first_field = get_bits1(&s->gb); s->chroma_420_type = get_bits1(&s->gb); s->progressive_frame = get_bits1(&s->gb); - + if(s->picture_structure == PICT_FRAME) s->first_field=0; else{ @@ -1963,13 +1975,9 @@ if(s->alternate_scan){ ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan); ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); }else{ ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); } /* composite display not parsed */ @@ -2103,10 +2111,10 @@ s->qscale = get_qscale(s); if (s->first_slice && (s->first_field || s->picture_structure==PICT_FRAME)) { if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", + av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1], s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), - s->progressive_sequence ? "pro" :"", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", + s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors, s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :""); } diff -r 027545a2fdbe -r dea5b2946999 mpegvideo.c --- a/mpegvideo.c Tue Dec 30 15:29:38 2003 +0000 +++ b/mpegvideo.c Tue Dec 30 16:07:57 2003 +0000 @@ -252,8 +252,13 @@ /* load & permutate scantables note: only wmv uses differnt ones */ - ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); + if(s->alternate_scan){ + ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan); + }else{ + ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); + } ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); @@ -394,7 +399,7 @@ /* init common structure for both encoder and decoder */ int MPV_common_init(MpegEncContext *s) { - int y_size, c_size, yc_size, i, mb_array_size, x, y; + int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y; dsputil_init(&s->dsp, s->avctx); DCT_common_init(s); @@ -407,6 +412,7 @@ s->b8_stride = s->mb_width*2 + 1; s->b4_stride = s->mb_width*4 + 1; mb_array_size= s->mb_height * s->mb_stride; + mv_table_size= (s->mb_height+2) * s->mb_stride + 1; /* set default edge pos, will be overriden in decode_header if needed */ s->h_edge_pos= s->mb_width*16; @@ -458,8 +464,6 @@ s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed? if (s->encoding) { - int mv_table_size= s->mb_stride * (s->mb_height+2) + 1; - /* Allocate MV tables */ CHECKED_ALLOCZ(s->p_mv_table_base , mv_table_size * 2 * sizeof(int16_t)) CHECKED_ALLOCZ(s->b_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t)) @@ -491,7 +495,7 @@ CHECKED_ALLOCZ(s->avctx->stats_out, 256); /* Allocate MB type table */ - CHECKED_ALLOCZ(s->mb_type , mb_array_size * sizeof(uint8_t)) //needed for encoding + CHECKED_ALLOCZ(s->mb_type , mb_array_size * sizeof(uint16_t)) //needed for encoding CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int)) @@ -513,10 +517,21 @@ CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t)) - if(s->codec_id==CODEC_ID_MPEG4){ + if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){ /* interlaced direct mode decoding tables */ - CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t)) - CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t)) + for(i=0; i<2; i++){ + int j, k; + for(j=0; j<2; j++){ + for(k=0; k<2; k++){ + CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k] , mv_table_size * 2 * sizeof(int16_t)) + s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1; + } + CHECKED_ALLOCZ(s->b_field_select_table[i][j] , mb_array_size * 2 * sizeof(uint8_t)) + CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j] , mv_table_size * 2 * sizeof(int16_t)) + s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j] + s->mb_stride + 1; + } + CHECKED_ALLOCZ(s->p_field_select_table[i] , mb_array_size * 2 * sizeof(uint8_t)) + } } if (s->out_format == FMT_H263) { /* ac values */ @@ -583,7 +598,7 @@ /* init common structure for both encoder and decoder */ void MPV_common_end(MpegEncContext *s) { - int i; + int i, j, k; av_freep(&s->parse_context.buffer); s->parse_context.buffer_size=0; @@ -601,6 +616,18 @@ s->b_bidir_forw_mv_table= NULL; s->b_bidir_back_mv_table= NULL; s->b_direct_mv_table= NULL; + for(i=0; i<2; i++){ + for(j=0; j<2; j++){ + for(k=0; k<2; k++){ + av_freep(&s->b_field_mv_table_base[i][j][k]); + s->b_field_mv_table[i][j][k]=NULL; + } + av_freep(&s->b_field_select_table[i][j]); + av_freep(&s->p_field_mv_table_base[i][j]); + s->p_field_mv_table[i][j]=NULL; + } + av_freep(&s->p_field_select_table[i]); + } av_freep(&s->dc_val[0]); av_freep(&s->ac_val[0]); @@ -618,8 +645,6 @@ av_freep(&s->tex_pb_buffer); av_freep(&s->pb2_buffer); av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL; - av_freep(&s->field_mv_table); - av_freep(&s->field_select_table); av_freep(&s->avctx->stats_out); av_freep(&s->ac_stats); av_freep(&s->error_status_table); @@ -692,7 +717,7 @@ s->me_method = avctx->me_method; /* Fixed QSCALE */ - s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE); + s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE); s->adaptive_quant= ( s->avctx->lumi_masking || s->avctx->dark_masking @@ -702,8 +727,9 @@ || (s->flags&CODEC_FLAG_QP_RD)) && !s->fixed_qscale; - s->obmc= (s->flags & CODEC_FLAG_OBMC); - s->loop_filter= (s->flags & CODEC_FLAG_LOOP_FILTER); + s->obmc= !!(s->flags & CODEC_FLAG_OBMC); + s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER); + s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN); if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){ @@ -934,7 +960,7 @@ if(s->modified_quant) s->chroma_qscale_table= ff_h263_chroma_qscale_table; s->progressive_frame= - s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT); + s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME)); ff_init_me(s); @@ -1610,7 +1636,7 @@ for(y=0; ydsp.pix_abs16x16(src + offset, ref + offset, stride); + int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16); int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8; int sae = get_sae(src + offset, mean, stride); @@ -1906,7 +1932,7 @@ if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate){ int vbv_delay; - assert(s->repeat_first_field==0 && s->avctx->repeat_pic==0); + assert(s->repeat_first_field==0); vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate); assert(vbv_delay < 0xFFFF); @@ -3300,7 +3326,7 @@ if(s->flags&CODEC_FLAG_INTERLACED_DCT){ int progressive_score, interlaced_score; - + progressive_score= pix_vcmp16x8(ptr, wrap_y ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y ); interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y , wrap_y*2); @@ -3417,12 +3443,12 @@ /* pre quantization */ if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){ //FIXME optimize - if(s->dsp.pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1; - if(s->dsp.pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1; - if(s->dsp.pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1; - if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; - if(s->dsp.pix_abs8x8(ptr_cb , dest_cb , wrap_c) < 20*s->qscale) skip_dct[4]= 1; - if(s->dsp.pix_abs8x8(ptr_cr , dest_cr , wrap_c) < 20*s->qscale) skip_dct[5]= 1; + if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1; + if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1; + if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1; + if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1; + if(s->dsp.sad[1](NULL, ptr_cb , dest_cb , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1; + if(s->dsp.sad[1](NULL, ptr_cr , dest_cr , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1; #if 0 { static int stat[7]; @@ -3484,6 +3510,19 @@ s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale; } + //non c quantize code returns incorrect block_last_index FIXME + if(s->alternate_scan && s->dct_quantize != dct_quantize_c){ + for(i=0; i<6; i++){ + int j; + if(s->block_last_index[i]>0){ + for(j=63; j>0; j--){ + if(s->block[i][ s->intra_scantable.permutated[j] ]) break; + } + s->block_last_index[i]= j; + } + } + } + /* huffman encode */ switch(s->codec_id){ //FIXME funct ptr could be slightly faster case CODEC_ID_MPEG1VIDEO: @@ -3724,9 +3763,9 @@ int x,y; if(w==16 && h==16) - return s->dsp.sse[0](NULL, src1, src2, stride); + return s->dsp.sse[0](NULL, src1, src2, stride, 16); else if(w==8 && h==8) - return s->dsp.sse[1](NULL, src1, src2, stride); + return s->dsp.sse[1](NULL, src1, src2, stride, 8); for(y=0; ymb_y*16 + 16 > s->height) h= s->height- s->mb_y*16; if(w==16 && h==16) - return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize) - +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize) - +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize); + return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16) + +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8) + +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8); else return sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize) +sse(s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize) @@ -3759,7 +3798,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) { int mb_x, mb_y, pdif = 0; - int i; + int i, j; int bits; MpegEncContext best_s, backup_s; uint8_t bit_buf[2][3000]; @@ -3843,7 +3882,8 @@ //FIXME do we need to zero them? memset(s->current_picture.motion_val[0][0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2); memset(s->p_mv_table , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2); - memset(s->mb_type , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height); + for(i=0; imb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; if(!s->fixed_qscale){ /* finding spatial complexity for I-frame rate control */ @@ -3868,32 +3908,61 @@ if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){ s->pict_type= I_TYPE; - memset(s->mb_type , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height); + for(i=0; imb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); } if(!s->umvplus){ if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { - s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER); - + s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); + + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + int a,b; + a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select + b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I); + s->f_code= FFMAX(s->f_code, FFMAX(a,b)); + } + ff_fix_long_p_mvs(s); + ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0); + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++) + ff_fix_long_mvs(s, s->p_field_select_table[i], j, + s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0); + } + } } if(s->pict_type==B_TYPE){ int a, b; - a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD); - b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR); + a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD); + b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR); s->f_code = FFMAX(a, b); - a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD); - b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR); + a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD); + b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR); s->b_code = FFMAX(a, b); - ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD); - ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD); - ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR); - ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR); + ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1); + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + int dir; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++){ + int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) + : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I); + ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, + s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1); + } + } + } + } } } @@ -3990,10 +4059,7 @@ s->current_picture_ptr->error[i] = 0; } s->mb_skip_run = 0; - s->last_mv[0][0][0] = 0; - s->last_mv[0][0][1] = 0; - s->last_mv[1][0][0] = 0; - s->last_mv[1][0][1] = 0; + memset(s->last_mv, 0, sizeof(s->last_mv)); s->last_mv_dir = 0; @@ -4027,6 +4093,7 @@ int mb_type= s->mb_type[xy]; // int d; int dmin= INT_MAX; + int dir; s->mb_x = mb_x; ff_update_block_index(s); @@ -4134,25 +4201,37 @@ backup_s.tex_pb= s->tex_pb; } - if(mb_type&MB_TYPE_INTER){ + if(mb_type&CANDIDATE_MB_TYPE_INTER){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[0][0][0] = s->p_mv_table[xy][0]; s->mv[0][0][1] = s->p_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } - if(mb_type&MB_TYPE_SKIPED){ + if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->p_field_select_table[i][xy]; + s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0]; + s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1]; + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_SKIPED){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[0][0][0] = 0; s->mv[0][0][1] = 0; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_SKIPED, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } - if(mb_type&MB_TYPE_INTER4V){ + if(mb_type&CANDIDATE_MB_TYPE_INTER4V){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_8X8; s->mb_intra= 0; @@ -4160,28 +4239,28 @@ s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0]; s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1]; } - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } - if(mb_type&MB_TYPE_FORWARD){ + if(mb_type&CANDIDATE_MB_TYPE_FORWARD){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[0][0][0] = s->b_forw_mv_table[xy][0]; s->mv[0][0][1] = s->b_forw_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } - if(mb_type&MB_TYPE_BACKWARD){ + if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){ s->mv_dir = MV_DIR_BACKWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[1][0][0] = s->b_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_back_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]); } - if(mb_type&MB_TYPE_BIDIR){ + if(mb_type&CANDIDATE_MB_TYPE_BIDIR){ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; @@ -4189,10 +4268,10 @@ s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1]; s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } - if(mb_type&MB_TYPE_DIRECT){ + if(mb_type&CANDIDATE_MB_TYPE_DIRECT){ int mx= s->b_direct_mv_table[xy][0]; int my= s->b_direct_mv_table[xy][1]; @@ -4201,16 +4280,54 @@ #ifdef CONFIG_RISKY ff_mpeg4_set_direct_mv(s, mx, my); #endif - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, &dmin, &next_block, mx, my); } - if(mb_type&MB_TYPE_INTRA){ + if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->b_field_select_table[0][i][xy]; + s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0]; + s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1]; + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ + s->mv_dir = MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[1][i] = s->b_field_select_table[1][i][xy]; + s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0]; + s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1]; + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy]; + s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0]; + s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1]; + } + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_INTRA){ s->mv_dir = 0; s->mv_type = MV_TYPE_16X16; s->mb_intra= 1; s->mv[0][0][0] = 0; s->mv[0][0][1] = 0; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); if(s->h263_pred || s->h263_aic){ if(best_s.mb_intra) @@ -4252,7 +4369,7 @@ } } - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]); if(best_s.qscale != qp){ if(s->mb_intra){ @@ -4312,19 +4429,30 @@ // only one MB-Type possible switch(mb_type){ - case MB_TYPE_INTRA: + case CANDIDATE_MB_TYPE_INTRA: s->mv_dir = 0; s->mb_intra= 1; motion_x= s->mv[0][0][0] = 0; motion_y= s->mv[0][0][1] = 0; break; - case MB_TYPE_INTER: + case CANDIDATE_MB_TYPE_INTER: s->mv_dir = MV_DIR_FORWARD; s->mb_intra= 0; motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0]; motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1]; break; - case MB_TYPE_INTER4V: + case CANDIDATE_MB_TYPE_INTER_I: + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->p_field_select_table[i][xy]; + s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0]; + s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1]; + } + motion_x = motion_y = 0; + break; + case CANDIDATE_MB_TYPE_INTER4V: s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_8X8; s->mb_intra= 0; @@ -4334,7 +4462,7 @@ } motion_x= motion_y= 0; break; - case MB_TYPE_DIRECT: + case CANDIDATE_MB_TYPE_DIRECT: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; s->mb_intra= 0; motion_x=s->b_direct_mv_table[xy][0]; @@ -4343,7 +4471,7 @@ ff_mpeg4_set_direct_mv(s, motion_x, motion_y); #endif break; - case MB_TYPE_BIDIR: + case CANDIDATE_MB_TYPE_BIDIR: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; s->mb_intra= 0; motion_x=0; @@ -4353,19 +4481,54 @@ s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1]; break; - case MB_TYPE_BACKWARD: + case CANDIDATE_MB_TYPE_BACKWARD: s->mv_dir = MV_DIR_BACKWARD; s->mb_intra= 0; motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0]; motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1]; break; - case MB_TYPE_FORWARD: + case CANDIDATE_MB_TYPE_FORWARD: s->mv_dir = MV_DIR_FORWARD; s->mb_intra= 0; motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0]; motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1]; // printf(" %d %d ", motion_x, motion_y); break; + case CANDIDATE_MB_TYPE_FORWARD_I: + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->b_field_select_table[0][i][xy]; + s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0]; + s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1]; + } + motion_x=motion_y=0; + break; + case CANDIDATE_MB_TYPE_BACKWARD_I: + s->mv_dir = MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[1][i] = s->b_field_select_table[1][i][xy]; + s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0]; + s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1]; + } + motion_x=motion_y=0; + break; + case CANDIDATE_MB_TYPE_BIDIR_I: + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy]; + s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0]; + s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1]; + } + } + motion_x=motion_y=0; + break; default: motion_x=motion_y=0; //gcc warning fix av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n"); diff -r 027545a2fdbe -r dea5b2946999 mpegvideo.h --- a/mpegvideo.h Tue Dec 30 15:29:38 2003 +0000 +++ b/mpegvideo.h Tue Dec 30 16:07:57 2003 +0000 @@ -137,6 +137,7 @@ int16_t (*motion_val_base[2])[2]; int8_t *ref_index[2]; uint32_t *mb_type_base; +#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 //default mb_type if theres just one type #define IS_INTRA4x4(a) ((a)&MB_TYPE_INTRA4x4) #define IS_INTRA16x16(a) ((a)&MB_TYPE_INTRA16x16) #define IS_PCM(a) ((a)&MB_TYPE_INTRA_PCM) @@ -206,23 +207,28 @@ int mb_penalty_factor; int pre_pass; ///< = 1 for the pre pass int dia_size; + int xmin; + int xmax; + int ymin; + int ymax; uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV int (*sub_motion_search)(struct MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty); - int (*motion_search[7])(struct MpegEncContext * s, int block, + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty); + int (*motion_search[7])(struct MpegEncContext * s, int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty); - int (*pre_motion_search)(struct MpegEncContext * s, int block, + int (*pre_motion_search)(struct MpegEncContext * s, int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty); - int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, uint8_t * const mv_penalty); }MotionEstContext; @@ -351,12 +357,18 @@ int16_t (*b_bidir_forw_mv_table_base)[2]; int16_t (*b_bidir_back_mv_table_base)[2]; int16_t (*b_direct_mv_table_base)[2]; + int16_t (*p_field_mv_table_base[2][2])[2]; + int16_t (*b_field_mv_table_base[2][2][2])[2]; int16_t (*p_mv_table)[2]; ///< MV table (1MV per MB) p-frame encoding int16_t (*b_forw_mv_table)[2]; ///< MV table (1MV per MB) forward mode b-frame encoding int16_t (*b_back_mv_table)[2]; ///< MV table (1MV per MB) backward mode b-frame encoding int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding int16_t (*b_direct_mv_table)[2]; ///< MV table (1MV per MB) direct mode b-frame encoding + int16_t (*p_field_mv_table[2][2])[2]; ///< MV table (2MV per MB) interlaced p-frame encoding + int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced b-frame encoding + uint8_t (*p_field_select_table[2]); + uint8_t (*b_field_select_table[2][2]); int me_method; ///< ME algorithm int scene_change_score; int mv_dir; @@ -391,17 +403,22 @@ int mb_x, mb_y; int mb_skip_run; int mb_intra; - uint8_t *mb_type; ///< Table for MB type FIXME remove and use picture->mb_type -#define MB_TYPE_INTRA 0x01 -#define MB_TYPE_INTER 0x02 -#define MB_TYPE_INTER4V 0x04 -#define MB_TYPE_SKIPED 0x08 + uint16_t *mb_type; ///< Table for candidate MB types for encoding +#define CANDIDATE_MB_TYPE_INTRA 0x01 +#define CANDIDATE_MB_TYPE_INTER 0x02 +#define CANDIDATE_MB_TYPE_INTER4V 0x04 +#define CANDIDATE_MB_TYPE_SKIPED 0x08 //#define MB_TYPE_GMC 0x10 -#define MB_TYPE_DIRECT 0x10 -#define MB_TYPE_FORWARD 0x20 -#define MB_TYPE_BACKWARD 0x40 -#define MB_TYPE_BIDIR 0x80 +#define CANDIDATE_MB_TYPE_DIRECT 0x10 +#define CANDIDATE_MB_TYPE_FORWARD 0x20 +#define CANDIDATE_MB_TYPE_BACKWARD 0x40 +#define CANDIDATE_MB_TYPE_BIDIR 0x80 + +#define CANDIDATE_MB_TYPE_INTER_I 0x100 +#define CANDIDATE_MB_TYPE_FORWARD_I 0x200 +#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400 +#define CANDIDATE_MB_TYPE_BIDIR_I 0x800 int block_index[6]; ///< index to current MB in block based arrays with edges int block_wrap[6]; @@ -551,8 +568,6 @@ uint8_t *tex_pb_buffer; uint8_t *pb2_buffer; int mpeg_quant; - int16_t (*field_mv_table)[2][2]; ///< used for interlaced b frame decoding - int8_t (*field_select_table)[2]; ///< wtf, no really another table for interlaced b frames int t_frame; ///< time distance of first I -> B, used for interlaced b frames int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG4 @@ -748,7 +763,8 @@ int mb_x, int mb_y); int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type); void ff_fix_long_p_mvs(MpegEncContext * s); -void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type); +void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select, + int16_t (*mv_table)[2], int f_code, int type, int truncate); void ff_init_me(MpegEncContext *s); int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y); diff -r 027545a2fdbe -r dea5b2946999 ppc/dsputil_altivec.c --- a/ppc/dsputil_altivec.c Tue Dec 30 15:29:38 2003 +0000 +++ b/ppc/dsputil_altivec.c Tue Dec 30 16:07:57 2003 +0000 @@ -45,7 +45,7 @@ } #endif /* CONFIG_DARWIN */ -int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) +int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); @@ -57,7 +57,7 @@ s = 0; sad = (vector unsigned int)vec_splat_u32(0); - for(i=0;i<16;i++) { + for(i=0;ipix_abs16x16_x2 = pix_abs16x16_x2_altivec; - c->pix_abs16x16_y2 = pix_abs16x16_y2_altivec; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec; - c->pix_abs16x16 = pix_abs16x16_altivec; - c->pix_abs8x8 = pix_abs8x8_altivec; - c->sad[0]= sad16x16_altivec; - c->sad[1]= sad8x8_altivec; + c->pix_abs[0][1] = sad16_x2_altivec; + c->pix_abs[0][2] = sad16_y2_altivec; + c->pix_abs[0][3] = sad16_xy2_altivec; + c->pix_abs[0][0] = sad16_altivec; + c->pix_abs[1][0] = sad8_altivec; + c->sad[0]= sad16_altivec; + c->sad[1]= sad8_altivec; c->pix_norm1 = pix_norm1_altivec; c->sse[1]= sse8_altivec; c->sse[0]= sse16_altivec; diff -r 027545a2fdbe -r dea5b2946999 ratecontrol.c --- a/ratecontrol.c Tue Dec 30 15:29:38 2003 +0000 +++ b/ratecontrol.c Tue Dec 30 16:07:57 2003 +0000 @@ -520,7 +520,7 @@ if(spat_cplx < 4) spat_cplx= 4; //FIXME finetune if(temp_cplx < 4) temp_cplx= 4; //FIXME finetune - if((s->mb_type[mb_xy]&MB_TYPE_INTRA)){//FIXME hq mode + if((s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTRA)){//FIXME hq mode cplx= spat_cplx; factor= 1.0 + p_masking; }else{