# HG changeset patch # User michael # Date 1216504434 0 # Node ID 483421b11d98e3286b519caf967d607d3a024b26 # Parent fe8a7f5905e47b9b5458269f17756af0517db8a6 Fix h264_loop_filter_strength_mmx2() so it works with PAFF. fixed at least: CVFI1_Sony_D.jsv CVFI1_SVA_C.264 MR6_BT_B.h264 diff -r fe8a7f5905e4 -r 483421b11d98 dsputil.h --- a/dsputil.h Sat Jul 19 18:53:04 2008 +0000 +++ b/dsputil.h Sat Jul 19 21:53:54 2008 +0000 @@ -347,7 +347,7 @@ void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta); // h264_loop_filter_strength: simd only. the C version is inlined in h264.c void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1); + int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); diff -r fe8a7f5905e4 -r 483421b11d98 h264.c --- a/h264.c Sat Jul 19 18:53:04 2008 +0000 +++ b/h264.c Sat Jul 19 21:53:54 2008 +0000 @@ -6495,7 +6495,7 @@ int step = IS_8x8DCT(mb_type) ? 2 : 1; edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, - (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 ); + (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); } if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) bSv[0][0] = 0x0004000400040004ULL; diff -r fe8a7f5905e4 -r 483421b11d98 i386/h264dsp_mmx.c --- a/i386/h264dsp_mmx.c Sat Jul 19 18:53:04 2008 +0000 +++ b/i386/h264dsp_mmx.c Sat Jul 19 21:53:54 2008 +0000 @@ -20,6 +20,9 @@ #include "dsputil_mmx.h" +DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL; +DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL; + /***********************************/ /* IDCT */ @@ -623,7 +626,7 @@ } static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1 ) { + int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) { int dir; asm volatile( "pxor %%mm7, %%mm7 \n\t" @@ -632,6 +635,13 @@ "movq %2, %%mm4 \n\t" ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7) ); + if(field) + asm volatile( + "movq %0, %%mm5 \n\t" + "movq %1, %%mm4 \n\t" + ::"m"(ff_pb_3_1), "m"(ff_pb_7_3) + ); + // could do a special case for dir==0 && edges==1, but it only reduces the // average filter time by 1.2% for( dir=1; dir>=0; dir-- ) {