# HG changeset patch # User michael # Date 1117615420 0 # Node ID 135aa7d0bd1727d4b57670b09647f0e3d35e118e # Parent 6eded34ab57bc1e53fa0edf89d0e2dedb5b2b8fa avoid one transpose (730->680 dezicycles on duron) diff -r 6eded34ab57b -r 135aa7d0bd17 h264.c --- a/h264.c Wed Jun 01 04:51:46 2005 +0000 +++ b/h264.c Wed Jun 01 08:43:40 2005 +0000 @@ -333,6 +333,8 @@ uint8_t *direct_table; uint8_t direct_cache[5*8]; + uint8_t zigzag_scan[16]; + uint8_t field_scan[16]; }H264Context; static VLC coeff_token_vlc[4]; @@ -2721,6 +2723,18 @@ s->low_delay= 1; avctx->pix_fmt= PIX_FMT_YUV420P; + if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly + memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); + memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); + }else{ + int i; + for(i=0; i<16; i++){ +#define T(x) (((x)>>2) | (((x)<<2) & 0xF)) /* swap the two 2-bit halves of a 4-bit index, i.e. row<->column of a 4x4 position */ + h->zigzag_scan[i] = T(zigzag_scan[i]); + h-> field_scan[i] = T( field_scan[i]); + } + } + decode_init_vlc(h); if(avctx->extradata_size > 0 && avctx->extradata && @@ -4591,10 +4605,10 @@ // fill_non_zero_count_cache(h); if(IS_INTERLACED(mb_type)){ - scan= field_scan; + scan= h->field_scan; dc_scan= luma_dc_field_scan; }else{ - scan= zigzag_scan; + scan= h->zigzag_scan; dc_scan= luma_dc_zigzag_scan; } @@ -5575,10 +5589,10 @@ int dqp; if(IS_INTERLACED(mb_type)){ - scan= field_scan; + scan= h->field_scan; dc_scan= luma_dc_field_scan; }else{ - scan= zigzag_scan; + scan= h->zigzag_scan; dc_scan= luma_dc_zigzag_scan; } diff -r 6eded34ab57b -r 135aa7d0bd17 i386/idct_mmx.c --- a/i386/idct_mmx.c Wed Jun 01 04:51:46 2005 +0000 +++ b/i386/idct_mmx.c Wed Jun 01 08:43:40 2005 +0000 @@ -673,14 +673,11 @@ /* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */ IDCT4_1D( %%mm3, %%mm2, %%mm1, %%mm0, %%mm4, %%mm5 ) - /* in: 2,4,1,3 out: 2,3,0,1 */ - TRANSPOSE4( %%mm2, %%mm4, %%mm1, %%mm3, %%mm0 ) - "pxor %%mm7, %%mm7 \n\t" :: "m"(ff_pw_32)); - STORE_DIFF_4P( %%mm2, %%mm4, %%mm7, &dst[0*stride] ); - STORE_DIFF_4P( %%mm3, %%mm4, %%mm7, &dst[1*stride] ); - 
STORE_DIFF_4P( %%mm0, %%mm4, %%mm7, &dst[2*stride] ); - STORE_DIFF_4P( %%mm1, %%mm4, %%mm7, &dst[3*stride] ); + STORE_DIFF_4P( %%mm2, %%mm0, %%mm7, &dst[0*stride] ); + STORE_DIFF_4P( %%mm4, %%mm0, %%mm7, &dst[1*stride] ); + STORE_DIFF_4P( %%mm1, %%mm0, %%mm7, &dst[2*stride] ); + STORE_DIFF_4P( %%mm3, %%mm0, %%mm7, &dst[3*stride] ); }