Mercurial > libavcodec.hg
diff i386/h264dsp_mmx.c @ 8375:de2509cf3c44 libavcodec
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
thus avoiding the calling overhead.
New functions are not yet used.
author | michael |
---|---|
date | Thu, 18 Dec 2008 02:36:48 +0000 |
parents | eebc7209c47f |
children |
line wrap: on
line diff
--- a/i386/h264dsp_mmx.c Thu Dec 18 01:10:51 2008 +0000 +++ b/i386/h264dsp_mmx.c Thu Dec 18 02:36:48 2008 +0000 @@ -376,6 +376,101 @@ } } +//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split +static const uint8_t scan8[16 + 2*4]={ + 4+1*8, 5+1*8, 4+2*8, 5+2*8, + 6+1*8, 7+1*8, 6+2*8, 7+2*8, + 4+3*8, 5+3*8, 4+4*8, 5+4*8, + 6+3*8, 7+3*8, 6+4*8, 7+4*8, + 1+1*8, 2+1*8, + 1+2*8, 2+2*8, + 1+4*8, 2+4*8, + 1+5*8, 2+5*8, +}; + +static void ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i++){ + if(nnzc[ scan8[i] ]) + ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride); + } +} + +static void ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i+=4){ + if(nnzc[ scan8[i] ]) + ff_h264_idct8_add_mmx(dst + block_offset[i], block + i*16, stride); + } +} + + +static void ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i++){ + int nnz = nnzc[ scan8[i] ]; + if(nnz){ + if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); + else ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride); + } + } +} + +static void ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i++){ + if(nnzc[ scan8[i] ] || block[i*16]) + ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride); + } +} + +static void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i++){ + if(nnzc[ scan8[i] ]) ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride); + else if(block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); + } +} + +static void ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i+=4){ + int nnz = nnzc[ scan8[i] ]; + if(nnz){ + if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); + else ff_h264_idct8_add_mmx (dst + block_offset[i], block + i*16, stride); + } + } +} + +static void ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=0; i<16; i+=4){ + int nnz = nnzc[ scan8[i] ]; + if(nnz){ + if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride); + else ff_h264_idct8_add_sse2 (dst + block_offset[i], block + i*16, stride); + } + } +} + +static void ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=16; i<16+8; i++){ + if(nnzc[ scan8[i] ] || block[i*16]) + ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride); + } +} + +static void ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ + int i; + for(i=16; i<16+8; i++){ + if(nnzc[ scan8[i] ]) + ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride); + else if(block[i*16]) + ff_h264_idct_dc_add_mmx2(dest[(i&4)>>2] + block_offset[i], block + i*16, stride); + } +} /***********************************/ /* deblocking */