# HG changeset patch # User mru # Date 1268702220 0 # Node ID 2a4dc3c0b012d77498af6a0250e28510b668690d # Parent 8e889ce0d6165a2dd84f153e625d036f7d17342e Move H264 dsputil functions into their own struct This moves the H264-specific functions from DSPContext to the new H264DSPContext. The code is made conditional on CONFIG_H264DSP which is set by the codecs requiring it. The qpel and chroma MC functions are not moved as these are used by non-h264 code. diff -r 8e889ce0d616 -r 2a4dc3c0b012 Makefile --- a/Makefile Mon Mar 15 23:40:51 2010 +0000 +++ b/Makefile Tue Mar 16 01:17:00 2010 +0000 @@ -33,6 +33,7 @@ FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o OBJS-$(CONFIG_FFT) += avfft.o fft.o $(FFT-OBJS-yes) OBJS-$(CONFIG_GOLOMB) += golomb.o +OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o h264pred.o OBJS-$(CONFIG_LPC) += lpc.o OBJS-$(CONFIG_LSP) += lsp.o OBJS-$(CONFIG_MDCT) += mdct.o @@ -146,7 +147,7 @@ ratecontrol.o h263.o ituh263enc.o \ flvenc.o mpeg12data.o \ mpegvideo.o error_resilience.o -OBJS-$(CONFIG_H264_DECODER) += h264.o h264idct.o h264pred.o \ +OBJS-$(CONFIG_H264_DECODER) += h264.o \ h264_loopfilter.o h264_direct.o \ cabac.o h264_sei.o h264_ps.o \ h264_refs.o h264_cavlc.o h264_cabac.o\ @@ -282,9 +283,9 @@ OBJS-$(CONFIG_RV10_ENCODER) += rv10enc.o OBJS-$(CONFIG_RV20_DECODER) += rv10.o OBJS-$(CONFIG_RV20_ENCODER) += rv20enc.o -OBJS-$(CONFIG_RV30_DECODER) += rv30.o rv34.o h264pred.o rv30dsp.o \ +OBJS-$(CONFIG_RV30_DECODER) += rv30.o rv34.o rv30dsp.o \ mpegvideo.o error_resilience.o -OBJS-$(CONFIG_RV40_DECODER) += rv40.o rv34.o h264pred.o rv40dsp.o \ +OBJS-$(CONFIG_RV40_DECODER) += rv40.o rv34.o rv40dsp.o \ mpegvideo.o error_resilience.o OBJS-$(CONFIG_SGI_DECODER) += sgidec.o OBJS-$(CONFIG_SGI_ENCODER) += sgienc.o rle.o @@ -315,7 +316,7 @@ mpegvideo.o error_resilience.o \ ituh263enc.o mpegvideo_enc.o \ ratecontrol.o mpeg12data.o -OBJS-$(CONFIG_SVQ3_DECODER) += h264.o svq3.o h264idct.o h264pred.o \ +OBJS-$(CONFIG_SVQ3_DECODER) += h264.o svq3.o \ h264_loopfilter.o h264_direct.o \ h264_sei.o h264_ps.o h264_refs.o \ h264_cavlc.o h264_cabac.o cabac.o \ @@ -538,8 +539,8 @@ OBJS-$(CONFIG_DVDSUB_PARSER) += dvdsub_parser.o OBJS-$(CONFIG_H261_PARSER) += h261_parser.o OBJS-$(CONFIG_H263_PARSER) += h263_parser.o -OBJS-$(CONFIG_H264_PARSER) += h264_parser.o h264.o h264idct.o \ - h264pred.o cabac.o \ +OBJS-$(CONFIG_H264_PARSER) += h264_parser.o h264.o \ + cabac.o \ h264_refs.o h264_sei.o h264_direct.o \ h264_loopfilter.o h264_cabac.o \ h264_cavlc.o h264_ps.o \ @@ -631,13 +632,16 @@ alpha/mpegvideo_alpha.o \ alpha/simple_idct_alpha.o \ +ARM-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o \ + arm/h264pred_init_arm.o \ + OBJS-$(ARCH_ARM) += arm/dsputil_init_arm.o \ arm/dsputil_arm.o \ arm/fft_init_arm.o \ - arm/h264pred_init_arm.o \ arm/jrevdct_arm.o \ arm/mpegvideo_arm.o \ arm/simple_idct_arm.o \ + $(ARM-OBJS-yes) OBJS-$(HAVE_ARMV5TE) += arm/dsputil_init_armv5te.o \ arm/mpegvideo_armv5te.o \ @@ -658,7 +662,7 @@ NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ -NEON-OBJS-$(CONFIG_H264_DECODER) += arm/h264dsp_neon.o \ +NEON-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_neon.o \ arm/h264idct_neon.o \ arm/h264pred_neon.o \ @@ -680,7 +684,7 @@ OBJS-$(ARCH_PPC) += ppc/dsputil_ppc.o \ -ALTIVEC-OBJS-$(CONFIG_H264_DECODER) += ppc/h264_altivec.o +ALTIVEC-OBJS-$(CONFIG_H264DSP) += ppc/h264_altivec.o ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP3_DECODER) += ppc/vp3dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o diff -r 8e889ce0d616 -r 2a4dc3c0b012 arm/dsputil_init_neon.c --- a/arm/dsputil_init_neon.c Mon Mar 15 23:40:51 2010 +0000 +++ b/arm/dsputil_init_neon.c Tue Mar 16 01:17:00 2010 +0000 @@ -131,69 +131,6 @@ void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); -void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0); -void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0); -void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0); -void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0); - -void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); -void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den, - int weight, int offset); - -void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); -void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride, - int log2_den, int weightd, int weights, - int offset); - -void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, - const uint8_t nnzc[6*8]); -void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset, - DCTELEM *block, int stride, - const uint8_t nnzc[6*8]); -void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset, - DCTELEM *block, int stride, - const uint8_t nnzc[6*8]); - void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); @@ -352,35 +289,6 @@ c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon; c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon; c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; - - c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; - c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; - c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; - c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; - - c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon; - c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon; - c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon; - c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon; - c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon; - c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon; - c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon; - c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon; - - c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon; - c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon; - c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon; - c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon; - c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon; - c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon; - c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon; - c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon; - - c->h264_idct_add = ff_h264_idct_add_neon; - c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; - c->h264_idct_add16 = ff_h264_idct_add16_neon; - c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; - c->h264_idct_add8 = ff_h264_idct_add8_neon; } if (CONFIG_VP3_DECODER) { diff -r 8e889ce0d616 -r 2a4dc3c0b012 arm/h264dsp_init_arm.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arm/h264dsp_init_arm.c Tue Mar 16 01:17:00 2010 +0000 @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2010 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavcodec/dsputil.h" +#include "libavcodec/h264dsp.h" + +void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); +void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); +void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); +void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); + +void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); +void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); +void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); +void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); +void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); +void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); +void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); +void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den, + int weight, int offset); + +void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); +void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); +void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); +void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); +void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); +void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); +void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); +void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride, + int log2_den, int weightd, int weights, + int offset); + +void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset, + DCTELEM *block, int stride, + const uint8_t nnzc[6*8]); +void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset, + DCTELEM *block, int stride, + const uint8_t nnzc[6*8]); +void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset, + DCTELEM *block, int stride, + const uint8_t nnzc[6*8]); + +#if HAVE_NEON +static void ff_h264dsp_init_neon(H264DSPContext *c) +{ + c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; + c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; + c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; + c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; + + c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon; + c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon; + c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon; + c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon; + c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon; + c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon; + c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon; + c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon; + + c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon; + c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon; + c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon; + c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon; + c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon; + c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon; + c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon; + c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon; + + c->h264_idct_add = ff_h264_idct_add_neon; + c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; + c->h264_idct_add16 = ff_h264_idct_add16_neon; + c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; + c->h264_idct_add8 = ff_h264_idct_add8_neon; +} +#endif + +void ff_h264dsp_init_arm(H264DSPContext *c) +{ + if (HAVE_NEON) ff_h264dsp_init_neon(c); +} diff -r 8e889ce0d616 -r 2a4dc3c0b012 dsputil.c --- a/dsputil.c Mon Mar 15 23:40:51 2010 +0000 +++ b/dsputil.c Tue Mar 16 01:17:00 2010 +0000 @@ -2597,76 +2597,6 @@ #undef op2_put #endif -#define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom ) -#define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) -#define H264_WEIGHT(W,H) \ -static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ - int y; \ - offset <<= log2_denom; \ - if(log2_denom) offset += 1<<(log2_denom-1); \ - for(y=0; y> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); - tc++; - } - if( FFABS( q2 - q0 ) < beta ) { - if(tc0[i]) - pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); - tc++; - } - - i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */ - pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ - } - pix += ystride; - } - } -} -static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) -{ - h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0); -} -static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) -{ - h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0); -} - -static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) -{ - int d; - for( d = 0; d < 16; d++ ) { - const int p2 = pix[-3*xstride]; - const int p1 = pix[-2*xstride]; - const int p0 = pix[-1*xstride]; - - const int q0 = pix[ 0*xstride]; - const int q1 = pix[ 1*xstride]; - const int q2 = pix[ 2*xstride]; - - if( FFABS( p0 - q0 ) < alpha && - FFABS( p1 - p0 ) < beta && - FFABS( q1 - q0 ) < beta ) { - - if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ - if( FFABS( p2 - p0 ) < beta) - { - const int p3 = pix[-4*xstride]; - /* p0', p1', p2' */ - pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; - pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; - pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; - } else { - /* p0' */ - pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; - } - if( FFABS( q2 - q0 ) < beta) - { - const int q3 = pix[3*xstride]; - /* q0', q1', q2' */ - pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; - pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; - pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; - } else { - /* q0' */ - pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } - }else{ - /* p0', q0' */ - pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; - pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; - } - } - pix += ystride; - } -} -static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) -{ - h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta); -} -static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) -{ - h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta); -} - -static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) -{ - int i, d; - for( i = 0; i < 4; i++ ) { - const int tc = tc0[i]; - if( tc <= 0 ) { - pix += 2*ystride; - continue; - } - for( d = 0; d < 2; d++ ) { - const int p0 = pix[-1*xstride]; - const int p1 = pix[-2*xstride]; - const int q0 = pix[0]; - const int q1 = pix[1*xstride]; - - if( FFABS( p0 - q0 ) < alpha && - FFABS( p1 - p0 ) < beta && - FFABS( q1 - q0 ) < beta ) { - - int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); - - pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */ - pix[0] = av_clip_uint8( q0 - delta ); /* q0' */ - } - pix += ystride; - } - } -} -static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) -{ - h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0); -} -static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) -{ - h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0); -} - -static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) -{ - int d; - for( d = 0; d < 8; d++ ) { - const int p0 = pix[-1*xstride]; - const int p1 = pix[-2*xstride]; - const int q0 = pix[0]; - const int q1 = pix[1*xstride]; - - if( FFABS( p0 - q0 ) < alpha && - FFABS( p1 - p0 ) < beta && - FFABS( q1 - q0 ) < beta ) { - - pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ - pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ - } - pix += ystride; - } -} -static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) -{ - h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta); -} -static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) -{ - h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta); -} - static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int s, i; @@ -4502,17 +4256,6 @@ } } - if (CONFIG_H264_DECODER) { - c->h264_idct_add= ff_h264_idct_add_c; - c->h264_idct8_add= ff_h264_idct8_add_c; - c->h264_idct_dc_add= ff_h264_idct_dc_add_c; - c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; - c->h264_idct_add16 = ff_h264_idct_add16_c; - c->h264_idct8_add4 = ff_h264_idct8_add4_c; - c->h264_idct_add8 = ff_h264_idct_add8_c; - c->h264_idct_add16intra= ff_h264_idct_add16intra_c; - } - c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; @@ -4635,27 +4378,6 @@ c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c; c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c; - c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; - c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; - c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; - c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; - c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; - c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; - c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; - c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; - c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; - c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; - c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; - c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; - c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; - c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; - c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; - c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; - c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; - c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; - c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; - c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; - c->draw_edges = draw_edges_c; #if CONFIG_CAVS_DECODER @@ -4737,16 +4459,6 @@ c->add_png_paeth_prediction= ff_add_png_paeth_prediction; #endif - c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; - c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; - c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c; - c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c; - c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; - c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; - c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; - c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; - c->h264_loop_filter_strength= NULL; - if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { c->h263_h_loop_filter= h263_h_loop_filter_c; c->h263_v_loop_filter= h263_v_loop_filter_c; diff -r 8e889ce0d616 -r 2a4dc3c0b012 dsputil.h --- a/dsputil.h Mon Mar 15 23:40:51 2010 +0000 +++ b/dsputil.h Tue Mar 16 01:17:00 2010 +0000 @@ -149,8 +149,6 @@ typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); -typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); -typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h); @@ -340,9 +338,6 @@ qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; - h264_weight_func weight_h264_pixels_tab[10]; - h264_biweight_func biweight_h264_pixels_tab[10]; - /* AVS specific */ qpel_mc_func put_cavs_qpel_pixels_tab[2][16]; qpel_mc_func avg_cavs_qpel_pixels_tab[2][16]; @@ -370,19 +365,6 @@ void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); - void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); - /* v/h_loop_filter_luma_intra: align 16 */ - void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - // h264_loop_filter_strength: simd only. the C version is inlined in h264.c - void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); - void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); @@ -517,21 +499,6 @@ void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w); #define EDGE_WIDTH 16 - /* h264 functions */ - /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them - NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them - The reason for above, is that no 2 out of one list may use a different permutation. - */ - void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_dct)(DCTELEM block[4][4]); - void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*prefetch)(void *mem, int stride, int h); void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); diff -r 8e889ce0d616 -r 2a4dc3c0b012 h264.c --- a/h264.c Mon Mar 15 23:40:51 2010 +0000 +++ b/h264.c Tue Mar 16 01:17:00 2010 +0000 @@ -678,7 +678,7 @@ static void init_dequant8_coeff_table(H264Context *h){ int i,q,x; - const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly + const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly h->dequant8_coeff[0] = h->dequant8_buffer[0]; h->dequant8_coeff[1] = h->dequant8_buffer[1]; @@ -701,7 +701,7 @@ static void init_dequant4_coeff_table(H264Context *h){ int i,j,q,x; - const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly + const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly for(i=0; i<6; i++ ){ h->dequant4_coeff[i] = h->dequant4_buffer[i]; for(j=0; jheight = s->avctx->height; s->codec_id= s->avctx->codec->id; + ff_h264dsp_init(&h->h264dsp); ff_h264_pred_init(&h->hpc, s->codec_id); h->dequant_coeff_pps= -1; @@ -1159,8 +1160,8 @@ idct_dc_add = idct_add = s->dsp.add_pixels8; }else{ - idct_dc_add = s->dsp.h264_idct8_dc_add; - idct_add = s->dsp.h264_idct8_add; + idct_dc_add = h->h264dsp.h264_idct8_dc_add; + idct_add = h->h264dsp.h264_idct8_add; } for(i=0; i<16; i+=4){ uint8_t * const ptr= dest_y + block_offset[i]; @@ -1184,8 +1185,8 @@ idct_dc_add = idct_add = s->dsp.add_pixels4; }else{ - idct_dc_add = s->dsp.h264_idct_dc_add; - idct_add = s->dsp.h264_idct_add; + idct_dc_add = h->h264dsp.h264_idct_dc_add; + idct_add = h->h264dsp.h264_idct_add; } for(i=0; i<16; i++){ uint8_t * const ptr= dest_y + block_offset[i]; @@ -1236,7 +1237,7 @@ hl_motion(h, dest_y, dest_cb, dest_cr, s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, - s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab); + h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); } @@ -1253,7 +1254,7 @@ } } }else{ - s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); + h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); } }else if(h->cbp&15){ if(transform_bypass){ @@ -1266,9 +1267,9 @@ } }else{ if(IS_8x8DCT(mb_type)){ - s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); + h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); }else{ - s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); + h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); } } } @@ -1299,8 +1300,8 @@ chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); if(is_h264){ - idct_add = s->dsp.h264_idct_add; - idct_dc_add = s->dsp.h264_idct_dc_add; + idct_add = h->h264dsp.h264_idct_add; + idct_dc_add = h->h264dsp.h264_idct_dc_add; for(i=16; i<16+8; i++){ if(h->non_zero_count_cache[ scan8[i] ]) idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); @@ -1560,7 +1561,7 @@ static void init_scan_tables(H264Context *h){ MpegEncContext * const s = &h->s; int i; - if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly + if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); }else{ @@ -1571,7 +1572,7 @@ #undef T } } - if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){ + if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){ memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t)); memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t)); @@ -3003,7 +3004,7 @@ } // printf("\n"); - s->dsp.h264_idct_add(ref, block, 4); + h->h264dsp.h264_idct_add(ref, block, 4); /* for(j=0; j<16; j++){ printf("%d ", ref[j]); } diff -r 8e889ce0d616 -r 2a4dc3c0b012 h264.h --- a/h264.h Mon Mar 15 23:40:51 2010 +0000 +++ b/h264.h Tue Mar 16 01:17:00 2010 +0000 @@ -32,6 +32,7 @@ #include "dsputil.h" #include "cabac.h" #include "mpegvideo.h" +#include "h264dsp.h" #include "h264pred.h" #include "rectangle.h" @@ -262,6 +263,7 @@ */ typedef struct H264Context{ MpegEncContext s; + H264DSPContext h264dsp; int chroma_qp[2]; //QPc int qp_thresh; ///< QP threshold to skip loopfilter diff -r 8e889ce0d616 -r 2a4dc3c0b012 h264_loopfilter.c --- a/h264_loopfilter.c Mon Mar 15 23:40:51 2010 +0000 +++ b/h264_loopfilter.c Tue Mar 16 01:17:00 2010 +0000 @@ -112,9 +112,9 @@ tc[1] = tc0_table[index_a][bS[1]]; tc[2] = tc0_table[index_a][bS[2]]; tc[3] = tc0_table[index_a][bS[3]]; - h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); + h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); } else { - h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); + h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); } } static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { @@ -129,9 +129,9 @@ tc[1] = tc0_table[index_a][bS[1]]+1; tc[2] = tc0_table[index_a][bS[2]]+1; tc[3] = tc0_table[index_a][bS[3]]+1; - h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); + h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { - h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); + h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); } } @@ -282,9 +282,9 @@ tc[1] = tc0_table[index_a][bS[1]]; tc[2] = tc0_table[index_a][bS[2]]; tc[3] = tc0_table[index_a][bS[3]]; - h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); + h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); } else { - h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); + h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); } } @@ -300,9 +300,9 @@ tc[1] = tc0_table[index_a][bS[1]]+1; tc[2] = tc0_table[index_a][bS[2]]+1; tc[3] = tc0_table[index_a][bS[3]]+1; - h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); + h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { - h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); + h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); } } @@ -314,7 +314,7 @@ mb_xy = h->mb_xy; - if(!h->top_type || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { + if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); return; } @@ -381,7 +381,7 @@ int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1; edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; - s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, + h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); } if( IS_INTRA(left_type) ) diff -r 8e889ce0d616 -r 2a4dc3c0b012 h264dsp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/h264dsp.c Tue Mar 16 01:17:00 2010 +0000 @@ -0,0 +1,320 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003-2010 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file libavcodec/h264dsp.c + * H.264 / AVC / MPEG4 part10 DSP functions. + * @author Michael Niedermayer + */ + +#include +#include "avcodec.h" +#include "h264dsp.h" + +#define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom ) +#define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) +#define H264_WEIGHT(W,H) \ +static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ + int y; \ + offset <<= log2_denom; \ + if(log2_denom) offset += 1<<(log2_denom-1); \ + for(y=0; y> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); + tc++; + } + if( FFABS( q2 - q0 ) < beta ) { + if(tc0[i]) + pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); + tc++; + } + + i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */ + pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ + } + pix += ystride; + } + } +} +static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0); +} +static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0); +} + +static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) +{ + int d; + for( d = 0; d < 16; d++ ) { + const int p2 = pix[-3*xstride]; + const int p1 = pix[-2*xstride]; + const int p0 = pix[-1*xstride]; + + const int q0 = pix[ 0*xstride]; + const int q1 = pix[ 1*xstride]; + const int q2 = pix[ 2*xstride]; + + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { + + if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( FFABS( p2 - p0 ) < beta) + { + const int p3 = pix[-4*xstride]; + /* p0', p1', p2' */ + pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; + pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; + pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; + } else { + /* p0' */ + pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + } + if( FFABS( q2 - q0 ) < beta) + { + const int q3 = pix[3*xstride]; + /* q0', q1', q2' */ + pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; + pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; + pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; + } else { + /* q0' */ + pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + }else{ + /* p0', q0' */ + pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + } + pix += ystride; + } +} +static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta); +} +static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta); +} + +static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) +{ + int i, d; + for( i = 0; i < 4; i++ ) { + const int tc = tc0[i]; + if( tc <= 0 ) { + pix += 2*ystride; + continue; + } + for( d = 0; d < 2; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { + + int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + + pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */ + pix[0] = av_clip_uint8( q0 - delta ); /* q0' */ + } + pix += ystride; + } + } +} +static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0); +} +static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0); +} + +static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) +{ + int d; + for( d = 0; d < 8; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { + + pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + } + pix += ystride; + } +} +static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta); +} +static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta); +} + +void ff_h264dsp_init(H264DSPContext *c) +{ + c->h264_idct_add= ff_h264_idct_add_c; + c->h264_idct8_add= ff_h264_idct8_add_c; + c->h264_idct_dc_add= ff_h264_idct_dc_add_c; + c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; + c->h264_idct_add16 = ff_h264_idct_add16_c; + c->h264_idct8_add4 = ff_h264_idct8_add4_c; + c->h264_idct_add8 = ff_h264_idct_add8_c; + c->h264_idct_add16intra= ff_h264_idct_add16intra_c; + + c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; + c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; + c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; + c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; + c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; + c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; + c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; + c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; + c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; + c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; + c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; + c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; + c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; + c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; + c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; + c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; + c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; + c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; + c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; + c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; + + c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; + c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; + c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c; + c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c; + c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; + c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; + c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; + c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; + c->h264_loop_filter_strength= NULL; + + if (ARCH_ARM) ff_h264dsp_init_arm(c); + if (ARCH_PPC) ff_h264dsp_init_ppc(c); + if (HAVE_MMX) ff_h264dsp_init_x86(c); +} diff -r 8e889ce0d616 -r 2a4dc3c0b012 h264dsp.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/h264dsp.h Tue Mar 16 01:17:00 2010 +0000 @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003-2010 Michael Niedermayer + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file libavcodec/h264dsp.h + * H.264 DSP functions. + * @author Michael Niedermayer + */ + +#ifndef AVCODEC_H264DSP_H +#define AVCODEC_H264DSP_H + +#include +#include "dsputil.h" + +//typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); +typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); + +/** + * Context for storing H.264 DSP functions + */ +typedef struct H264DSPContext{ + /* weighted MC */ + h264_weight_func weight_h264_pixels_tab[10]; + h264_biweight_func biweight_h264_pixels_tab[10]; + + /* loop filter */ + void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); + /* v/h_loop_filter_luma_intra: align 16 */ + void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); + void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); + void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); + // h264_loop_filter_strength: simd only. the C version is inlined in h264.c + void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], + int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); + + /* IDCT */ + /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them + NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them + The reason for above, is that no 2 out of one list may use a different permutation. + */ + void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_dct)(DCTELEM block[4][4]); + void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); + void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); + void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); + void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); +}H264DSPContext; + +void ff_h264dsp_init(H264DSPContext *c); +void ff_h264dsp_init_arm(H264DSPContext *c); +void ff_h264dsp_init_ppc(H264DSPContext *c); +void ff_h264dsp_init_x86(H264DSPContext *c); + +#endif /* AVCODEC_H264DSP_H */ diff -r 8e889ce0d616 -r 2a4dc3c0b012 ppc/h264_altivec.c --- a/ppc/h264_altivec.c Mon Mar 15 23:40:51 2010 +0000 +++ b/ppc/h264_altivec.c Tue Mar 16 01:17:00 2010 +0000 @@ -20,6 +20,7 @@ #include "libavcodec/dsputil.h" #include "libavcodec/h264data.h" +#include "libavcodec/h264dsp.h" #include "dsputil_ppc.h" #include "dsputil_altivec.h" @@ -974,16 +975,6 @@ c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; c->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec; c->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec; - c->h264_idct_add = ff_h264_idct_add_altivec; - c->h264_idct_add8 = ff_h264_idct_add8_altivec; - c->h264_idct_add16 = ff_h264_idct_add16_altivec; - c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; - c->h264_idct_dc_add= h264_idct_dc_add_altivec; - c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec; - c->h264_idct8_add = ff_h264_idct8_add_altivec; - c->h264_idct8_add4 = ff_h264_idct8_add4_altivec; - c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; - c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; #define dspfunc(PFX, IDX, NUM) \ c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \ @@ -1006,6 +997,22 @@ dspfunc(put_h264_qpel, 0, 16); dspfunc(avg_h264_qpel, 0, 16); #undef dspfunc + } +} + +void ff_h264dsp_init_ppc(H264DSPContext *c) +{ + if (has_altivec()) { + c->h264_idct_add = ff_h264_idct_add_altivec; + c->h264_idct_add8 = ff_h264_idct_add8_altivec; + c->h264_idct_add16 = ff_h264_idct_add16_altivec; + c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; + c->h264_idct_dc_add= h264_idct_dc_add_altivec; + c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec; + c->h264_idct8_add = ff_h264_idct8_add_altivec; + c->h264_idct8_add4 = ff_h264_idct8_add4_altivec; + c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; + c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec; diff -r 8e889ce0d616 -r 2a4dc3c0b012 x86/dsputil_mmx.c --- a/x86/dsputil_mmx.c Mon Mar 15 23:40:51 2010 +0000 +++ b/x86/dsputil_mmx.c Tue Mar 16 01:17:00 2010 +0000 @@ -24,6 +24,7 @@ #include "libavutil/x86_cpu.h" #include "libavcodec/dsputil.h" +#include "libavcodec/h264dsp.h" #include "libavcodec/mpegvideo.h" #include "libavcodec/simple_idct.h" #include "dsputil_mmx.h" @@ -2618,16 +2619,6 @@ c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; - c->h264_idct_dc_add= - c->h264_idct_add= ff_h264_idct_add_mmx; - c->h264_idct8_dc_add= - c->h264_idct8_add= ff_h264_idct8_add_mmx; - - c->h264_idct_add16 = ff_h264_idct_add16_mmx; - c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; - c->h264_idct_add8 = ff_h264_idct_add8_mmx; - c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; - if (CONFIG_VP6_DECODER) { c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; } @@ -2649,13 +2640,6 @@ c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; - c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; - c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; - c->h264_idct_add16 = ff_h264_idct_add16_mmx2; - c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; - c->h264_idct_add8 = ff_h264_idct_add8_mmx2; - c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; - if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; @@ -2716,31 +2700,6 @@ c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; - c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; - c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; - c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; - c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; - c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; - c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; - c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; - - c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; - c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; - c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; - c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; - c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; - c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; - c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; - c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; - - c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; - c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; - c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; - c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; - c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; - c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; - c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; - c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; #if HAVE_YASM c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; @@ -2825,9 +2784,6 @@ H264_QPEL_FUNCS(0, 0, sse2); } if(mm_flags & FF_MM_SSE2){ - c->h264_idct8_add = ff_h264_idct8_add_sse2; - c->h264_idct8_add4= ff_h264_idct8_add4_sse2; - H264_QPEL_FUNCS(0, 1, sse2); H264_QPEL_FUNCS(0, 2, sse2); H264_QPEL_FUNCS(0, 3, sse2); @@ -2874,26 +2830,6 @@ } #endif -#if CONFIG_GPL && HAVE_YASM - if (mm_flags & FF_MM_MMX2){ -#if ARCH_X86_32 - c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; - c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; -#endif - if( mm_flags&FF_MM_SSE2 ){ -#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110 - c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; - c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; - c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; - c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; -#endif - c->h264_idct_add16 = ff_h264_idct_add16_sse2; - c->h264_idct_add8 = ff_h264_idct_add8_sse2; - c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; - } - } -#endif - if(mm_flags & FF_MM_3DNOW){ c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; c->vector_fmul = vector_fmul_3dnow; @@ -2983,3 +2919,81 @@ //ff_idct = just_return; #endif } + +#if CONFIG_H264DSP +void ff_h264dsp_init_x86(H264DSPContext *c) +{ + mm_flags = mm_support(); + + if (mm_flags & FF_MM_MMX) { + c->h264_idct_dc_add= + c->h264_idct_add= ff_h264_idct_add_mmx; + c->h264_idct8_dc_add= + c->h264_idct8_add= ff_h264_idct8_add_mmx; + + c->h264_idct_add16 = ff_h264_idct_add16_mmx; + c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; + c->h264_idct_add8 = ff_h264_idct_add8_mmx; + c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; + + if (mm_flags & FF_MM_MMX2) { + c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; + c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; + c->h264_idct_add16 = ff_h264_idct_add16_mmx2; + c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; + c->h264_idct_add8 = ff_h264_idct_add8_mmx2; + c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; + + c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; + c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; + c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; + c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; + c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; + c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; + c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; + + c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; + c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; + c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; + c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; + c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; + c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; + c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; + c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; + + c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; + c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; + c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; + c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; + c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; + c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; + c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; + c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; + } + if(mm_flags & FF_MM_SSE2){ + c->h264_idct8_add = ff_h264_idct8_add_sse2; + c->h264_idct8_add4= ff_h264_idct8_add4_sse2; + } + +#if CONFIG_GPL && HAVE_YASM + if (mm_flags & FF_MM_MMX2){ +#if ARCH_X86_32 + c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; + c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; +#endif + if( mm_flags&FF_MM_SSE2 ){ +#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110 + c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; + c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; + c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; + c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; +#endif + c->h264_idct_add16 = ff_h264_idct_add16_sse2; + c->h264_idct_add8 = ff_h264_idct_add8_sse2; + c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; + } + } +#endif + } +} +#endif /* CONFIG_H264DSP */