Mercurial > libavcodec.hg
view h264dsp.c @ 12266:48d6738904a9 libavcodec
Fix SPLATB_REG mess. Used to be a if/elseif/elseif/elseif spaghetti, so this
splits it into small optimization-specific macros which are selected for each
DSP function. The advantage of this approach is that the sse4 functions now
use the ssse3 codepath also without needing an explicit sse4 codepath.
author | rbultje |
---|---|
date | Sat, 24 Jul 2010 19:33:05 +0000 |
parents | 7dd2a45249a9 |
children |
line wrap: on
line source
/* * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at> * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** * @file * H.264 / AVC / MPEG4 part10 DSP functions. * @author Michael Niedermayer <michaelni@gmx.at> */ #include <stdint.h> #include "avcodec.h" #include "h264dsp.h" #define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom ) #define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) #define H264_WEIGHT(W,H) \ static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ int y; \ offset <<= log2_denom; \ if(log2_denom) offset += 1<<(log2_denom-1); \ for(y=0; y<H; y++, block += stride){ \ op_scale1(0); \ op_scale1(1); \ if(W==2) continue; \ op_scale1(2); \ op_scale1(3); \ if(W==4) continue; \ op_scale1(4); \ op_scale1(5); \ op_scale1(6); \ op_scale1(7); \ if(W==8) continue; \ op_scale1(8); \ op_scale1(9); \ op_scale1(10); \ op_scale1(11); \ op_scale1(12); \ op_scale1(13); \ op_scale1(14); \ op_scale1(15); \ } \ } \ static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ int y; \ offset = ((offset + 1) | 1) << log2_denom; \ for(y=0; y<H; y++, dst += stride, src += stride){ \ op_scale2(0); \ op_scale2(1); \ if(W==2) continue; \ op_scale2(2); \ op_scale2(3); \ if(W==4) continue; \ op_scale2(4); \ op_scale2(5); \ op_scale2(6); \ op_scale2(7); \ if(W==8) continue; \ op_scale2(8); \ op_scale2(9); \ op_scale2(10); \ op_scale2(11); \ op_scale2(12); \ op_scale2(13); \ op_scale2(14); \ op_scale2(15); \ } \ } H264_WEIGHT(16,16) H264_WEIGHT(16,8) H264_WEIGHT(8,16) H264_WEIGHT(8,8) H264_WEIGHT(8,4) H264_WEIGHT(4,8) H264_WEIGHT(4,4) H264_WEIGHT(4,2) H264_WEIGHT(2,4) H264_WEIGHT(2,2) #undef op_scale1 #undef op_scale2 #undef H264_WEIGHT static av_always_inline av_flatten void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) { int i, d; for( i = 0; i < 4; i++ ) { if( tc0[i] < 0 ) { pix += 4*ystride; continue; } for( d = 0; d < 4; d++ ) { const int p0 = pix[-1*xstride]; const int p1 = pix[-2*xstride]; const int p2 = pix[-3*xstride]; const int q0 = pix[0]; const int q1 = pix[1*xstride]; const int q2 = pix[2*xstride]; if( FFABS( p0 - q0 ) < alpha && FFABS( p1 - p0 ) < beta && FFABS( q1 - q0 ) < beta ) { int tc = tc0[i]; int i_delta; if( FFABS( p2 - p0 ) < beta ) { if(tc0[i]) pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); tc++; } if( FFABS( q2 - q0 ) < beta ) { if(tc0[i]) pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); tc++; } i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); pix[-xstride] = av_clip_uint8( p0 + i_delta ); /* p0' */ pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ } pix += ystride; } } } static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0); } static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0); } static av_always_inline av_flatten void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) { int d; for( d = 0; d < 16; d++ ) { const int p2 = pix[-3*xstride]; const int p1 = pix[-2*xstride]; const int p0 = pix[-1*xstride]; const int q0 = pix[ 0*xstride]; const int q1 = pix[ 1*xstride]; const int q2 = pix[ 2*xstride]; if( FFABS( p0 - q0 ) < alpha && FFABS( p1 - p0 ) < beta && FFABS( q1 - q0 ) < beta ) { if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ if( FFABS( p2 - p0 ) < beta) { const int p3 = pix[-4*xstride]; /* p0', p1', p2' */ pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; } else { /* p0' */ pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; } if( FFABS( q2 - q0 ) < beta) { const int q3 = pix[3*xstride]; /* q0', q1', q2' */ pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; } else { /* q0' */ pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; } }else{ /* p0', q0' */ pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; } } pix += ystride; } } static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) { h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta); } static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta) { h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta); } static av_always_inline av_flatten void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) { int i, d; for( i = 0; i < 4; i++ ) { const int tc = tc0[i]; if( tc <= 0 ) { pix += 2*ystride; continue; } for( d = 0; d < 2; d++ ) { const int p0 = pix[-1*xstride]; const int p1 = pix[-2*xstride]; const int q0 = pix[0]; const int q1 = pix[1*xstride]; if( FFABS( p0 - q0 ) < alpha && FFABS( p1 - p0 ) < beta && FFABS( q1 - q0 ) < beta ) { int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */ pix[0] = av_clip_uint8( q0 - delta ); /* q0' */ } pix += ystride; } } } static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0); } static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0); } static av_always_inline av_flatten void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) { int d; for( d = 0; d < 8; d++ ) { const int p0 = pix[-1*xstride]; const int p1 = pix[-2*xstride]; const int q0 = pix[0]; const int q1 = pix[1*xstride]; if( FFABS( p0 - q0 ) < alpha && FFABS( p1 - p0 ) < beta && FFABS( q1 - q0 ) < beta ) { pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ } pix += ystride; } } static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) { h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta); } static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) { h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta); } void ff_h264dsp_init(H264DSPContext *c) { c->h264_idct_add= ff_h264_idct_add_c; c->h264_idct8_add= ff_h264_idct8_add_c; c->h264_idct_dc_add= ff_h264_idct_dc_add_c; c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; c->h264_idct_add16 = ff_h264_idct_add16_c; c->h264_idct8_add4 = ff_h264_idct8_add4_c; c->h264_idct_add8 = ff_h264_idct_add8_c; c->h264_idct_add16intra= ff_h264_idct_add16intra_c; c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c; c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c; c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; c->h264_loop_filter_strength= NULL; if (ARCH_ARM) ff_h264dsp_init_arm(c); if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c); if (HAVE_MMX) ff_h264dsp_init_x86(c); }