changeset 12366:09a31ef6ed58 libavcodec

H.264: SSE2/SSSE3 weighted prediction asm. Patch by Eli Friedman <eli.friedman at gmail dot com>.
author darkshikari
date Thu, 05 Aug 2010 00:13:38 +0000
parents 7c54834209f6
children 06bdd447f4f7
files x86/Makefile x86/dsputil_mmx.c x86/h264dsp_mmx.c
diffstat 3 files changed, 23 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/x86/Makefile	Wed Aug 04 23:04:05 2010 +0000
+++ b/x86/Makefile	Thu Aug 05 00:13:38 2010 +0000
@@ -10,6 +10,7 @@
 
 YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock_sse2.o       \
                                           x86/h264_intrapred.o          \
+                                          x86/h264_weight_sse2.o        \
 
 YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp_yasm.o
 
--- a/x86/dsputil_mmx.c	Wed Aug 04 23:04:05 2010 +0000
+++ b/x86/dsputil_mmx.c	Thu Aug 05 00:13:38 2010 +0000
@@ -3000,6 +3000,8 @@
             c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
 #endif
             if( mm_flags&FF_MM_SSE2 ){
+                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
+                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
                 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
                 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
@@ -3012,6 +3014,10 @@
                 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
 #endif
             }
+            if ( mm_flags&FF_MM_SSSE3 ){
+                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
+                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
+            }
         }
 #endif
     }
--- a/x86/h264dsp_mmx.c	Wed Aug 04 23:04:05 2010 +0000
+++ b/x86/h264dsp_mmx.c	Thu Aug 05 00:13:38 2010 +0000
@@ -2323,6 +2323,22 @@
 H264_WEIGHT( 4, 4)
 H264_WEIGHT( 4, 2)
 
+void ff_h264_biweight_8x8_sse2(uint8_t *dst, uint8_t *src, int stride,
+                               int log2_denom, int weightd, int weights,
+                               int offset);
+
+void ff_h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride,
+                                 int log2_denom, int weightd, int weights,
+                                 int offset);
+
+void ff_h264_biweight_8x8_ssse3(uint8_t *dst, uint8_t *src, int stride,
+                                int log2_denom, int weightd, int weights,
+                                int offset);
+
+void ff_h264_biweight_16x16_ssse3(uint8_t *dst, uint8_t *src, int stride,
+                                  int log2_denom, int weightd, int weights,
+                                  int offset);
+
 void ff_pred16x16_vertical_mmx     (uint8_t *src, int stride);
 void ff_pred16x16_vertical_sse     (uint8_t *src, int stride);
 void ff_pred16x16_horizontal_mmx   (uint8_t *src, int stride);