diff x86/snowdsp_mmx.c @ 11485:0f0cd6b5791f libavcodec

Separate DWT from snow and dsputil. This moves the DWT functions from snow.c and dsputil.c to a file of their own. A new struct, DWTContext, holds the function pointers that were previously part of DSPContext.
author mru
date Sun, 14 Mar 2010 17:50:12 +0000
parents b57409c0c286
children 3fc4c625b6f3
line wrap: on
line diff
--- a/x86/snowdsp_mmx.c	Sun Mar 14 01:52:31 2010 +0000
+++ b/x86/snowdsp_mmx.c	Sun Mar 14 17:50:12 2010 +0000
@@ -22,9 +22,10 @@
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/snow.h"
+#include "libavcodec/dwt.h"
 #include "dsputil_mmx.h"
 
-void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
+static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
     const int w2= (width+1)>>1;
     DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
     const int w_l= (width>>1);
@@ -213,7 +214,7 @@
     }
 }
 
-void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
+static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
     const int w2= (width+1)>>1;
     IDWTELEM temp[width >> 1];
     const int w_l= (width>>1);
@@ -436,7 +437,7 @@
         "movdqa %%"s2", %%"t2" \n\t"\
         "movdqa %%"s3", %%"t3" \n\t"
 
-void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
     x86_reg i = width;
 
     while(i & 0x1F)
@@ -534,7 +535,7 @@
         "movq %%"s3", %%"t3" \n\t"
 
 
-void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
     x86_reg i = width;
     while(i & 15)
     {
@@ -847,7 +848,7 @@
 snow_inner_add_yblock_mmx_end("32")
 }
 
-void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                            int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
 
     if (b_w == 16)
@@ -861,7 +862,7 @@
          ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
 }
 
-void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                           int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
     if (b_w == 16)
         inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
@@ -870,3 +871,27 @@
     else
         ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
 }
+
+void ff_dwt_init_x86(DWTContext *c)
+{ /* Point c's DWT function pointers at the x86 SIMD routines in this file, chosen from the flags returned by mm_support(). */
+    mm_flags = mm_support(); /* mm_flags is not declared in this function; it must come from file or header scope */

+    if (mm_flags & FF_MM_MMX) { /* without the MMX flag, c is left untouched */
+        if(mm_flags & FF_MM_SSE2 & 0){ /* "& 0" makes this always false, so the SSE2 branch is never taken; NOTE(review): looks like a deliberate disable -- confirm */
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
+#if HAVE_7REGS /* vertical variants are installed only when HAVE_7REGS is nonzero; presumably their inline asm needs 7 registers -- verify */
+            c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
+#endif
+            c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
+        }
+        else{ /* always taken when the MMX flag is set, because of the "& 0" above */
+            if(mm_flags & FF_MM_MMX2){ /* the MMX compose routines are installed only when the MMX2 flag is also set */
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
+#if HAVE_7REGS
+            c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
+#endif
+            }
+            c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; /* installed whenever the MMX flag is set, with or without MMX2 */
+        }
+    }
+}