diff i386/dsputil_mmx.c @ 6336:ef3fb5a7e275 libavcodec

SSE2 H.264 motion compensation. Not new code; this just separates out the cases that didn't need SSSE3.
author lorenm
date Wed, 06 Feb 2008 12:32:31 +0000
parents 950811a14eb3
children 0a403ade8c81
line wrap: on
line diff
--- a/i386/dsputil_mmx.c	Wed Feb 06 04:44:21 2008 +0000
+++ b/i386/dsputil_mmx.c	Wed Feb 06 12:32:31 2008 +0000
@@ -3523,20 +3523,48 @@
             c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
         }
 
-/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
+/* H264_QPEL_FUNCS(x, y, CPU): store the mc<x><y>_<CPU> functions in slot x+y*4 of the put and avg H.264 qpel tables (first index 0 gets the qpel16 variant, 1 the qpel8 variant). x and y are token-pasted into the function names, so they must be literal digits. Expands to four complete statements, so use it only inside a braced block. */
+#define H264_QPEL_FUNCS(x, y, CPU)\
+            c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
+            c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
+            c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
+            c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
         if((mm_flags & MM_SSE2) && !(mm_flags & MM_3DNOW)){
             // these functions are slower than mmx on AMD, but faster on Intel
+/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
+*/
+            H264_QPEL_FUNCS(0, 0, sse2);
         }
-*/
-
+        if(mm_flags & MM_SSE2){
+            H264_QPEL_FUNCS(0, 1, sse2);
+            H264_QPEL_FUNCS(0, 2, sse2);
+            H264_QPEL_FUNCS(0, 3, sse2);
+            H264_QPEL_FUNCS(1, 1, sse2);
+            H264_QPEL_FUNCS(1, 2, sse2);
+            H264_QPEL_FUNCS(1, 3, sse2);
+            H264_QPEL_FUNCS(2, 1, sse2);
+            H264_QPEL_FUNCS(2, 2, sse2);
+            H264_QPEL_FUNCS(2, 3, sse2);
+            H264_QPEL_FUNCS(3, 1, sse2);
+            H264_QPEL_FUNCS(3, 2, sse2);
+            H264_QPEL_FUNCS(3, 3, sse2);
+        }
 #ifdef HAVE_SSSE3
         if(mm_flags & MM_SSSE3){
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, ssse3);
-            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, ssse3);
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, ssse3);
-            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, ssse3);
+            H264_QPEL_FUNCS(1, 0, ssse3);
+            H264_QPEL_FUNCS(1, 1, ssse3);
+            H264_QPEL_FUNCS(1, 2, ssse3);
+            H264_QPEL_FUNCS(1, 3, ssse3);
+            H264_QPEL_FUNCS(2, 0, ssse3);
+            H264_QPEL_FUNCS(2, 1, ssse3);
+            H264_QPEL_FUNCS(2, 2, ssse3);
+            H264_QPEL_FUNCS(2, 3, ssse3);
+            H264_QPEL_FUNCS(3, 0, ssse3);
+            H264_QPEL_FUNCS(3, 1, ssse3);
+            H264_QPEL_FUNCS(3, 2, ssse3);
+            H264_QPEL_FUNCS(3, 3, ssse3);
         }
 #endif