diff x86/h264dsp_mmx.c @ 10938:2a50b786b888 libavcodec

Fix h264_loop_filter_strength_mmx2() so it works with b frames.
author michael
date Tue, 19 Jan 2010 16:40:36 +0000
parents afb9e56fbee9
children 3a723e8dcd26
line wrap: on
line diff
--- a/x86/h264dsp_mmx.c	Tue Jan 19 14:28:19 2010 +0000
+++ b/x86/h264dsp_mmx.c	Tue Jan 19 16:40:36 2010 +0000
@@ -850,6 +850,40 @@
                           "m"(mv[l][b_idx+d_idx+2][0])
                     );
                 }
+                if(bidir==1){ // extra pass: compare list l at b_idx against the opposite list (1-l) at b_idx+d_idx
+                    __asm__ volatile("pxor %%mm3, %%mm3 \n\t":); // mm3 = 0, accumulator for the crossed comparison
+                    for( l = bidir; l >= 0; l-- ) { // l = 1, then 0 (bidir is 1 here)
+                    __asm__ volatile(
+                        "movd %0, %%mm1 \n\t" // mm1 low dword = 32 bits at ref[l][b_idx], high dword cleared
+                        "punpckldq %1, %%mm1 \n\t" // mm1 high dword = 32 bits at ref[1-l][b_idx+d_idx]
+                        "punpckldq %%mm1, %%mm2 \n\t" // mm2 high dword = mm1 low dword; mm2 low dword is stale but discarded by punpckhbw below
+                        "pcmpeqb %%mm2, %%mm1 \n\t" // per byte: 0xFF where equal, 0x00 where different
+                        "paddb %%mm6, %%mm1 \n\t" // presumably mm6 = bytes of 1 (loaded outside this hunk): equal -> 0, different -> 1
+                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] (high 4 bytes interleaved with mm7, presumably zero, giving 4 words)
+                        "por %%mm1, %%mm3 \n\t" // fold the reference mismatch flags into mm3
+                        // motion vectors: list l at b_idx against list 1-l at b_idx+d_idx
+                        "movq %2, %%mm1 \n\t" // mm1 = 64 bits at mv[l][b_idx]
+                        "movq %3, %%mm2 \n\t" // mm2 = 64 bits at mv[l][b_idx+2]
+                        "psubw %4, %%mm1 \n\t" // word-wise difference against mv[1-l][b_idx+d_idx]
+                        "psubw %5, %%mm2 \n\t" // word-wise difference against mv[1-l][b_idx+d_idx+2]
+                        "packsswb %%mm2, %%mm1 \n\t" // signed-saturate the 8 word differences into the 8 bytes of mm1
+                        "paddb %%mm5, %%mm1 \n\t" // add the per-byte bias held in mm5 (loaded outside this hunk)
+                        "pminub %%mm4, %%mm1 \n\t" // unsigned clamp to the per-byte bound held in mm4 (loaded outside this hunk)
+                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit (bytes that hit the clamp become 0xFF)
+                        "por %%mm1, %%mm3 \n\t" // fold the motion vector flags into mm3
+                        ::"m"(ref[l][b_idx]), // %0
+                          "m"(ref[1-l][b_idx+d_idx]), // %1
+                          "m"(mv[l][b_idx][0]), // %2
+                          "m"(mv[l][b_idx+2][0]), // %3
+                          "m"(mv[1-l][b_idx+d_idx][0]), // %4
+                          "m"(mv[1-l][b_idx+d_idx+2][0]) // %5
+                    );
+                    }
+                    __asm__ volatile( // merge the crossed result (mm3) into mm0, which is produced outside this hunk
+                        "pcmpeqw %%mm7, %%mm3 \n\t" // mm3 word = 0xFFFF where it equalled mm7's word (presumably 0: no crossed difference), else 0
+                        "psubusw %%mm3, %%mm0 \n\t" // unsigned saturating subtract: mm0 word forced to 0 where mm3 is 0xFFFF, unchanged where mm3 is 0
+                    :);
+                }
             }
             __asm__ volatile(
                 "movd %0, %%mm1 \n\t"