changeset 8623:8f9f86b93f88 libavcodec

Faster VC-1 C loopfilter using lots of xor magic
author darkshikari
date Sun, 18 Jan 2009 07:14:36 +0000
parents a754939aea51
children b1663f732e67
files vc1.c
diffstat 1 files changed, 27 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/vc1.c	Sun Jan 18 05:52:20 2009 +0000
+++ b/vc1.c	Sun Jan 18 07:14:36 2009 +0000
@@ -314,30 +314,39 @@
  * @return whether other 3 pairs should be filtered or not
  * @see 8.6
  */
-static int vc1_filter_line(uint8_t* src, int stride, int pq){
-    int a0, a1, a2, a3, d, clip, filt3 = 0;
+static int av_always_inline vc1_filter_line(uint8_t* src, int stride, int pq){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
-    a0     = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
-    if(FFABS(a0) < pq){
-        a1 = (2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3;
-        a2 = (2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3;
-        a3 = FFMIN(FFABS(a1), FFABS(a2));
-        if(a3 < FFABS(a0)){
-            d = 5 * ((a0 >=0 ? a3 : -a3) - a0) / 8;
-            clip = (src[-1*stride] - src[ 0*stride])/2;
+    int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
+    int a0_sign = a0 >> 31;        /* Store sign: 0 if a0 >= 0, -1 (all ones) if a0 < 0 */
+    a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
+    if(a0 < pq){
+        int a1 = FFABS((2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3);
+        int a2 = FFABS((2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3);
+        if(a1 < a0 || a2 < a0){ /* same as FFMIN(a1, a2) < a0 */
+            int clip = src[-1*stride] - src[ 0*stride];
+            int clip_sign = clip >> 31;
+            clip = ((clip ^ clip_sign) - clip_sign)>>1; /* clip = FFABS(clip) / 2 */
             if(clip){
-                filt3 = 1;
-                if(clip > 0)
-                    d = av_clip(d, 0, clip);
-                else
-                    d = av_clip(d, clip, 0);
-                src[-1*stride] = cm[src[-1*stride] - d];
-                src[ 0*stride] = cm[src[ 0*stride] + d];
+                int a3 = FFMIN(a1, a2);
+                int d = 5 * (a3 - a0);
+                int d_sign = (d >> 31);
+                d = ((d ^ d_sign) - d_sign) >> 3; /* d = FFABS(d) / 8, so d >= 0 from here on */
+                d_sign ^= a0_sign;                /* sign d gets when it is restored below */
+
+                if( d_sign ^ clip_sign ) /* signs differ: no correction (~d was never 0 for d >= 0) */
+                    d = 0;
+                else{
+                    d = FFMIN(d, clip);
+                    d = (d ^ d_sign) - d_sign;          /* Restore sign */
+                    src[-1*stride] = cm[src[-1*stride] - d];
+                    src[ 0*stride] = cm[src[ 0*stride] + d];
+                }
+                return 1;
             }
         }
     }
-    return filt3;
+    return 0;
 }
 
 /**