changeset 8541:16a315fdad0b libavcodec

add AltiVec implementation of weight_h264_pixels(16|8)x(16|8|4) Patch by David Conrad %lessen42 A gmail P com%
author gpoirier
date Tue, 06 Jan 2009 22:29:26 +0000
parents 1d9421a599ae
children 5a923bd4f5c6
files ppc/h264_altivec.c
diffstat 1 files changed, 52 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ppc/h264_altivec.c	Tue Jan 06 22:01:57 2009 +0000
+++ b/ppc/h264_altivec.c	Tue Jan 06 22:29:26 2009 +0000
@@ -935,6 +935,50 @@
     write16x4(pix-2, stride, line1, line2, line3, line4);
 }
 
+static av_always_inline
+void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
+{
+    int y, aligned;
+    vec_u8 vblock;
+    vec_s16 vtemp, vweight, voffset, v0, v1;
+    vec_u16 vlog2_denom;
+    DECLARE_ALIGNED_16(int32_t, temp[4]);
+    LOAD_ZERO;
+
+    offset <<= log2_denom;
+    if(log2_denom) offset += 1<<(log2_denom-1);
+    temp[0] = log2_denom;
+    temp[1] = weight;
+    temp[2] = offset;
+
+    vtemp = (vec_s16)vec_ld(0, temp);
+    vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
+    vweight = vec_splat(vtemp, 3);
+    voffset = vec_splat(vtemp, 5);
+    aligned = !((unsigned long)block & 0xf);
+
+    for (y=0; y<h; y++) {
+        vblock = vec_ld(0, block);
+
+        v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
+        v1 = (vec_s16)vec_mergel(zero_u8v, vblock);
+
+        if (w == 16 || aligned) {
+            v0 = vec_mladd(v0, vweight, zero_s16v);
+            v0 = vec_adds(v0, voffset);
+            v0 = vec_sra(v0, vlog2_denom);
+        }
+        if (w == 16 || !aligned) {
+            v1 = vec_mladd(v1, vweight, zero_s16v);
+            v1 = vec_adds(v1, voffset);
+            v1 = vec_sra(v1, vlog2_denom);
+        }
+        vblock = vec_packsu(v0, v1);
+        vec_st(vblock, 0, block);
+
+        block += stride;
+    }
+}
 
 static av_always_inline
 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
@@ -1002,6 +1046,9 @@
 }
 
 #define H264_WEIGHT(W,H) \
+static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+    weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
+}\
 static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
     biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
 }
@@ -1051,6 +1098,11 @@
         dspfunc(avg_h264_qpel, 0, 16);
 #undef dspfunc
 
+        c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
+        c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
+        c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
+        c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
+        c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
         c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
         c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
         c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;