changeset 2057:4c663228e020 libavcodec

avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michael
date Sat, 05 Jun 2004 22:29:37 +0000
parents 31bf68b1792b
children 781ba1c85d46
files ppc/dsputil_altivec.c ppc/dsputil_altivec.h ppc/dsputil_ppc.c ppc/dsputil_ppc.h
diffstat 4 files changed, 114 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/ppc/dsputil_altivec.c	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_altivec.c	Sat Jun 05 22:29:37 2004 +0000
@@ -1647,3 +1647,113 @@
 #endif /* CONFIG_DARWIN */
     return 0;
 }
+
+/* Averages into block the 2x2-neighbourhood mean (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 2) >> 2 of pixels, 8 bytes per row for h rows; next one assumes that ((line_size % 8) == 0) */
+void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+
+    int j;
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
+ for (j = 0; j < 2; j++) { /* two passes, each covering a 4-byte-wide column */
+   int             i;
+   const uint32_t  a = (((const struct unaligned_32 *) (pixels))->l); /* 4 source bytes at x */
+   const uint32_t  b = (((const struct unaligned_32 *) (pixels + 1))->l); /* 4 source bytes at x + 1 */
+   uint32_t        l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; /* low 2 bits of every byte, plus the rounding term 2 */
+   uint32_t        h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); /* upper 6 bits of every byte, already divided by 4 */
+   uint32_t        l1, h1;
+   pixels += line_size;
+   for (i = 0; i < h; i += 2) { /* two output rows per iteration */
+     uint32_t        a = (((const struct unaligned_32 *) (pixels))->l);
+     uint32_t        b = (((const struct unaligned_32 *) (pixels + 1))->l);
+     l1 = (a & 0x03030303UL) + (b & 0x03030303UL); /* no rounding term here: l0 already carries it */
+     h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+     *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); /* per-byte 4-pixel mean, combined with the existing destination via rnd_avg32() */
+     pixels += line_size;
+     block += line_size;
+     a = (((const struct unaligned_32 *) (pixels))->l);
+     b = (((const struct unaligned_32 *) (pixels + 1))->l);
+     l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; /* second row: roles swap, l0/h0 are refreshed and l1/h1 are reused */
+     h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+     *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
+     pixels += line_size;
+     block += line_size;
+   } pixels += 4 - line_size * (h + 1); /* step back h + 1 rows and move 4 bytes right for the second column */
+   block += 4 - line_size * h; /* step back h rows and move 4 bytes right */
+ }
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+   register int i;
+   register vector unsigned char
+     pixelsv1, pixelsv2,
+     pixelsavg;
+   register vector unsigned char
+     blockv, temp1, temp2, blocktemp;
+   register vector unsigned short
+     pixelssum1, pixelssum2, temp3;
+   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); /* all-zero bytes */
+   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2); /* 2 in every 16-bit lane: rounding term and shift count */
+   
+   temp1 = vec_ld(0, pixels); /* aligned 16 bytes containing pixels[0] */
+   temp2 = vec_ld(16, pixels); /* the following aligned 16 bytes */
+   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); /* unaligned 16 bytes starting at pixels */
+   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) /* pixels + 1 is 16-byte aligned, so the bytes wanted are exactly temp2 */
+   {
+     pixelsv2 = temp2;
+   }
+   else
+   {
+     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); /* unaligned 16 bytes starting at pixels + 1 */
+   }
+   pixelsv1 = vec_mergeh(vczero, pixelsv1); /* interleave zero bytes with the first 8 pixels so they can be summed as 16-bit lanes */
+   pixelsv2 = vec_mergeh(vczero, pixelsv2);
+   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                        (vector unsigned short)pixelsv2);
+   pixelssum1 = vec_add(pixelssum1, vctwo); /* first-row horizontal sums, plus rounding term */
+   
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); 
+   for (i = 0; i < h ; i++) {
+     int rightside = ((unsigned long)block & 0x0000000F); /* nonzero when block is not 16-byte aligned */
+     blockv = vec_ld(0, block); /* aligned 16 bytes containing the destination row */
+
+     temp1 = vec_ld(line_size, pixels); /* same unaligned-load sequence as above, for the next source row */
+     temp2 = vec_ld(line_size + 16, pixels);
+     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
+     {
+       pixelsv2 = temp2;
+     }
+     else
+     {
+       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+     }
+
+     pixelsv1 = vec_mergeh(vczero, pixelsv1);
+     pixelsv2 = vec_mergeh(vczero, pixelsv2);
+     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                          (vector unsigned short)pixelsv2);
+     temp3 = vec_add(pixelssum1, pixelssum2); /* sum of the 2x2 neighbourhood plus 2 */
+     temp3 = vec_sra(temp3, vctwo); /* divide by 4 */
+     pixelssum1 = vec_add(pixelssum2, vctwo); /* this row's sums become the upper row of the next iteration */
+     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); /* back to bytes: 8 results followed by 8 zero bytes */
+     
+     if (rightside)
+     {
+       blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); /* NOTE(review): presumably keeps blockv's left 8 bytes and puts the result in the right 8 - confirm against vcprm */
+     }
+     else
+     {
+       blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); /* NOTE(review): presumably puts the result in the left 8 bytes and keeps blockv's right 8 - confirm against vcprm */
+     }
+     
+     blockv = vec_avg(blocktemp, blockv); /* average with the existing destination; bytes copied from blockv average with themselves */
+     vec_st(blockv, 0, block); /* writes the whole aligned 16-byte vector back */
+     
+     block += line_size;
+     pixels += line_size;
+   }
+   
+POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
--- a/ppc/dsputil_altivec.h	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_altivec.h	Sat Jun 05 22:29:37 2004 +0000
@@ -48,6 +48,7 @@
 extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
 extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
 extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
+extern void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
 
 extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);
 
--- a/ppc/dsputil_ppc.c	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_ppc.c	Sat Jun 05 22:29:37 2004 +0000
@@ -62,6 +62,7 @@
   "put_no_rnd_pixels16_xy2_altivec",
   "hadamard8_diff8x8_altivec",
   "hadamard8_diff16_altivec",
+  "avg_pixels8_xy2_altivec",
   "clear_blocks_dcbz32_ppc",
   "clear_blocks_dcbz128_ppc"
 };
@@ -268,10 +269,8 @@
         /* the two functions do the same thing, so use the same code */
         c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
         c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
-// next one disabled as it's untested.
-#if 0
         c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
-#endif /* 0 */
+	c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
         c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
         c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
         c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
--- a/ppc/dsputil_ppc.h	Sat Jun 05 22:26:03 2004 +0000
+++ b/ppc/dsputil_ppc.h	Sat Jun 05 22:29:37 2004 +0000
@@ -52,6 +52,7 @@
   altivec_put_no_rnd_pixels16_xy2_num,
   altivec_hadamard8_diff8x8_num,
   altivec_hadamard8_diff16_num,
+  altivec_avg_pixels8_xy2_num,
   powerpc_clear_blocks_dcbz32,
   powerpc_clear_blocks_dcbz128,
   powerpc_perf_total