diff ppc/dsputil_altivec.c @ 1015:35cf2f4a0f8c libavcodec

PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michaelni
date Sun, 19 Jan 2003 19:00:45 +0000
parents 3b7cc8e4b83f
children 9cc1031e1864
line wrap: on
line diff
--- a/ppc/dsputil_altivec.c	Sun Jan 19 18:30:29 2003 +0000
+++ b/ppc/dsputil_altivec.c	Sun Jan 19 19:00:45 2003 +0000
@@ -24,22 +24,6 @@
 #include <sys/sysctl.h>
 #endif
 
-#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT
-unsigned long long perfdata[altivec_perf_total][altivec_data_total];
-/* list below must match enum in dsputil_altivec.h */
-static unsigned char* perfname[] = {
-  "fft_calc",
-  "gmc1",
-  "dct_unquantize_h263",
-  "idct_add",
-  "idct_put",
-  "put_pixels_clamped",
-  "put_pixels16",
-  "avg_pixels16"
-};
-#include <stdio.h>
-#endif
-
 int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
 {
     int i;
@@ -648,12 +632,12 @@
 void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels,
                                 int line_size)
 {
-ALTIVEC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int i;
     UINT8 *cm = cropTbl + MAX_NEG_CROP;
 
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
     
     /* read the pixels */
     for(i=0;i<8;i++) {
@@ -670,7 +654,7 @@
         block += 8;
     }
 
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     register const vector short vczero = (const vector short)(0);
@@ -681,7 +665,7 @@
       pixelsv0, pixelsv1, pixelsv2, pixelsv3, pixelsv4,
       pixelsv0old, pixelsv4old;
 
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1);
 
     blockv0 = vec_ld(0, block);
     blockv1 = vec_ld(16, block);
@@ -720,17 +704,17 @@
       vec_st(pixelsv3, 48, pixels);
     }
 
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
 void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
-ALTIVEC_TBL_DECLARE(altivec_put_pixels16_num, 1);
+POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int i;
 
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
 
     for(i=0; i<h; i++) {
       *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l);
@@ -741,25 +725,24 @@
       block +=line_size;
     }
 
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
-
-    register vector unsigned char perm = vec_lvsl(0, pixels); 
     register vector unsigned char pixelsv1, pixelsv2;
     int i;
 
-ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1);
 
     for(i=0; i<h; i++) {
       pixelsv1 = vec_ld(0, (unsigned char*)pixels);
       pixelsv2 = vec_ld(16, (unsigned char*)pixels);
-      vec_st(vec_perm(pixelsv1, pixelsv2, perm), 0, (unsigned char*)block);
+      vec_st(vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)),
+             0, (unsigned char*)block);
       pixels+=line_size;
       block +=line_size;
     }
 
-ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1);
 
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -767,11 +750,11 @@
 #define op_avg(a,b)  a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
 void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
-ALTIVEC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int i;
 
-ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
 
     for(i=0; i<h; i++) {
       op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l));
@@ -782,29 +765,207 @@
       block +=line_size;
     }
 
-ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
-
-    register vector unsigned char perm = vec_lvsl(0, pixels); 
     register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
     int i;
 
-ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1);
 
     for(i=0; i<h; i++) {
       pixelsv1 = vec_ld(0, (unsigned char*)pixels);
       pixelsv2 = vec_ld(16, (unsigned char*)pixels);
       blockv = vec_ld(0, block);
-      pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
+      pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
       blockv = vec_avg(blockv,pixelsv);
       vec_st(blockv, 0, (unsigned char*)block);
       pixels+=line_size;
       block +=line_size;
     }
 
-ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1);
+
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
+
+void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels,
+                         int line_size, int h)
+{ /* For h rows, averages 8 bytes at pixels into the 8 bytes at block; both step by line_size. */
+POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+    int i;
+POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+    for (i = 0; i < h; i++) { /* per word: (a|b) - (((a^b) & 0xFEFEFEFE) >> 1), as in op_avg: per-byte average, rounded up */
+        *((uint32_t *) (block)) = /* bytes 0..3 of the row */
+            (((*((uint32_t *) (block))) |
+              ((((const struct unaligned_32 *) (pixels))->l))) -
+             ((((*((uint32_t *) (block))) ^
+                ((((const struct unaligned_32 *) (pixels))->
+                  l))) & 0xFEFEFEFEUL) >> 1));
+        *((uint32_t *) (block + 4)) = /* bytes 4..7 of the row */
+            (((*((uint32_t *) (block + 4))) |
+              ((((const struct unaligned_32 *) (pixels + 4))->l))) -
+             ((((*((uint32_t *) (block + 4))) ^
+                ((((const struct unaligned_32 *) (pixels +
+                                                  4))->
+                  l))) & 0xFEFEFEFEUL) >> 1));
+        pixels += line_size;
+        block += line_size;
+    }
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
+    int i;
+
+POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1);
+ 
+   for (i = 0; i < h; i++) {
+     /*
+       block is assumed to be 8-byte aligned, so it points either at the left
+       half of a 16-byte-aligned vector (low 4 address bits zero) or at its right half
+     */
+     int rightside = ((unsigned long)block & 0x0000000F);
+     /* vec_ld drops the low 4 address bits (AltiVec PIM): blockv is the aligned vector holding the row */
+     blockv = vec_ld(0, block);
+     pixelsv1 = vec_ld(0, (unsigned char*)pixels);
+     pixelsv2 = vec_ld(16, (unsigned char*)pixels);
+     pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); /* 16 source bytes starting at pixels, any alignment */
+     /* second operand: the 8 source bytes in the half being written, blockv itself in the other half */
+     if (rightside)
+     {
+       pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1)); /* presumably block words 0,1 | source words 0,1 -- confirm vcprm */
+     }
+     else
+     {
+       pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3)); /* presumably source words 0,1 | block words 2,3 -- confirm vcprm */
+     }
+     /* averaging blockv with a copy of itself leaves the half that is not being written unchanged */
+     blockv = vec_avg(blockv, pixelsv);
+
+     vec_st(blockv, 0, block);
+     
+     pixels += line_size;
+     block += line_size;
+   }
+   
+POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1);
+ 
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+}
 
+void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{ /* Writes h rows of 8 bytes: out[x] = (p[x] + p[x+1] + q[x] + q[x+1] + 2) >> 2, p = source row, q = row below. */
+POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1);
+#ifdef ALTIVEC_USE_REFERENCE_C_CODE
+    int j;
+POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1);
+    for (j = 0; j < 2; j++) { /* two passes, each covering a 4-byte-wide column */
+      int i;
+      const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+      const uint32_t b =
+        (((const struct unaligned_32 *) (pixels + 1))->l);
+      uint32_t l0 =
+        (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; /* low 2 bits of each byte, plus the rounding 2 */
+      uint32_t h0 =
+        ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); /* high 6 bits of each byte, already divided by 4 */
+      uint32_t l1, h1;
+      pixels += line_size;
+      for (i = 0; i < h; i += 2) { /* two output rows per iteration; NOTE(review): looks like h is expected to be even */
+        uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
+        uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
+        l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
+        h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+        *((uint32_t *) block) =
+          h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); /* per byte: (four neighbours + 2) >> 2 */
+        pixels += line_size;
+        block += line_size;
+        a = (((const struct unaligned_32 *) (pixels))->l);
+        b = (((const struct unaligned_32 *) (pixels + 1))->l);
+        l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; /* only l0 carries the +2, so each output gets it once */
+        h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
+        *((uint32_t *) block) =
+          h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
+        pixels += line_size;
+        block += line_size;
+      } pixels += 4 - line_size * (h + 1); /* back to the first source row, 4 bytes to the right */
+      block += 4 - line_size * h; /* back to the first output row, 4 bytes to the right */
+    }
+
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
+
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+   register int i;
+   register vector unsigned char
+     pixelsv1, pixelsv2,
+     pixelsavg;
+   register vector unsigned char
+     blockv, temp1, temp2;
+   register vector unsigned short
+     pixelssum1, pixelssum2, temp3;
+   register const vector unsigned char vczero = (const vector unsigned char)(0);
+   register const vector unsigned short vctwo = (const vector unsigned short)(2);
+   /* first source row: pixelsv1 = bytes starting at pixels, pixelsv2 = bytes starting at pixels + 1 */
+   temp1 = vec_ld(0, pixels);
+   temp2 = vec_ld(16, pixels);
+   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
+   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) /* pixels + 1 is 16-byte aligned: its bytes are exactly temp2 */
+   {
+     pixelsv2 = temp2;
+   }
+   else
+   {
+     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
+   }
+   pixelsv1 = vec_mergeh(vczero, pixelsv1); /* interleave zero bytes with the first 8 pixels; read below as 16-bit lanes */
+   pixelsv2 = vec_mergeh(vczero, pixelsv2);
+   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
+                        (vector unsigned short)pixelsv2);
+   pixelssum1 = vec_add(pixelssum1, vctwo); /* row[x] + row[x+1] + 2 (rounding term) */
+   /* NOTE(review): the timer starts only here, after the first-row setup above */
+POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); 
+   for (i = 0; i < h ; i++) {
+     int rightside = ((unsigned long)block & 0x0000000F); /* non-zero when block is not 16-byte aligned */
+     blockv = vec_ld(0, block);
+     /* next source row (offset line_size), loaded with the same scheme as the first row */
+     temp1 = vec_ld(line_size, pixels);
+     temp2 = vec_ld(line_size + 16, pixels);
+     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
+     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
+     {
+       pixelsv2 = temp2;
+     }
+     else
+     {
+       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
+     }
+
+     pixelsv1 = vec_mergeh(vczero, pixelsv1);
+     pixelsv2 = vec_mergeh(vczero, pixelsv2);
+     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
+                          (vector unsigned short)pixelsv2);
+     temp3 = vec_add(pixelssum1, pixelssum2);
+     temp3 = vec_sra(temp3, vctwo); /* divide the four-pixel sum (rounding term included) by 4 */
+     pixelssum1 = vec_add(pixelssum2, vctwo); /* this row's sum, plus rounding, is reused by the next iteration */
+     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); /* 8 result bytes followed by 8 zero bytes */
+     /* merge the 8 result bytes into the half of blockv that block points at; the other half is kept */
+     if (rightside)
+     {
+       blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); /* presumably block words 0,1 | result words 0,1 -- confirm vcprm */
+     }
+     else
+     {
+       blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); /* presumably result words 0,1 | block words 2,3 -- confirm vcprm */
+     }
+     
+     vec_st(blockv, 0, block);
+     
+     block += line_size;
+     pixels += line_size;
+   }
+   
+POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
@@ -822,22 +983,3 @@
 #endif
     return 0;
 }
-
-#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT
-void altivec_display_perf_report(void)
-{
-  int i;
-  fprintf(stderr, "AltiVec performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n");
-  for(i = 0 ; i < altivec_perf_total ; i++)
-  {
-    if (perfdata[i][altivec_data_num] != (unsigned long long)0)
-      fprintf(stderr, " Function \"%s\":\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
-              perfname[i],
-              perfdata[i][altivec_data_min],
-              perfdata[i][altivec_data_max],
-              (double)perfdata[i][altivec_data_sum] /
-              (double)perfdata[i][altivec_data_num],
-              perfdata[i][altivec_data_num]);
-  }
-}
-#endif /* ALTIVEC_TBL_PERFORMANCE_REPORT */