changeset 1340:09b8fe0f0139 libavcodec

PPC fixes & clean-up patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michaelni
date Fri, 04 Jul 2003 09:39:05 +0000
parents 338a2f6e6402
children 6e5d4ec4f3ab
files ppc/dsputil_altivec.c ppc/dsputil_ppc.c ppc/dsputil_ppc.h ppc/gcc_fixes.h ppc/gmc_altivec.c
diffstat 5 files changed, 85 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/ppc/dsputil_altivec.c	Thu Jul 03 23:29:00 2003 +0000
+++ b/ppc/dsputil_altivec.c	Fri Jul 04 09:39:05 2003 +0000
@@ -1086,7 +1086,9 @@
      pixelssum3, pixelssum4, temp4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
-   
+
+POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
+ 
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1109,7 +1111,6 @@
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);
    
-POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); 
    for (i = 0; i < h ; i++) {
      blockv = vec_ld(0, block);
 
@@ -1207,7 +1208,9 @@
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
-   
+
+POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
+ 
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1230,7 +1233,6 @@
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);
    
-POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); 
    for (i = 0; i < h ; i++) {
      blockv = vec_ld(0, block);
 
--- a/ppc/dsputil_ppc.c	Thu Jul 03 23:29:00 2003 +0000
+++ b/ppc/dsputil_ppc.c	Fri Jul 04 09:39:05 2003 +0000
@@ -61,7 +61,8 @@
   "clear_blocks_dcbz128_ppc"
 };
 #ifdef POWERPC_PERF_USE_PMC
-unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
+unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
+unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
 #endif
 #include <stdio.h>
 #endif
@@ -86,14 +87,22 @@
               (double)perfdata[i][powerpc_data_num],
               perfdata[i][powerpc_data_num]);
 #ifdef POWERPC_PERF_USE_PMC
-    if (perfdata_miss[i][powerpc_data_num] != (unsigned long long)0)
+    if (perfdata_pmc2[i][powerpc_data_num] != (unsigned long long)0)
       fprintf(stderr, " Function \"%s\" (pmc2):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
               perfname[i],
-              perfdata_miss[i][powerpc_data_min],
-              perfdata_miss[i][powerpc_data_max],
-              (double)perfdata_miss[i][powerpc_data_sum] /
-              (double)perfdata_miss[i][powerpc_data_num],
-              perfdata_miss[i][powerpc_data_num]);
+              perfdata_pmc2[i][powerpc_data_min],
+              perfdata_pmc2[i][powerpc_data_max],
+              (double)perfdata_pmc2[i][powerpc_data_sum] /
+              (double)perfdata_pmc2[i][powerpc_data_num],
+              perfdata_pmc2[i][powerpc_data_num]);
+    if (perfdata_pmc3[i][powerpc_data_num] != (unsigned long long)0)
+      fprintf(stderr, " Function \"%s\" (pmc3):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
+              perfname[i],
+              perfdata_pmc3[i][powerpc_data_min],
+              perfdata_pmc3[i][powerpc_data_max],
+              (double)perfdata_pmc3[i][powerpc_data_sum] /
+              (double)perfdata_pmc3[i][powerpc_data_num],
+              perfdata_pmc3[i][powerpc_data_num]);
 #endif
   }
 }
@@ -139,7 +148,7 @@
       i += 16;
     }
     for ( ; i < sizeof(DCTELEM)*6*64 ; i += 32) {
-      asm volatile("dcbz %0,%1" : : "r" (i), "r" (blocks) : "memory");
+      asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
     }
     if (misal) {
       ((unsigned long*)blocks)[188] = 0L;
@@ -172,7 +181,7 @@
     }
     else
       for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
-	asm volatile("dcbzl %0,%1" : : "r" (i), "r" (blocks) : "memory");
+	asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
       }
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
@@ -209,7 +218,9 @@
 
   memset(fakedata, 0xFF, 1024);
 
-  asm volatile("dcbzl %0, %1" : : "r" (fakedata_middle), "r" (zero));
+  /* The "b" constraint below means "address base register" in gcc-3.3's RS/6000
+     port: it keeps r0 out of the base slot, where r0 would be read as literal 0. */
+  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
 
   for (i = 0; i < 1024 ; i ++)
   {
@@ -300,10 +311,14 @@
             perfdata[i][powerpc_data_sum] = 0x0000000000000000;
             perfdata[i][powerpc_data_num] = 0x0000000000000000;
 #ifdef POWERPC_PERF_USE_PMC
-            perfdata_miss[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
-            perfdata_miss[i][powerpc_data_max] = 0x0000000000000000;
-            perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000;
-            perfdata_miss[i][powerpc_data_num] = 0x0000000000000000;
+            perfdata_pmc2[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
+            perfdata_pmc2[i][powerpc_data_max] = 0x0000000000000000;
+            perfdata_pmc2[i][powerpc_data_sum] = 0x0000000000000000;
+            perfdata_pmc2[i][powerpc_data_num] = 0x0000000000000000;
+            perfdata_pmc3[i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFF;
+            perfdata_pmc3[i][powerpc_data_max] = 0x0000000000000000;
+            perfdata_pmc3[i][powerpc_data_sum] = 0x0000000000000000;
+            perfdata_pmc3[i][powerpc_data_num] = 0x0000000000000000;
 #endif /* POWERPC_PERF_USE_PMC */
           }
         }
--- a/ppc/dsputil_ppc.h	Thu Jul 03 23:29:00 2003 +0000
+++ b/ppc/dsputil_ppc.h	Fri Jul 04 09:39:05 2003 +0000
@@ -19,6 +19,17 @@
 #ifndef _DSPUTIL_PPC_
 #define _DSPUTIL_PPC_
 
+#ifdef CONFIG_DARWIN
+/* The Apple assembler shipped with gcc-3.3 knows about DCBZL; earlier assemblers
+   don't. We assume here that the Darwin GCC is from Apple. */
+#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
+#define NO_DCBZL
+#endif
+#else /* CONFIG_DARWIN */
+/* I don't think any non-Apple assembler knows about DCBZL */
+#define NO_DCBZL
+#endif /* CONFIG_DARWIN */
+
 #ifdef POWERPC_TBL_PERFORMANCE_REPORT
 void powerpc_display_perf_report(void);
 /* if you add to the enum below, also add to the perfname array
@@ -49,7 +60,8 @@
 };
 extern unsigned long long perfdata[powerpc_perf_total][powerpc_data_total];
 #ifdef POWERPC_PERF_USE_PMC
-extern unsigned long long perfdata_miss[powerpc_perf_total][powerpc_data_total];
+extern unsigned long long perfdata_pmc2[powerpc_perf_total][powerpc_data_total];
+extern unsigned long long perfdata_pmc3[powerpc_perf_total][powerpc_data_total];
 #endif
 
 #ifndef POWERPC_PERF_USE_PMC
@@ -75,12 +87,17 @@
 
 #else /* POWERPC_PERF_USE_PMC */
 #define POWERPC_GET_CYCLES(a) asm volatile("mfspr %0, 937" : "=r" (a))
-#define POWERPC_GET_MISS(a) asm volatile("mfspr %0, 938" : "=r" (a))
-#define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, miss_start, miss_stop
-#define POWERPC_TBL_START_COUNT(a, cond) do { POWERPC_GET_MISS(miss_start); POWERPC_GET_CYCLES(cycles_start); } while (0)
+#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
+#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
+#define POWERPC_TBL_DECLARE(a, cond) register unsigned long cycles_start, cycles_stop, pmc2_start, pmc2_stop, pmc3_start, pmc3_stop
+#define POWERPC_TBL_START_COUNT(a, cond) do {    \
+  POWERPC_GET_PMC3(pmc3_start);                  \
+  POWERPC_GET_PMC2(pmc2_start);                  \
+  POWERPC_GET_CYCLES(cycles_start); } while (0)
 #define POWERPC_TBL_STOP_COUNT(a, cond) do {     \
   POWERPC_GET_CYCLES(cycles_stop);               \
-  POWERPC_GET_MISS(miss_stop);                   \
+  POWERPC_GET_PMC2(pmc2_stop);                   \
+  POWERPC_GET_PMC3(pmc3_stop);                   \
   if (cycles_stop >= cycles_start)               \
   {                                              \
     unsigned long diff =                         \
@@ -95,18 +112,32 @@
       perfdata[a][powerpc_data_num] ++;          \
     }                                            \
   }                                              \
-  if (miss_stop >= miss_start)                   \
+  if (pmc2_stop >= pmc2_start)                   \
   {                                              \
     unsigned long diff =                         \
-                miss_stop - miss_start;          \
+                pmc2_stop - pmc2_start;          \
     if (cond)                                    \
     {                                            \
-      if (diff < perfdata_miss[a][powerpc_data_min]) \
-        perfdata_miss[a][powerpc_data_min] = diff;   \
-      if (diff > perfdata_miss[a][powerpc_data_max]) \
-        perfdata_miss[a][powerpc_data_max] = diff;   \
-      perfdata_miss[a][powerpc_data_sum] += diff;    \
-      perfdata_miss[a][powerpc_data_num] ++;         \
+      if (diff < perfdata_pmc2[a][powerpc_data_min]) \
+        perfdata_pmc2[a][powerpc_data_min] = diff;   \
+      if (diff > perfdata_pmc2[a][powerpc_data_max]) \
+        perfdata_pmc2[a][powerpc_data_max] = diff;   \
+      perfdata_pmc2[a][powerpc_data_sum] += diff;    \
+      perfdata_pmc2[a][powerpc_data_num] ++;         \
+    }                                            \
+  }                                              \
+  if (pmc3_stop >= pmc3_start)                   \
+  {                                              \
+    unsigned long diff =                         \
+                pmc3_stop - pmc3_start;          \
+    if (cond)                                    \
+    {                                            \
+      if (diff < perfdata_pmc3[a][powerpc_data_min]) \
+        perfdata_pmc3[a][powerpc_data_min] = diff;   \
+      if (diff > perfdata_pmc3[a][powerpc_data_max]) \
+        perfdata_pmc3[a][powerpc_data_max] = diff;   \
+      perfdata_pmc3[a][powerpc_data_sum] += diff;    \
+      perfdata_pmc3[a][powerpc_data_num] ++;         \
     }                                            \
   }                                              \
 } while (0)
--- a/ppc/gcc_fixes.h	Thu Jul 03 23:29:00 2003 +0000
+++ b/ppc/gcc_fixes.h	Fri Jul 04 09:39:05 2003 +0000
@@ -13,15 +13,8 @@
 
 #ifdef CONFIG_DARWIN
 #define AVV(x...) (x)
-/* The Apple assembler shipped w/ gcc-3.3 knows about DCBZL, previous assemblers don't
-   We assume here that the Darwin GCC is from Apple.... */
-#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
-#define NO_DCBZL
-#endif
 #else
 #define AVV(x...) {x}
-/* I don't think any non-Apple assembler knows about DCBZL */
-#define NO_DCBZL
 #if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)  
 
 /* This code was provided to me by Bartosch Pixa
--- a/ppc/gmc_altivec.c	Thu Jul 03 23:29:00 2003 +0000
+++ b/ppc/gmc_altivec.c	Fri Jul 04 09:39:05 2003 +0000
@@ -28,9 +28,10 @@
   altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
   to preserve proper dst alignement.
 */
+#define GMC1_PERF_COND (h==8)
 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
 {
-POWERPC_TBL_DECLARE(altivec_gmc1_num, h == 8);
+POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     const int A=(16-x16)*(16-y16);
     const int B=(   x16)*(16-y16);
@@ -38,7 +39,7 @@
     const int D=(   x16)*(   y16);
     int i;
 
-POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     
     for(i=0; i<h; i++)
     {
@@ -54,7 +55,7 @@
         src+= stride;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
@@ -77,7 +78,7 @@
     unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
 
 
-POWERPC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 
     tempA = vec_ld(0, (unsigned short*)ABCD);
     Av = vec_splat(tempA, 0);
@@ -165,7 +166,7 @@
       src += stride;
     }
 
-POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
+POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }