changeset 7333:a8a79f5385f6 libavcodec

cosmetics: Reformat PPC code in libavcodec according to style guidelines. This includes indentation changes, comment reformatting, consistent brace placement and some prettyprinting.
author diego
date Sun, 20 Jul 2008 18:58:30 +0000
parents b1003e468c3d
children 3a93377e8b76
files ppc/dsputil_ppc.c ppc/dsputil_ppc.h ppc/fft_altivec.c ppc/gmc_altivec.c ppc/h264_altivec.c ppc/h264_template_altivec.c ppc/idct_altivec.c ppc/imgresample_altivec.c ppc/int_altivec.c ppc/mathops.h ppc/mpegvideo_altivec.c ppc/snow_altivec.c
diffstat 12 files changed, 794 insertions(+), 847 deletions(-) [+]
line wrap: on
line diff
--- a/ppc/dsputil_ppc.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/dsputil_ppc.c	Sun Jul 20 18:58:30 2008 +0000
@@ -60,33 +60,33 @@
 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
 /* list below must match enum in dsputil_ppc.h */
 static unsigned char* perfname[] = {
-  "ff_fft_calc_altivec",
-  "gmc1_altivec",
-  "dct_unquantize_h263_altivec",
-  "fdct_altivec",
-  "idct_add_altivec",
-  "idct_put_altivec",
-  "put_pixels16_altivec",
-  "avg_pixels16_altivec",
-  "avg_pixels8_altivec",
-  "put_pixels8_xy2_altivec",
-  "put_no_rnd_pixels8_xy2_altivec",
-  "put_pixels16_xy2_altivec",
-  "put_no_rnd_pixels16_xy2_altivec",
-  "hadamard8_diff8x8_altivec",
-  "hadamard8_diff16_altivec",
-  "avg_pixels8_xy2_altivec",
-  "clear_blocks_dcbz32_ppc",
-  "clear_blocks_dcbz128_ppc",
-  "put_h264_chroma_mc8_altivec",
-  "avg_h264_chroma_mc8_altivec",
-  "put_h264_qpel16_h_lowpass_altivec",
-  "avg_h264_qpel16_h_lowpass_altivec",
-  "put_h264_qpel16_v_lowpass_altivec",
-  "avg_h264_qpel16_v_lowpass_altivec",
-  "put_h264_qpel16_hv_lowpass_altivec",
-  "avg_h264_qpel16_hv_lowpass_altivec",
-  ""
+    "ff_fft_calc_altivec",
+    "gmc1_altivec",
+    "dct_unquantize_h263_altivec",
+    "fdct_altivec",
+    "idct_add_altivec",
+    "idct_put_altivec",
+    "put_pixels16_altivec",
+    "avg_pixels16_altivec",
+    "avg_pixels8_altivec",
+    "put_pixels8_xy2_altivec",
+    "put_no_rnd_pixels8_xy2_altivec",
+    "put_pixels16_xy2_altivec",
+    "put_no_rnd_pixels16_xy2_altivec",
+    "hadamard8_diff8x8_altivec",
+    "hadamard8_diff16_altivec",
+    "avg_pixels8_xy2_altivec",
+    "clear_blocks_dcbz32_ppc",
+    "clear_blocks_dcbz128_ppc",
+    "put_h264_chroma_mc8_altivec",
+    "avg_h264_chroma_mc8_altivec",
+    "put_h264_qpel16_h_lowpass_altivec",
+    "avg_h264_qpel16_h_lowpass_altivec",
+    "put_h264_qpel16_v_lowpass_altivec",
+    "avg_h264_qpel16_v_lowpass_altivec",
+    "put_h264_qpel16_hv_lowpass_altivec",
+    "avg_h264_qpel16_hv_lowpass_altivec",
+    ""
 };
 #include <stdio.h>
 #endif
@@ -94,51 +94,44 @@
 #ifdef CONFIG_POWERPC_PERF
 void powerpc_display_perf_report(void)
 {
-  int i, j;
-  av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
-  for(i = 0 ; i < powerpc_perf_total ; i++)
-  {
-    for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-      {
-        if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
-          av_log(NULL, AV_LOG_INFO,
-                  " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
-                  perfname[i],
-                  j+1,
-                  perfdata[j][i][powerpc_data_min],
-                  perfdata[j][i][powerpc_data_max],
-                  (double)perfdata[j][i][powerpc_data_sum] /
-                  (double)perfdata[j][i][powerpc_data_num],
-                  perfdata[j][i][powerpc_data_num]);
-      }
-  }
+    int i, j;
+    av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
+    for(i = 0 ; i < powerpc_perf_total ; i++) {
+        for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
+            if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
+                av_log(NULL, AV_LOG_INFO,
+                       " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
+                       perfname[i],
+                       j+1,
+                       perfdata[j][i][powerpc_data_min],
+                       perfdata[j][i][powerpc_data_max],
+                       (double)perfdata[j][i][powerpc_data_sum] /
+                       (double)perfdata[j][i][powerpc_data_num],
+                       perfdata[j][i][powerpc_data_num]);
+        }
+    }
 }
 #endif /* CONFIG_POWERPC_PERF */
 
 /* ***** WARNING ***** WARNING ***** WARNING ***** */
 /*
-  clear_blocks_dcbz32_ppc will not work properly
-  on PowerPC processors with a cache line size
-  not equal to 32 bytes.
-  Fortunately all processor used by Apple up to
-  at least the 7450 (aka second generation G4)
-  use 32 bytes cache line.
-  This is due to the use of the 'dcbz' instruction.
-  It simply clear to zero a single cache line,
-  so you need to know the cache line size to use it !
-  It's absurd, but it's fast...
+clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
+cache line size not equal to 32 bytes.
+Fortunately all processor used by Apple up to at least the 7450 (aka second
+generation G4) use 32 bytes cache line.
+This is due to the use of the 'dcbz' instruction. It simply clear to zero a
+single cache line, so you need to know the cache line size to use it !
+It's absurd, but it's fast...
 
-  update 24/06/2003 : Apple released yesterday the G5,
-  with a PPC970. cache line size : 128 bytes. Oups.
-  The semantic of dcbz was changed, it always clear
-  32 bytes. so the function below will work, but will
-  be slow. So I fixed check_dcbz_effect to use dcbzl,
-  which is defined to clear a cache line (as dcbz before).
-  So we still can distinguish, and use dcbz (32 bytes)
-  or dcbzl (one cache line) as required.
+update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
+size: 128 bytes. Oups.
+The semantic of dcbz was changed, it always clear 32 bytes. so the function
+below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
+which is defined to clear a cache line (as dcbz before). So we still can
+distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
 
-  see <http://developer.apple.com/technotes/tn/tn2087.html>
-  and <http://developer.apple.com/technotes/tn/tn2086.html>
+see <http://developer.apple.com/technotes/tn/tn2087.html>
+and <http://developer.apple.com/technotes/tn/tn2086.html>
 */
 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
 {
@@ -148,21 +141,21 @@
 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
 #if 1
     if (misal) {
-      ((unsigned long*)blocks)[0] = 0L;
-      ((unsigned long*)blocks)[1] = 0L;
-      ((unsigned long*)blocks)[2] = 0L;
-      ((unsigned long*)blocks)[3] = 0L;
-      i += 16;
+        ((unsigned long*)blocks)[0] = 0L;
+        ((unsigned long*)blocks)[1] = 0L;
+        ((unsigned long*)blocks)[2] = 0L;
+        ((unsigned long*)blocks)[3] = 0L;
+        i += 16;
     }
     for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
-      asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
+        asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
     }
     if (misal) {
-      ((unsigned long*)blocks)[188] = 0L;
-      ((unsigned long*)blocks)[189] = 0L;
-      ((unsigned long*)blocks)[190] = 0L;
-      ((unsigned long*)blocks)[191] = 0L;
-      i += 16;
+        ((unsigned long*)blocks)[188] = 0L;
+        ((unsigned long*)blocks)[189] = 0L;
+        ((unsigned long*)blocks)[190] = 0L;
+        ((unsigned long*)blocks)[191] = 0L;
+        i += 16;
     }
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
@@ -180,16 +173,16 @@
     register int i = 0;
 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
 #if 1
- if (misal) {
-   // we could probably also optimize this case,
-   // but there's not much point as the machines
-   // aren't available yet (2003-06-26)
-      memset(blocks, 0, sizeof(DCTELEM)*6*64);
+    if (misal) {
+        // we could probably also optimize this case,
+        // but there's not much point as the machines
+        // aren't available yet (2003-06-26)
+        memset(blocks, 0, sizeof(DCTELEM)*6*64);
     }
     else
-      for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
-        asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
-      }
+        for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
+            asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
+        }
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
 #endif
@@ -198,7 +191,7 @@
 #else
 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
 {
-  memset(blocks, 0, sizeof(DCTELEM)*6*64);
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
 }
 #endif
 
@@ -210,34 +203,32 @@
    knows about dcbzl ... */
 long check_dcbzl_effect(void)
 {
-  register char *fakedata = av_malloc(1024);
-  register char *fakedata_middle;
-  register long zero = 0;
-  register long i = 0;
-  long count = 0;
+    register char *fakedata = av_malloc(1024);
+    register char *fakedata_middle;
+    register long zero = 0;
+    register long i = 0;
+    long count = 0;
 
-  if (!fakedata)
-  {
-    return 0L;
-  }
+    if (!fakedata) {
+        return 0L;
+    }
 
-  fakedata_middle = (fakedata + 512);
+    fakedata_middle = (fakedata + 512);
 
-  memset(fakedata, 0xFF, 1024);
+    memset(fakedata, 0xFF, 1024);
 
-  /* below the constraint "b" seems to mean "Address base register"
-     in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
-  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
+    /* below the constraint "b" seems to mean "Address base register"
+       in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
+    asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
 
-  for (i = 0; i < 1024 ; i ++)
-  {
-    if (fakedata[i] == (char)0)
-      count++;
-  }
+    for (i = 0; i < 1024 ; i ++) {
+        if (fakedata[i] == (char)0)
+            count++;
+    }
 
-  av_free(fakedata);
+    av_free(fakedata);
 
-  return count;
+    return count;
 }
 #else
 long check_dcbzl_effect(void)
@@ -286,36 +277,31 @@
 
 #ifdef CONFIG_ENCODERS
         if (avctx->dct_algo == FF_DCT_AUTO ||
-            avctx->dct_algo == FF_DCT_ALTIVEC)
-        {
+            avctx->dct_algo == FF_DCT_ALTIVEC) {
             c->fdct = fdct_altivec;
         }
 #endif //CONFIG_ENCODERS
 
-        if (avctx->lowres==0)
-        {
-        if ((avctx->idct_algo == FF_IDCT_AUTO) ||
-                (avctx->idct_algo == FF_IDCT_ALTIVEC))
-        {
-            c->idct_put = idct_put_altivec;
-            c->idct_add = idct_add_altivec;
-            c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
-        }
+        if (avctx->lowres==0) {
+            if ((avctx->idct_algo == FF_IDCT_AUTO) ||
+                (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
+                c->idct_put = idct_put_altivec;
+                c->idct_add = idct_add_altivec;
+                c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+            }
         }
 
 #ifdef CONFIG_POWERPC_PERF
         {
-          int i, j;
-          for (i = 0 ; i < powerpc_perf_total ; i++)
-          {
-            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-              {
-                perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
-                perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
-                perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
-                perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
+            int i, j;
+            for (i = 0 ; i < powerpc_perf_total ; i++) {
+                for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
+                    perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
+                    perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
+                    perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
+                    perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
+                  }
               }
-          }
         }
 #endif /* CONFIG_POWERPC_PERF */
     }
--- a/ppc/dsputil_ppc.h	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/dsputil_ppc.h	Sun Jul 20 18:58:30 2008 +0000
@@ -31,40 +31,40 @@
 /* if you add to the enum below, also add to the perfname array
    in dsputil_ppc.c */
 enum powerpc_perf_index {
-  altivec_fft_num = 0,
-  altivec_gmc1_num,
-  altivec_dct_unquantize_h263_num,
-  altivec_fdct,
-  altivec_idct_add_num,
-  altivec_idct_put_num,
-  altivec_put_pixels16_num,
-  altivec_avg_pixels16_num,
-  altivec_avg_pixels8_num,
-  altivec_put_pixels8_xy2_num,
-  altivec_put_no_rnd_pixels8_xy2_num,
-  altivec_put_pixels16_xy2_num,
-  altivec_put_no_rnd_pixels16_xy2_num,
-  altivec_hadamard8_diff8x8_num,
-  altivec_hadamard8_diff16_num,
-  altivec_avg_pixels8_xy2_num,
-  powerpc_clear_blocks_dcbz32,
-  powerpc_clear_blocks_dcbz128,
-  altivec_put_h264_chroma_mc8_num,
-  altivec_avg_h264_chroma_mc8_num,
-  altivec_put_h264_qpel16_h_lowpass_num,
-  altivec_avg_h264_qpel16_h_lowpass_num,
-  altivec_put_h264_qpel16_v_lowpass_num,
-  altivec_avg_h264_qpel16_v_lowpass_num,
-  altivec_put_h264_qpel16_hv_lowpass_num,
-  altivec_avg_h264_qpel16_hv_lowpass_num,
-  powerpc_perf_total
+    altivec_fft_num = 0,
+    altivec_gmc1_num,
+    altivec_dct_unquantize_h263_num,
+    altivec_fdct,
+    altivec_idct_add_num,
+    altivec_idct_put_num,
+    altivec_put_pixels16_num,
+    altivec_avg_pixels16_num,
+    altivec_avg_pixels8_num,
+    altivec_put_pixels8_xy2_num,
+    altivec_put_no_rnd_pixels8_xy2_num,
+    altivec_put_pixels16_xy2_num,
+    altivec_put_no_rnd_pixels16_xy2_num,
+    altivec_hadamard8_diff8x8_num,
+    altivec_hadamard8_diff16_num,
+    altivec_avg_pixels8_xy2_num,
+    powerpc_clear_blocks_dcbz32,
+    powerpc_clear_blocks_dcbz128,
+    altivec_put_h264_chroma_mc8_num,
+    altivec_avg_h264_chroma_mc8_num,
+    altivec_put_h264_qpel16_h_lowpass_num,
+    altivec_avg_h264_qpel16_h_lowpass_num,
+    altivec_put_h264_qpel16_v_lowpass_num,
+    altivec_avg_h264_qpel16_v_lowpass_num,
+    altivec_put_h264_qpel16_hv_lowpass_num,
+    altivec_avg_h264_qpel16_hv_lowpass_num,
+    powerpc_perf_total
 };
 enum powerpc_data_index {
-  powerpc_data_min = 0,
-  powerpc_data_max,
-  powerpc_data_sum,
-  powerpc_data_num,
-  powerpc_data_total
+    powerpc_data_min = 0,
+    powerpc_data_max,
+    powerpc_data_sum,
+    powerpc_data_num,
+    powerpc_data_total
 };
 extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
 
@@ -105,45 +105,42 @@
 #define POWERPC_GET_PMC6(a) do {} while (0)
 #endif
 #endif /* HAVE_PPC64 */
-#define POWERPC_PERF_DECLARE(a, cond)   \
-  POWERP_PMC_DATATYPE                   \
-    pmc_start[POWERPC_NUM_PMC_ENABLED], \
-    pmc_stop[POWERPC_NUM_PMC_ENABLED],  \
-    pmc_loop_index;
+#define POWERPC_PERF_DECLARE(a, cond)       \
+    POWERP_PMC_DATATYPE                     \
+        pmc_start[POWERPC_NUM_PMC_ENABLED], \
+        pmc_stop[POWERPC_NUM_PMC_ENABLED],  \
+        pmc_loop_index;
 #define POWERPC_PERF_START_COUNT(a, cond) do { \
-  POWERPC_GET_PMC6(pmc_start[5]); \
-  POWERPC_GET_PMC5(pmc_start[4]); \
-  POWERPC_GET_PMC4(pmc_start[3]); \
-  POWERPC_GET_PMC3(pmc_start[2]); \
-  POWERPC_GET_PMC2(pmc_start[1]); \
-  POWERPC_GET_PMC1(pmc_start[0]); \
-  } while (0)
+    POWERPC_GET_PMC6(pmc_start[5]); \
+    POWERPC_GET_PMC5(pmc_start[4]); \
+    POWERPC_GET_PMC4(pmc_start[3]); \
+    POWERPC_GET_PMC3(pmc_start[2]); \
+    POWERPC_GET_PMC2(pmc_start[1]); \
+    POWERPC_GET_PMC1(pmc_start[0]); \
+    } while (0)
 #define POWERPC_PERF_STOP_COUNT(a, cond) do { \
-  POWERPC_GET_PMC1(pmc_stop[0]); \
-  POWERPC_GET_PMC2(pmc_stop[1]); \
-  POWERPC_GET_PMC3(pmc_stop[2]); \
-  POWERPC_GET_PMC4(pmc_stop[3]); \
-  POWERPC_GET_PMC5(pmc_stop[4]); \
-  POWERPC_GET_PMC6(pmc_stop[5]); \
-  if (cond)                       \
-  {                               \
-    for(pmc_loop_index = 0;       \
-        pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
-        pmc_loop_index++)         \
-    {                             \
-      if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index])  \
-        {                                                         \
-        POWERP_PMC_DATATYPE diff =                                \
-          pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];   \
-        if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
-          perfdata[pmc_loop_index][a][powerpc_data_min] = diff;   \
-        if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
-          perfdata[pmc_loop_index][a][powerpc_data_max] = diff;   \
-        perfdata[pmc_loop_index][a][powerpc_data_sum] += diff;    \
-        perfdata[pmc_loop_index][a][powerpc_data_num] ++;         \
-      }                           \
-    }                             \
-  }                               \
+    POWERPC_GET_PMC1(pmc_stop[0]);            \
+    POWERPC_GET_PMC2(pmc_stop[1]);            \
+    POWERPC_GET_PMC3(pmc_stop[2]);            \
+    POWERPC_GET_PMC4(pmc_stop[3]);            \
+    POWERPC_GET_PMC5(pmc_stop[4]);            \
+    POWERPC_GET_PMC6(pmc_stop[5]);            \
+    if (cond) {                               \
+        for(pmc_loop_index = 0;               \
+            pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
+            pmc_loop_index++) {               \
+            if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) {  \
+                POWERP_PMC_DATATYPE diff =                                \
+                  pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];   \
+                if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
+                    perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
+                if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
+                    perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \
+                perfdata[pmc_loop_index][a][powerpc_data_sum] += diff;    \
+                perfdata[pmc_loop_index][a][powerpc_data_num] ++;         \
+            }                                 \
+        }                                     \
+    }                                         \
 } while (0)
 #else /* CONFIG_POWERPC_PERF */
 // those are needed to avoid empty statements.
--- a/ppc/fft_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/fft_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -33,21 +33,21 @@
 /* butter fly op */
 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
 {\
-  FFTSample ax, ay, bx, by;\
-  bx=pre1;\
-  by=pim1;\
-  ax=qre1;\
-  ay=qim1;\
-  pre = (bx + ax);\
-  pim = (by + ay);\
-  qre = (bx - ax);\
-  qim = (by - ay);\
+    FFTSample ax, ay, bx, by;\
+    bx=pre1;\
+    by=pim1;\
+    ax=qre1;\
+    ay=qim1;\
+    pre = (bx + ax);\
+    pim = (by + ay);\
+    qre = (bx - ax);\
+    qim = (by - ay);\
 }
 #define MUL16(a,b) ((a) * (b))
 #define CMUL(pre, pim, are, aim, bre, bim) \
 {\
-   pre = (MUL16(are, bre) - MUL16(aim, bim));\
-   pim = (MUL16(are, bim) + MUL16(bre, aim));\
+    pre = (MUL16(are, bre) - MUL16(aim, bim));\
+    pim = (MUL16(are, bim) + MUL16(bre, aim));\
 }
 
 
@@ -85,14 +85,11 @@
 
         c1 = vcii(p,p,n,n);
 
-        if (s->inverse)
-            {
-                c2 = vcii(p,p,n,p);
-            }
-        else
-            {
-                c2 = vcii(p,p,p,n);
-            }
+        if (s->inverse) {
+            c2 = vcii(p,p,n,p);
+        } else {
+            c2 = vcii(p,p,p,n);
+        }
 
         j = (np >> 2);
         do {
--- a/ppc/gmc_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/gmc_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -36,16 +36,16 @@
 {
 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
     const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) =
-      {rounder, rounder, rounder, rounder,
-       rounder, rounder, rounder, rounder};
+        {rounder, rounder, rounder, rounder,
+         rounder, rounder, rounder, rounder};
     const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
-      {
-        (16-x16)*(16-y16), /* A */
-        (   x16)*(16-y16), /* B */
-        (16-x16)*(   y16), /* C */
-        (   x16)*(   y16), /* D */
-        0, 0, 0, 0         /* padding */
-      };
+        {
+            (16-x16)*(16-y16), /* A */
+            (   x16)*(16-y16), /* B */
+            (16-x16)*(   y16), /* C */
+            (   x16)*(   y16), /* D */
+            0, 0, 0, 0         /* padding */
+        };
     register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
     register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8);
     register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
@@ -74,73 +74,67 @@
     src_1 = vec_ld(16, src);
     srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
 
-    if (src_really_odd != 0x0000000F)
-    { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
-      srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
-    }
-    else
-    {
-      srcvB = src_1;
+    if (src_really_odd != 0x0000000F) {
+        // if src & 0xF == 0xF, then (src+1) is properly aligned
+        // on the second vector.
+        srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
+    } else {
+        srcvB = src_1;
     }
     srcvA = vec_mergeh(vczero, srcvA);
     srcvB = vec_mergeh(vczero, srcvB);
 
-    for(i=0; i<h; i++)
-    {
-      dst_odd = (unsigned long)dst & 0x0000000F;
-      src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
+    for(i=0; i<h; i++) {
+        dst_odd = (unsigned long)dst & 0x0000000F;
+        src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
 
-      dstv = vec_ld(0, dst);
+        dstv = vec_ld(0, dst);
 
-      // we we'll be able to pick-up our 9 char elements
-      // at src + stride from those 32 bytes
-      // then reuse the resulting 2 vectors srvcC and srcvD
-      // as the next srcvA and srcvB
-      src_0 = vec_ld(stride + 0, src);
-      src_1 = vec_ld(stride + 16, src);
-      srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
+        // we we'll be able to pick-up our 9 char elements
+        // at src + stride from those 32 bytes
+        // then reuse the resulting 2 vectors srvcC and srcvD
+        // as the next srcvA and srcvB
+        src_0 = vec_ld(stride + 0, src);
+        src_1 = vec_ld(stride + 16, src);
+        srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
 
-      if (src_really_odd != 0x0000000F)
-      { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
-        srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
-      }
-      else
-      {
-        srcvD = src_1;
-      }
+        if (src_really_odd != 0x0000000F) {
+            // if src & 0xF == 0xF, then (src+1) is properly aligned
+            // on the second vector.
+            srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
+        } else {
+            srcvD = src_1;
+        }
 
-      srcvC = vec_mergeh(vczero, srcvC);
-      srcvD = vec_mergeh(vczero, srcvD);
+        srcvC = vec_mergeh(vczero, srcvC);
+        srcvD = vec_mergeh(vczero, srcvD);
 
 
-      // OK, now we (finally) do the math :-)
-      // those four instructions replaces 32 int muls & 32 int adds.
-      // isn't AltiVec nice ?
-      tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
-      tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
-      tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
-      tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
+        // OK, now we (finally) do the math :-)
+        // those four instructions replaces 32 int muls & 32 int adds.
+        // isn't AltiVec nice ?
+        tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
+        tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
+        tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
+        tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
 
-      srcvA = srcvC;
-      srcvB = srcvD;
+        srcvA = srcvC;
+        srcvB = srcvD;
 
-      tempD = vec_sr(tempD, vcsr8);
+        tempD = vec_sr(tempD, vcsr8);
 
-      dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
+        dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
 
-      if (dst_odd)
-      {
-        dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
-      }
-      else
-      {
-        dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
-      }
+        if (dst_odd) {
+            dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
+        } else {
+            dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
+        }
 
-      vec_st(dstv2, 0, dst);
+        vec_st(dstv2, 0, dst);
 
-      dst += stride;
-      src += stride;
+        dst += stride;
+        src += stride;
     }
 
 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
--- a/ppc/h264_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/h264_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -196,7 +196,7 @@
     const vec_s16_t vD = vec_splat((vec_s16_t)vABCD, 7);
     LOAD_ZERO;
     const vec_s16_t v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
-    const vec_u16_t v6us = vec_splat_u16(6);
+    const vec_u16_t v6us  = vec_splat_u16(6);
     register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
     register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
 
@@ -392,8 +392,8 @@
 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  */
 
-  H264_MC(put_, 16, altivec)
-  H264_MC(avg_, 16, altivec)
+H264_MC(put_, 16, altivec)
+H264_MC(avg_, 16, altivec)
 
 
 /****************************************************************************
@@ -685,9 +685,9 @@
     r15 = vec_mergel(r3, r7);   /*3,7,11,15 set 1*/                        \
                                                                            \
     /*Third merge*/                                                        \
-    r0 = vec_mergeh(r8, r12);   /*0,2,4,6,8,10,12,14 set 0*/               \
-    r1 = vec_mergel(r8, r12);   /*0,2,4,6,8,10,12,14 set 1*/               \
-    r2 = vec_mergeh(r9, r13);   /*0,2,4,6,8,10,12,14 set 2*/               \
+    r0 = vec_mergeh(r8,  r12);  /*0,2,4,6,8,10,12,14 set 0*/               \
+    r1 = vec_mergel(r8,  r12);  /*0,2,4,6,8,10,12,14 set 1*/               \
+    r2 = vec_mergeh(r9,  r13);  /*0,2,4,6,8,10,12,14 set 2*/               \
     r4 = vec_mergeh(r10, r14);  /*1,3,5,7,9,11,13,15 set 0*/               \
     r5 = vec_mergel(r10, r14);  /*1,3,5,7,9,11,13,15 set 1*/               \
     r6 = vec_mergeh(r11, r15);  /*1,3,5,7,9,11,13,15 set 2*/               \
--- a/ppc/h264_template_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/h264_template_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -206,489 +206,489 @@
 
 /* this code assume stride % 16 == 0 */
 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
-  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
-  register int i;
+    POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
+    register int i;
 
-  LOAD_ZERO;
-  const vec_u8_t permM2 = vec_lvsl(-2, src);
-  const vec_u8_t permM1 = vec_lvsl(-1, src);
-  const vec_u8_t permP0 = vec_lvsl(+0, src);
-  const vec_u8_t permP1 = vec_lvsl(+1, src);
-  const vec_u8_t permP2 = vec_lvsl(+2, src);
-  const vec_u8_t permP3 = vec_lvsl(+3, src);
-  const vec_s16_t v5ss = vec_splat_s16(5);
-  const vec_u16_t v5us = vec_splat_u16(5);
-  const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
-  const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+    LOAD_ZERO;
+    const vec_u8_t permM2 = vec_lvsl(-2, src);
+    const vec_u8_t permM1 = vec_lvsl(-1, src);
+    const vec_u8_t permP0 = vec_lvsl(+0, src);
+    const vec_u8_t permP1 = vec_lvsl(+1, src);
+    const vec_u8_t permP2 = vec_lvsl(+2, src);
+    const vec_u8_t permP3 = vec_lvsl(+3, src);
+    const vec_s16_t v5ss = vec_splat_s16(5);
+    const vec_u16_t v5us = vec_splat_u16(5);
+    const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+    const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
 
-  vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+    vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
 
-  register int align = ((((unsigned long)src) - 2) % 16);
+    register int align = ((((unsigned long)src) - 2) % 16);
 
-  vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
-                      srcP2A, srcP2B, srcP3A, srcP3B,
-                      srcM1A, srcM1B, srcM2A, srcM2B,
-                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
-                      pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
-                      psumA, psumB, sumA, sumB;
+    vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
+              srcP2A, srcP2B, srcP3A, srcP3B,
+              srcM1A, srcM1B, srcM2A, srcM2B,
+              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+              pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+              psumA, psumB, sumA, sumB;
 
-  vec_u8_t sum, vdst, fsum;
+    vec_u8_t sum, vdst, fsum;
 
-  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+    POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
 
-  for (i = 0 ; i < 16 ; i ++) {
-    vec_u8_t srcR1 = vec_ld(-2, src);
-    vec_u8_t srcR2 = vec_ld(14, src);
+    for (i = 0 ; i < 16 ; i ++) {
+        vec_u8_t srcR1 = vec_ld(-2, src);
+        vec_u8_t srcR2 = vec_ld(14, src);
 
-    switch (align) {
-    default: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = vec_perm(srcR1, srcR2, permP3);
-    } break;
-    case 11: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = srcR2;
-    } break;
-    case 12: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = srcR2;
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 13: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = srcR2;
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 14: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = srcR2;
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 15: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = srcR2;
-      srcP0 = vec_perm(srcR2, srcR3, permP0);
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
+        switch (align) {
+        default: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = vec_perm(srcR1, srcR2, permP3);
+        } break;
+        case 11: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = srcR2;
+        } break;
+        case 12: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = srcR2;
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 13: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = srcR2;
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 14: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = srcR2;
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 15: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = srcR2;
+            srcP0 = vec_perm(srcR2, srcR3, permP0);
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        }
+
+        srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
+        srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
+        srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
+        srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+
+        srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
+        srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
+        srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
+        srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+
+        srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
+        srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
+        srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
+        srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+
+        sum1A = vec_adds(srcP0A, srcP1A);
+        sum1B = vec_adds(srcP0B, srcP1B);
+        sum2A = vec_adds(srcM1A, srcP2A);
+        sum2B = vec_adds(srcM1B, srcP2B);
+        sum3A = vec_adds(srcM2A, srcP3A);
+        sum3B = vec_adds(srcM2B, srcP3B);
+
+        pp1A = vec_mladd(sum1A, v20ss, v16ss);
+        pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
+        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
+
+        pp3A = vec_add(sum3A, pp1A);
+        pp3B = vec_add(sum3B, pp1B);
+
+        psumA = vec_sub(pp3A, pp2A);
+        psumB = vec_sub(pp3B, pp2B);
+
+        sumA = vec_sra(psumA, v5us);
+        sumB = vec_sra(psumB, v5us);
+
+        sum = vec_packsu(sumA, sumB);
+
+        ASSERT_ALIGNED(dst);
+        vdst = vec_ld(0, dst);
+
+        OP_U8_ALTIVEC(fsum, sum, vdst);
+
+        vec_st(fsum, 0, dst);
+
+        src += srcStride;
+        dst += dstStride;
     }
-
-    srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
-    srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
-    srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
-    srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
-
-    srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
-    srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
-    srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
-    srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
-
-    srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
-    srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
-    srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
-    srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
-
-    sum1A = vec_adds(srcP0A, srcP1A);
-    sum1B = vec_adds(srcP0B, srcP1B);
-    sum2A = vec_adds(srcM1A, srcP2A);
-    sum2B = vec_adds(srcM1B, srcP2B);
-    sum3A = vec_adds(srcM2A, srcP3A);
-    sum3B = vec_adds(srcM2B, srcP3B);
-
-    pp1A = vec_mladd(sum1A, v20ss, v16ss);
-    pp1B = vec_mladd(sum1B, v20ss, v16ss);
-
-    pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
-    pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
-
-    pp3A = vec_add(sum3A, pp1A);
-    pp3B = vec_add(sum3B, pp1B);
-
-    psumA = vec_sub(pp3A, pp2A);
-    psumB = vec_sub(pp3B, pp2B);
-
-    sumA = vec_sra(psumA, v5us);
-    sumB = vec_sra(psumB, v5us);
-
-    sum = vec_packsu(sumA, sumB);
-
-    ASSERT_ALIGNED(dst);
-    vdst = vec_ld(0, dst);
-
-    OP_U8_ALTIVEC(fsum, sum, vdst);
-
-    vec_st(fsum, 0, dst);
-
-    src += srcStride;
-    dst += dstStride;
-  }
-POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+    POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
 }
 
 /* this code assume stride % 16 == 0 */
 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
-  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
+    POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
 
-  register int i;
+    register int i;
 
-  LOAD_ZERO;
-  const vec_u8_t perm = vec_lvsl(0, src);
-  const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
-  const vec_u16_t v5us = vec_splat_u16(5);
-  const vec_s16_t v5ss = vec_splat_s16(5);
-  const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+    LOAD_ZERO;
+    const vec_u8_t perm = vec_lvsl(0, src);
+    const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+    const vec_u16_t v5us = vec_splat_u16(5);
+    const vec_s16_t v5ss = vec_splat_s16(5);
+    const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
 
-  uint8_t *srcbis = src - (srcStride * 2);
+    uint8_t *srcbis = src - (srcStride * 2);
 
-  const vec_u8_t srcM2a = vec_ld(0, srcbis);
-  const vec_u8_t srcM2b = vec_ld(16, srcbis);
-  const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcM1b = vec_ld(16, srcbis);
-  const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcP0b = vec_ld(16, srcbis);
-  const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcP1b = vec_ld(16, srcbis);
-  const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcP2b = vec_ld(16, srcbis);
-  const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
-//  srcbis += srcStride;
+    const vec_u8_t srcM2a = vec_ld(0, srcbis);
+    const vec_u8_t srcM2b = vec_ld(16, srcbis);
+    const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcM1b = vec_ld(16, srcbis);
+    const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcP0b = vec_ld(16, srcbis);
+    const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcP1b = vec_ld(16, srcbis);
+    const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcP2b = vec_ld(16, srcbis);
+    const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
+    //srcbis += srcStride;
 
-  vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
-  vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
-  vec_s16_t srcM1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
-  vec_s16_t srcM1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
-  vec_s16_t srcP0ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
-  vec_s16_t srcP0ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
-  vec_s16_t srcP1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
-  vec_s16_t srcP1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
-  vec_s16_t srcP2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
-  vec_s16_t srcP2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
+    vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
+    vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+    vec_s16_t srcM1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
+    vec_s16_t srcM1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
+    vec_s16_t srcP0ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
+    vec_s16_t srcP0ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
+    vec_s16_t srcP1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
+    vec_s16_t srcP1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+    vec_s16_t srcP2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
+    vec_s16_t srcP2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
 
-  vec_s16_t pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
-                      psumA, psumB, sumA, sumB,
-                      srcP3ssA, srcP3ssB,
-                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
+    vec_s16_t pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+              psumA, psumB, sumA, sumB,
+              srcP3ssA, srcP3ssB,
+              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
 
-  vec_u8_t sum, vdst, fsum, srcP3a, srcP3b, srcP3;
+    vec_u8_t sum, vdst, fsum, srcP3a, srcP3b, srcP3;
 
-  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+    POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
 
-  for (i = 0 ; i < 16 ; i++) {
-    srcP3a = vec_ld(0, srcbis += srcStride);
-    srcP3b = vec_ld(16, srcbis);
-    srcP3 = vec_perm(srcP3a, srcP3b, perm);
-    srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
-    srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
-//    srcbis += srcStride;
+    for (i = 0 ; i < 16 ; i++) {
+        srcP3a = vec_ld(0, srcbis += srcStride);
+        srcP3b = vec_ld(16, srcbis);
+        srcP3 = vec_perm(srcP3a, srcP3b, perm);
+        srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
+        srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+        //srcbis += srcStride;
 
-    sum1A = vec_adds(srcP0ssA, srcP1ssA);
-    sum1B = vec_adds(srcP0ssB, srcP1ssB);
-    sum2A = vec_adds(srcM1ssA, srcP2ssA);
-    sum2B = vec_adds(srcM1ssB, srcP2ssB);
-    sum3A = vec_adds(srcM2ssA, srcP3ssA);
-    sum3B = vec_adds(srcM2ssB, srcP3ssB);
+        sum1A = vec_adds(srcP0ssA, srcP1ssA);
+        sum1B = vec_adds(srcP0ssB, srcP1ssB);
+        sum2A = vec_adds(srcM1ssA, srcP2ssA);
+        sum2B = vec_adds(srcM1ssB, srcP2ssB);
+        sum3A = vec_adds(srcM2ssA, srcP3ssA);
+        sum3B = vec_adds(srcM2ssB, srcP3ssB);
 
-    srcM2ssA = srcM1ssA;
-    srcM2ssB = srcM1ssB;
-    srcM1ssA = srcP0ssA;
-    srcM1ssB = srcP0ssB;
-    srcP0ssA = srcP1ssA;
-    srcP0ssB = srcP1ssB;
-    srcP1ssA = srcP2ssA;
-    srcP1ssB = srcP2ssB;
-    srcP2ssA = srcP3ssA;
-    srcP2ssB = srcP3ssB;
+        srcM2ssA = srcM1ssA;
+        srcM2ssB = srcM1ssB;
+        srcM1ssA = srcP0ssA;
+        srcM1ssB = srcP0ssB;
+        srcP0ssA = srcP1ssA;
+        srcP0ssB = srcP1ssB;
+        srcP1ssA = srcP2ssA;
+        srcP1ssB = srcP2ssB;
+        srcP2ssA = srcP3ssA;
+        srcP2ssB = srcP3ssB;
 
-    pp1A = vec_mladd(sum1A, v20ss, v16ss);
-    pp1B = vec_mladd(sum1B, v20ss, v16ss);
+        pp1A = vec_mladd(sum1A, v20ss, v16ss);
+        pp1B = vec_mladd(sum1B, v20ss, v16ss);
 
-    pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
-    pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
+        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
+        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
 
-    pp3A = vec_add(sum3A, pp1A);
-    pp3B = vec_add(sum3B, pp1B);
+        pp3A = vec_add(sum3A, pp1A);
+        pp3B = vec_add(sum3B, pp1B);
 
-    psumA = vec_sub(pp3A, pp2A);
-    psumB = vec_sub(pp3B, pp2B);
+        psumA = vec_sub(pp3A, pp2A);
+        psumB = vec_sub(pp3B, pp2B);
 
-    sumA = vec_sra(psumA, v5us);
-    sumB = vec_sra(psumB, v5us);
+        sumA = vec_sra(psumA, v5us);
+        sumB = vec_sra(psumB, v5us);
 
-    sum = vec_packsu(sumA, sumB);
+        sum = vec_packsu(sumA, sumB);
 
-    ASSERT_ALIGNED(dst);
-    vdst = vec_ld(0, dst);
+        ASSERT_ALIGNED(dst);
+        vdst = vec_ld(0, dst);
 
-    OP_U8_ALTIVEC(fsum, sum, vdst);
+        OP_U8_ALTIVEC(fsum, sum, vdst);
 
-    vec_st(fsum, 0, dst);
+        vec_st(fsum, 0, dst);
 
-    dst += dstStride;
-  }
-  POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+        dst += dstStride;
+    }
+    POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
 }
 
 /* this code assume stride % 16 == 0 *and* tmp is properly aligned */
 static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
-  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
-  register int i;
-  LOAD_ZERO;
-  const vec_u8_t permM2 = vec_lvsl(-2, src);
-  const vec_u8_t permM1 = vec_lvsl(-1, src);
-  const vec_u8_t permP0 = vec_lvsl(+0, src);
-  const vec_u8_t permP1 = vec_lvsl(+1, src);
-  const vec_u8_t permP2 = vec_lvsl(+2, src);
-  const vec_u8_t permP3 = vec_lvsl(+3, src);
-  const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
-  const vec_u32_t v10ui = vec_splat_u32(10);
-  const vec_s16_t v5ss = vec_splat_s16(5);
-  const vec_s16_t v1ss = vec_splat_s16(1);
-  const vec_s32_t v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
-  const vec_u32_t v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
+    POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+    register int i;
+    LOAD_ZERO;
+    const vec_u8_t permM2 = vec_lvsl(-2, src);
+    const vec_u8_t permM1 = vec_lvsl(-1, src);
+    const vec_u8_t permP0 = vec_lvsl(+0, src);
+    const vec_u8_t permP1 = vec_lvsl(+1, src);
+    const vec_u8_t permP2 = vec_lvsl(+2, src);
+    const vec_u8_t permP3 = vec_lvsl(+3, src);
+    const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+    const vec_u32_t v10ui = vec_splat_u32(10);
+    const vec_s16_t v5ss = vec_splat_s16(5);
+    const vec_s16_t v1ss = vec_splat_s16(1);
+    const vec_s32_t v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
+    const vec_u32_t v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
 
-  register int align = ((((unsigned long)src) - 2) % 16);
+    register int align = ((((unsigned long)src) - 2) % 16);
 
-  vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
-                      srcP2A, srcP2B, srcP3A, srcP3B,
-                      srcM1A, srcM1B, srcM2A, srcM2B,
-                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
-                      pp1A, pp1B, pp2A, pp2B, psumA, psumB;
+    vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
+              srcP2A, srcP2B, srcP3A, srcP3B,
+              srcM1A, srcM1B, srcM2A, srcM2B,
+              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+              pp1A, pp1B, pp2A, pp2B, psumA, psumB;
 
-  const vec_u8_t mperm = (const vec_u8_t)
-    AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
-        0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
-  int16_t *tmpbis = tmp;
+    const vec_u8_t mperm = (const vec_u8_t)
+      AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
+          0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
+    int16_t *tmpbis = tmp;
 
-  vec_s16_t tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
-                      tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
-                      tmpP2ssA, tmpP2ssB;
+    vec_s16_t tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
+              tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
+              tmpP2ssA, tmpP2ssB;
 
-  vec_s32_t pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
-                    pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
-                    pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
-                    ssumAe, ssumAo, ssumBe, ssumBo;
-  vec_u8_t fsum, sumv, sum, vdst;
-  vec_s16_t ssume, ssumo;
+    vec_s32_t pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
+              pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
+              pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
+              ssumAe, ssumAo, ssumBe, ssumBo;
+    vec_u8_t fsum, sumv, sum, vdst;
+    vec_s16_t ssume, ssumo;
 
-  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
-  src -= (2 * srcStride);
-  for (i = 0 ; i < 21 ; i ++) {
-    vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
-    vec_u8_t srcR1 = vec_ld(-2, src);
-    vec_u8_t srcR2 = vec_ld(14, src);
+    POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+    src -= (2 * srcStride);
+    for (i = 0 ; i < 21 ; i ++) {
+        vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+        vec_u8_t srcR1 = vec_ld(-2, src);
+        vec_u8_t srcR2 = vec_ld(14, src);
 
-    switch (align) {
-    default: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = vec_perm(srcR1, srcR2, permP3);
-    } break;
-    case 11: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = srcR2;
-    } break;
-    case 12: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = srcR2;
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 13: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = srcR2;
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 14: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = srcR2;
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 15: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = srcR2;
-      srcP0 = vec_perm(srcR2, srcR3, permP0);
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
+        switch (align) {
+        default: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = vec_perm(srcR1, srcR2, permP3);
+        } break;
+        case 11: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = srcR2;
+        } break;
+        case 12: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = srcR2;
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 13: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = srcR2;
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 14: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = srcR2;
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 15: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = srcR2;
+            srcP0 = vec_perm(srcR2, srcR3, permP0);
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        }
+
+        srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
+        srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
+        srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
+        srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+
+        srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
+        srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
+        srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
+        srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+
+        srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
+        srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
+        srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
+        srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+
+        sum1A = vec_adds(srcP0A, srcP1A);
+        sum1B = vec_adds(srcP0B, srcP1B);
+        sum2A = vec_adds(srcM1A, srcP2A);
+        sum2B = vec_adds(srcM1B, srcP2B);
+        sum3A = vec_adds(srcM2A, srcP3A);
+        sum3B = vec_adds(srcM2B, srcP3B);
+
+        pp1A = vec_mladd(sum1A, v20ss, sum3A);
+        pp1B = vec_mladd(sum1B, v20ss, sum3B);
+
+        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
+        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
+
+        psumA = vec_sub(pp1A, pp2A);
+        psumB = vec_sub(pp1B, pp2B);
+
+        vec_st(psumA, 0, tmp);
+        vec_st(psumB, 16, tmp);
+
+        src += srcStride;
+        tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
     }
 
-    srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
-    srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
-    srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
-    srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
-
-    srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
-    srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
-    srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
-    srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
-
-    srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
-    srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
-    srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
-    srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
-
-    sum1A = vec_adds(srcP0A, srcP1A);
-    sum1B = vec_adds(srcP0B, srcP1B);
-    sum2A = vec_adds(srcM1A, srcP2A);
-    sum2B = vec_adds(srcM1B, srcP2B);
-    sum3A = vec_adds(srcM2A, srcP3A);
-    sum3B = vec_adds(srcM2B, srcP3B);
-
-    pp1A = vec_mladd(sum1A, v20ss, sum3A);
-    pp1B = vec_mladd(sum1B, v20ss, sum3B);
-
-    pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
-    pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
-
-    psumA = vec_sub(pp1A, pp2A);
-    psumB = vec_sub(pp1B, pp2B);
-
-    vec_st(psumA, 0, tmp);
-    vec_st(psumB, 16, tmp);
-
-    src += srcStride;
-    tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
-  }
-
-  tmpM2ssA = vec_ld(0, tmpbis);
-  tmpM2ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpM1ssA = vec_ld(0, tmpbis);
-  tmpM1ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpP0ssA = vec_ld(0, tmpbis);
-  tmpP0ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpP1ssA = vec_ld(0, tmpbis);
-  tmpP1ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpP2ssA = vec_ld(0, tmpbis);
-  tmpP2ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-
-  for (i = 0 ; i < 16 ; i++) {
-    const vec_s16_t tmpP3ssA = vec_ld(0, tmpbis);
-    const vec_s16_t tmpP3ssB = vec_ld(16, tmpbis);
-
-    const vec_s16_t sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
-    const vec_s16_t sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
-    const vec_s16_t sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
-    const vec_s16_t sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
-    const vec_s16_t sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
-    const vec_s16_t sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
-
+    tmpM2ssA = vec_ld(0, tmpbis);
+    tmpM2ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpM1ssA = vec_ld(0, tmpbis);
+    tmpM1ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpP0ssA = vec_ld(0, tmpbis);
+    tmpP0ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpP1ssA = vec_ld(0, tmpbis);
+    tmpP1ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpP2ssA = vec_ld(0, tmpbis);
+    tmpP2ssB = vec_ld(16, tmpbis);
     tmpbis += tmpStride;
 
-    tmpM2ssA = tmpM1ssA;
-    tmpM2ssB = tmpM1ssB;
-    tmpM1ssA = tmpP0ssA;
-    tmpM1ssB = tmpP0ssB;
-    tmpP0ssA = tmpP1ssA;
-    tmpP0ssB = tmpP1ssB;
-    tmpP1ssA = tmpP2ssA;
-    tmpP1ssB = tmpP2ssB;
-    tmpP2ssA = tmpP3ssA;
-    tmpP2ssB = tmpP3ssB;
+    for (i = 0 ; i < 16 ; i++) {
+        const vec_s16_t tmpP3ssA = vec_ld(0, tmpbis);
+        const vec_s16_t tmpP3ssB = vec_ld(16, tmpbis);
+
+        const vec_s16_t sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
+        const vec_s16_t sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
+        const vec_s16_t sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
+        const vec_s16_t sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
+        const vec_s16_t sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
+        const vec_s16_t sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
+
+        tmpbis += tmpStride;
 
-    pp1Ae = vec_mule(sum1A, v20ss);
-    pp1Ao = vec_mulo(sum1A, v20ss);
-    pp1Be = vec_mule(sum1B, v20ss);
-    pp1Bo = vec_mulo(sum1B, v20ss);
-
-    pp2Ae = vec_mule(sum2A, v5ss);
-    pp2Ao = vec_mulo(sum2A, v5ss);
-    pp2Be = vec_mule(sum2B, v5ss);
-    pp2Bo = vec_mulo(sum2B, v5ss);
+        tmpM2ssA = tmpM1ssA;
+        tmpM2ssB = tmpM1ssB;
+        tmpM1ssA = tmpP0ssA;
+        tmpM1ssB = tmpP0ssB;
+        tmpP0ssA = tmpP1ssA;
+        tmpP0ssB = tmpP1ssB;
+        tmpP1ssA = tmpP2ssA;
+        tmpP1ssB = tmpP2ssB;
+        tmpP2ssA = tmpP3ssA;
+        tmpP2ssB = tmpP3ssB;
 
-    pp3Ae = vec_sra((vec_s32_t)sum3A, v16ui);
-    pp3Ao = vec_mulo(sum3A, v1ss);
-    pp3Be = vec_sra((vec_s32_t)sum3B, v16ui);
-    pp3Bo = vec_mulo(sum3B, v1ss);
+        pp1Ae = vec_mule(sum1A, v20ss);
+        pp1Ao = vec_mulo(sum1A, v20ss);
+        pp1Be = vec_mule(sum1B, v20ss);
+        pp1Bo = vec_mulo(sum1B, v20ss);
 
-    pp1cAe = vec_add(pp1Ae, v512si);
-    pp1cAo = vec_add(pp1Ao, v512si);
-    pp1cBe = vec_add(pp1Be, v512si);
-    pp1cBo = vec_add(pp1Bo, v512si);
+        pp2Ae = vec_mule(sum2A, v5ss);
+        pp2Ao = vec_mulo(sum2A, v5ss);
+        pp2Be = vec_mule(sum2B, v5ss);
+        pp2Bo = vec_mulo(sum2B, v5ss);
 
-    pp32Ae = vec_sub(pp3Ae, pp2Ae);
-    pp32Ao = vec_sub(pp3Ao, pp2Ao);
-    pp32Be = vec_sub(pp3Be, pp2Be);
-    pp32Bo = vec_sub(pp3Bo, pp2Bo);
+        pp3Ae = vec_sra((vec_s32_t)sum3A, v16ui);
+        pp3Ao = vec_mulo(sum3A, v1ss);
+        pp3Be = vec_sra((vec_s32_t)sum3B, v16ui);
+        pp3Bo = vec_mulo(sum3B, v1ss);
+
+        pp1cAe = vec_add(pp1Ae, v512si);
+        pp1cAo = vec_add(pp1Ao, v512si);
+        pp1cBe = vec_add(pp1Be, v512si);
+        pp1cBo = vec_add(pp1Bo, v512si);
 
-    sumAe = vec_add(pp1cAe, pp32Ae);
-    sumAo = vec_add(pp1cAo, pp32Ao);
-    sumBe = vec_add(pp1cBe, pp32Be);
-    sumBo = vec_add(pp1cBo, pp32Bo);
+        pp32Ae = vec_sub(pp3Ae, pp2Ae);
+        pp32Ao = vec_sub(pp3Ao, pp2Ao);
+        pp32Be = vec_sub(pp3Be, pp2Be);
+        pp32Bo = vec_sub(pp3Bo, pp2Bo);
 
-    ssumAe = vec_sra(sumAe, v10ui);
-    ssumAo = vec_sra(sumAo, v10ui);
-    ssumBe = vec_sra(sumBe, v10ui);
-    ssumBo = vec_sra(sumBo, v10ui);
+        sumAe = vec_add(pp1cAe, pp32Ae);
+        sumAo = vec_add(pp1cAo, pp32Ao);
+        sumBe = vec_add(pp1cBe, pp32Be);
+        sumBo = vec_add(pp1cBo, pp32Bo);
 
-    ssume = vec_packs(ssumAe, ssumBe);
-    ssumo = vec_packs(ssumAo, ssumBo);
+        ssumAe = vec_sra(sumAe, v10ui);
+        ssumAo = vec_sra(sumAo, v10ui);
+        ssumBe = vec_sra(sumBe, v10ui);
+        ssumBo = vec_sra(sumBo, v10ui);
 
-    sumv = vec_packsu(ssume, ssumo);
-    sum = vec_perm(sumv, sumv, mperm);
+        ssume = vec_packs(ssumAe, ssumBe);
+        ssumo = vec_packs(ssumAo, ssumBo);
 
-    ASSERT_ALIGNED(dst);
-    vdst = vec_ld(0, dst);
+        sumv = vec_packsu(ssume, ssumo);
+        sum = vec_perm(sumv, sumv, mperm);
 
-    OP_U8_ALTIVEC(fsum, sum, vdst);
+        ASSERT_ALIGNED(dst);
+        vdst = vec_ld(0, dst);
 
-    vec_st(fsum, 0, dst);
+        OP_U8_ALTIVEC(fsum, sum, vdst);
 
-    dst += dstStride;
-  }
-  POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+        vec_st(fsum, 0, dst);
+
+        dst += dstStride;
+    }
+    POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
 }
--- a/ppc/idct_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/idct_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -22,7 +22,6 @@
  * NOTE: This code is based on GPL code from the libmpeg2 project.  The
  * author, Michel Lespinasses, has given explicit permission to release
  * under LGPL as part of ffmpeg.
- *
  */
 
 /*
--- a/ppc/imgresample_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/imgresample_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -46,8 +46,7 @@
     vector signed short zeros, sumhv, sumlv;
     s = src;
 
-    for(i=0;i<4;i++)
-    {
+    for(i=0;i<4;i++) {
         /*
            The vec_madds later on does an implicit >>15 on the result.
            Since FILTER_BITS is 8, and we have 15 bits of magnitude in
@@ -86,13 +85,11 @@
 
     /* Do our altivec resampling on 16 pixels at once. */
     while(dst_width>=16) {
-        /*
-           Read 16 (potentially unaligned) bytes from each of
+        /* Read 16 (potentially unaligned) bytes from each of
            4 lines into 4 vectors, and split them into shorts.
            Interleave the multipy/accumulate for the resample
            filter with the loads to hide the 3 cycle latency
-           the vec_madds have.
-        */
+           the vec_madds have. */
         tv = (vector unsigned char *) &s[0 * wrap];
         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
         srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
@@ -121,10 +118,8 @@
         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
 
-        /*
-           Pack the results into our destination vector,
-           and do an aligned write of that back to memory.
-        */
+        /* Pack the results into our destination vector,
+           and do an aligned write of that back to memory. */
         dstv = vec_packsu(sumhv, sumlv) ;
         vec_st(dstv, 0, (vector unsigned char *) dst);
 
@@ -133,10 +128,8 @@
         dst_width-=16;
     }
 
-    /*
-       If there are any leftover pixels, resample them
-       with the slow scalar method.
-    */
+    /* If there are any leftover pixels, resample them
+       with the slow scalar method. */
     while(dst_width>0) {
         sum = s[0 * wrap] * filter[0] +
         s[1 * wrap] * filter[1] +
--- a/ppc/int_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/int_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -38,7 +38,7 @@
     vector signed short vpix2, vdiff, vpix1l,vpix1h;
     union { vector signed int vscore;
             int32_t score[4];
-           } u;
+          } u;
     u.vscore = vec_splat_s32(0);
 //
 //XXX lazy way, fix it later
--- a/ppc/mathops.h	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/mathops.h	Sun Jul 20 18:58:30 2008 +0000
@@ -25,14 +25,14 @@
 
 #if defined(ARCH_POWERPC_405)
 /* signed 16x16 -> 32 multiply add accumulate */
-#   define MAC16(rt, ra, rb) \
-        asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+#define MAC16(rt, ra, rb) \
+    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
 
 /* signed 16x16 -> 32 multiply */
-#   define MUL16(ra, rb) \
-        ({ int __rt; \
-         asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
-         __rt; })
+#define MUL16(ra, rb) \
+    ({ int __rt; \
+    asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+    __rt; })
 #endif
 
 #endif /* FFMPEG_PPC_MATHOPS_H */
--- a/ppc/mpegvideo_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/mpegvideo_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -41,15 +41,15 @@
 // transposes a matrix consisting of four vectors with four elements each
 #define TRANSPOSE4(a,b,c,d) \
 do { \
-  __typeof__(a) _trans_ach = vec_mergeh(a, c); \
-  __typeof__(a) _trans_acl = vec_mergel(a, c); \
-  __typeof__(a) _trans_bdh = vec_mergeh(b, d); \
-  __typeof__(a) _trans_bdl = vec_mergel(b, d); \
- \
-  a = vec_mergeh(_trans_ach, _trans_bdh); \
-  b = vec_mergel(_trans_ach, _trans_bdh); \
-  c = vec_mergeh(_trans_acl, _trans_bdl); \
-  d = vec_mergel(_trans_acl, _trans_bdl); \
+    __typeof__(a) _trans_ach = vec_mergeh(a, c); \
+    __typeof__(a) _trans_acl = vec_mergel(a, c); \
+    __typeof__(a) _trans_bdh = vec_mergeh(b, d); \
+    __typeof__(a) _trans_bdl = vec_mergel(b, d); \
+                                                 \
+    a = vec_mergeh(_trans_ach, _trans_bdh);      \
+    b = vec_mergel(_trans_ach, _trans_bdh);      \
+    c = vec_mergeh(_trans_acl, _trans_bdl);      \
+    d = vec_mergel(_trans_acl, _trans_bdl);      \
 } while (0)
 
 
@@ -58,19 +58,19 @@
 // target address is four-byte aligned (which should be always).
 #define LOAD4(vec, address) \
 { \
-    __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address); \
-    vector unsigned char _perm_vec = vec_lvsl(0,(address)); \
-    vec = vec_ld(0, _load_addr); \
-    vec = vec_perm(vec, vec, _perm_vec); \
-    vec = vec_splat(vec, 0); \
+    __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address);  \
+    vector unsigned char _perm_vec = vec_lvsl(0,(address));     \
+    vec = vec_ld(0, _load_addr);                                \
+    vec = vec_perm(vec, vec, _perm_vec);                        \
+    vec = vec_splat(vec, 0);                                    \
 }
 
 
 #define FOUROF(a) AVV(a,a,a,a)
 
 int dct_quantize_altivec(MpegEncContext* s,
-                        DCTELEM* data, int n,
-                        int qscale, int* overflow)
+                         DCTELEM* data, int n,
+                         int qscale, int* overflow)
 {
     int lastNonZero;
     vector float row0, row1, row2, row3, row4, row5, row6, row7;
@@ -137,10 +137,8 @@
 
         int whichPass, whichHalf;
 
-        for(whichPass = 1; whichPass<=2; whichPass++)
-        {
-            for(whichHalf = 1; whichHalf<=2; whichHalf++)
-            {
+        for(whichPass = 1; whichPass<=2; whichPass++) {
+            for(whichHalf = 1; whichHalf<=2; whichHalf++) {
                 vector float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
                 vector float tmp10, tmp11, tmp12, tmp13;
                 vector float z1, z2, z3, z4, z5;
@@ -235,8 +233,7 @@
                 SWAP(row7, alt7);
             }
 
-            if (whichPass == 1)
-            {
+            if (whichPass == 1) {
                 // transpose the data for the second pass
 
                 // First, block transpose the upper right with lower left.
@@ -261,8 +258,7 @@
         const vector signed int* qmat;
         vector float bias, negBias;
 
-        if (s->mb_intra)
-        {
+        if (s->mb_intra) {
             vector signed int baseVector;
 
             // We must cache element 0 in the intra case
@@ -272,9 +268,7 @@
 
             qmat = (vector signed int*)s->q_intra_matrix[qscale];
             biasAddr = &(s->intra_quant_bias);
-        }
-        else
-        {
+        } else {
             qmat = (vector signed int*)s->q_inter_matrix[qscale];
             biasAddr = &(s->inter_quant_bias);
         }
@@ -439,8 +433,7 @@
         // and handle it using the vector unit if we can.  This is the permute used
         // by the altivec idct, so it is common when using the altivec dct.
 
-        if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
-        {
+        if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) {
             TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
         }
 
@@ -456,10 +449,8 @@
     }
 
     // special handling of block[0]
-    if (s->mb_intra)
-    {
-        if (!s->h263_aic)
-        {
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
             if (n < 4)
                 oldBaseValue /= s->y_dc_scale;
             else
@@ -474,8 +465,7 @@
     // need to permute the "no" permutation case.
     if ((lastNonZero > 0) &&
         (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
-        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
-    {
+        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) {
         ff_block_permute(data, s->dsp.idct_permutation,
                 s->intra_scantable.scantable, lastNonZero);
     }
@@ -483,10 +473,8 @@
     return lastNonZero;
 }
 
-/*
-  AltiVec version of dct_unquantize_h263
-  this code assumes `block' is 16 bytes-aligned
-*/
+/* AltiVec version of dct_unquantize_h263
+   this code assumes `block' is 16 bytes-aligned */
 void dct_unquantize_h263_altivec(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
 {
@@ -517,82 +505,81 @@
     }
 
     {
-      register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
-      DECLARE_ALIGNED_16(short, qmul8[]) =
-          {
-            qmul, qmul, qmul, qmul,
-            qmul, qmul, qmul, qmul
-          };
-      DECLARE_ALIGNED_16(short, qadd8[]) =
-          {
-            qadd, qadd, qadd, qadd,
-            qadd, qadd, qadd, qadd
-          };
-      DECLARE_ALIGNED_16(short, nqadd8[]) =
-          {
-            -qadd, -qadd, -qadd, -qadd,
-            -qadd, -qadd, -qadd, -qadd
-          };
-      register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
-      register vector bool short blockv_null, blockv_neg;
-      register short backup_0 = block[0];
-      register int j = 0;
+        register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
+        DECLARE_ALIGNED_16(short, qmul8[]) =
+            {
+              qmul, qmul, qmul, qmul,
+              qmul, qmul, qmul, qmul
+            };
+        DECLARE_ALIGNED_16(short, qadd8[]) =
+            {
+              qadd, qadd, qadd, qadd,
+              qadd, qadd, qadd, qadd
+            };
+        DECLARE_ALIGNED_16(short, nqadd8[]) =
+            {
+              -qadd, -qadd, -qadd, -qadd,
+              -qadd, -qadd, -qadd, -qadd
+            };
+        register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
+        register vector bool short blockv_null, blockv_neg;
+        register short backup_0 = block[0];
+        register int j = 0;
 
-      qmulv = vec_ld(0, qmul8);
-      qaddv = vec_ld(0, qadd8);
-      nqaddv = vec_ld(0, nqadd8);
+        qmulv = vec_ld(0, qmul8);
+        qaddv = vec_ld(0, qadd8);
+        nqaddv = vec_ld(0, nqadd8);
 
-#if 0 // block *is* 16 bytes-aligned, it seems.
-      // first make sure block[j] is 16 bytes-aligned
-      for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
-        level = block[j];
-        if (level) {
-          if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
+#if 0   // block *is* 16 bytes-aligned, it seems.
+        // first make sure block[j] is 16 bytes-aligned
+        for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
+            level = block[j];
+            if (level) {
+                if (level < 0) {
+                    level = level * qmul - qadd;
+                } else {
+                    level = level * qmul + qadd;
+                }
+                block[j] = level;
             }
-            block[j] = level;
         }
-      }
 #endif
 
-      // vectorize all the 16 bytes-aligned blocks
-      // of 8 elements
-      for(; (j + 7) <= nCoeffs ; j+=8)
-      {
-        blockv = vec_ld(j << 1, block);
-        blockv_neg = vec_cmplt(blockv, vczero);
-        blockv_null = vec_cmpeq(blockv, vczero);
-        // choose between +qadd or -qadd as the third operand
-        temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
-        // multiply & add (block{i,i+7} * qmul [+-] qadd)
-        temp1 = vec_mladd(blockv, qmulv, temp1);
-        // put 0 where block[{i,i+7} used to have 0
-        blockv = vec_sel(temp1, blockv, blockv_null);
-        vec_st(blockv, j << 1, block);
-      }
+        // vectorize all the 16 bytes-aligned blocks
+        // of 8 elements
+        for(; (j + 7) <= nCoeffs ; j+=8) {
+            blockv = vec_ld(j << 1, block);
+            blockv_neg = vec_cmplt(blockv, vczero);
+            blockv_null = vec_cmpeq(blockv, vczero);
+            // choose between +qadd or -qadd as the third operand
+            temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
+            // multiply & add (block{i,i+7} * qmul [+-] qadd)
+            temp1 = vec_mladd(blockv, qmulv, temp1);
+            // put 0 where block[{i,i+7}] used to have 0
+            blockv = vec_sel(temp1, blockv, blockv_null);
+            vec_st(blockv, j << 1, block);
+        }
 
-      // if nCoeffs isn't a multiple of 8, finish the job
-      // using good old scalar units.
-      // (we could do it using a truncated vector,
-      // but I'm not sure it's worth the hassle)
-      for(; j <= nCoeffs ; j++) {
-        level = block[j];
-        if (level) {
-          if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
+        // if nCoeffs isn't a multiple of 8, finish the job
+        // using good old scalar units.
+        // (we could do it using a truncated vector,
+        // but I'm not sure it's worth the hassle)
+        for(; j <= nCoeffs ; j++) {
+            level = block[j];
+            if (level) {
+                if (level < 0) {
+                    level = level * qmul - qadd;
+                } else {
+                    level = level * qmul + qadd;
+                }
+                block[j] = level;
             }
-            block[j] = level;
         }
-      }
 
-      if (i == 1)
-      { // cheat. this avoid special-casing the first iteration
-        block[0] = backup_0;
-      }
+        if (i == 1) {
+            // cheat. this avoids special-casing the first iteration
+            block[0] = backup_0;
+        }
     }
 POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
 }
@@ -605,11 +592,9 @@
 {
     if ((mm_flags & MM_ALTIVEC) == 0) return;
 
-    if (s->avctx->lowres==0)
-    {
+    if (s->avctx->lowres==0) {
         if ((s->avctx->idct_algo == FF_IDCT_AUTO) ||
-                (s->avctx->idct_algo == FF_IDCT_ALTIVEC))
-        {
+            (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) {
             s->dsp.idct_put = idct_put_altivec;
             s->dsp.idct_add = idct_add_altivec;
             s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -618,15 +603,13 @@
 
     // Test to make sure that the dct required alignments are met.
     if ((((long)(s->q_intra_matrix) & 0x0f) != 0) ||
-        (((long)(s->q_inter_matrix) & 0x0f) != 0))
-    {
+        (((long)(s->q_inter_matrix) & 0x0f) != 0)) {
         av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned "
                 "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
         return;
     }
 
-    if (((long)(s->intra_scantable.inverse) & 0x0f) != 0)
-    {
+    if (((long)(s->intra_scantable.inverse) & 0x0f) != 0) {
         av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned "
                 "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
         return;
@@ -634,8 +617,7 @@
 
 
     if ((s->avctx->dct_algo == FF_DCT_AUTO) ||
-            (s->avctx->dct_algo == FF_DCT_ALTIVEC))
-    {
+            (s->avctx->dct_algo == FF_DCT_ALTIVEC)) {
 #if 0 /* seems to cause trouble under some circumstances */
         s->dct_quantize = dct_quantize_altivec;
 #endif
--- a/ppc/snow_altivec.c	Sun Jul 20 18:06:41 2008 +0000
+++ b/ppc/snow_altivec.c	Sun Jul 20 18:58:30 2008 +0000
@@ -379,8 +379,7 @@
     v4=(vector signed int *)b4;
     v5=(vector signed int *)b5;
 
-    for (i=0; i< w4;i++)
-    {
+    for (i=0; i< w4;i++) {
 
     #if 0
         b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
@@ -782,8 +781,8 @@
 void snow_init_altivec(DSPContext* c, AVCodecContext *avctx)
 {
 #if 0
-        c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
-        c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
-        c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
+    c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
+    c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
+    c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
 #endif
 }