changeset 28957:e538af2c69c1

Do not assume that long is the same width as an x86 register (e.g. on Win64, long is 32 bits while general-purpose registers are 64 bits).
author ramiro
date Wed, 18 Mar 2009 17:07:30 +0000
parents 6d0da4fd4544
children 99f0c79aba05
files libswscale/rgb2rgb_template.c libswscale/swscale.c libswscale/swscale_template.c libswscale/yuv2rgb.c libswscale/yuv2rgb_template.c
diffstat 5 files changed, 39 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/libswscale/rgb2rgb_template.c	Wed Mar 18 17:02:29 2009 +0000
+++ b/libswscale/rgb2rgb_template.c	Wed Mar 18 17:07:30 2009 +0000
@@ -1339,7 +1339,7 @@
 
 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
-    long idx = 15 - src_size;
+    x86_reg idx = 15 - src_size;
     const uint8_t *s = src-idx;
     uint8_t *d = dst-idx;
 #if HAVE_MMX
@@ -1405,7 +1405,7 @@
 {
     unsigned i;
 #if HAVE_MMX
-    long mmx_size= 23 - src_size;
+    x86_reg mmx_size= 23 - src_size;
     __asm__ volatile (
     "test             %%"REG_a", %%"REG_a"          \n\t"
     "jns                     2f                     \n\t"
@@ -1476,7 +1476,7 @@
                                            long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++)
     {
 #if HAVE_MMX
@@ -1628,7 +1628,7 @@
                                            long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y++)
     {
 #if HAVE_MMX
@@ -1758,7 +1758,7 @@
                                       long lumStride, long chromStride, long srcStride)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2)
     {
 #if HAVE_MMX
@@ -1900,7 +1900,7 @@
 
     for (y=1; y<srcHeight; y++){
 #if HAVE_MMX2 || HAVE_AMD3DNOW
-        const long mmxSize= srcWidth&~15;
+        const x86_reg mmxSize= srcWidth&~15;
         __asm__ volatile(
         "mov           %4, %%"REG_a"            \n\t"
         "1:                                     \n\t"
@@ -1944,7 +1944,7 @@
 
         );
 #else
-        const long mmxSize=1;
+        const x86_reg mmxSize=1;
 #endif
         dst[0        ]= (3*src[0] +   src[srcStride])>>2;
         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
@@ -1996,7 +1996,7 @@
                                       long lumStride, long chromStride, long srcStride)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
     for (y=0; y<height; y+=2)
     {
 #if HAVE_MMX
@@ -2123,7 +2123,7 @@
                                        long lumStride, long chromStride, long srcStride)
 {
     long y;
-    const long chromWidth= width>>1;
+    const x86_reg chromWidth= width>>1;
 #if HAVE_MMX
     for (y=0; y<height-2; y+=2)
     {
@@ -2196,7 +2196,7 @@
             MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
             "add                        $8,      %%"REG_a"  \n\t"
             " js                        1b                  \n\t"
-            : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
+            : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
             : "%"REG_a, "%"REG_d
             );
             ydst += lumStride;
@@ -2440,7 +2440,7 @@
         "add                    $16, %%"REG_a"  \n\t"
         "cmp                     %3, %%"REG_a"  \n\t"
         " jb                     1b             \n\t"
-        ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
+        ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
         : "memory", "%"REG_a""
         );
 #else
@@ -2466,7 +2466,7 @@
         "add                    $16, %%"REG_a"  \n\t"
         "cmp                     %3, %%"REG_a"  \n\t"
         " jb                     1b             \n\t"
-        ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
+        ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
         : "memory", "%"REG_a
         );
 #endif
@@ -2501,7 +2501,8 @@
                                        long srcStride1, long srcStride2,
                                        long dstStride1, long dstStride2)
 {
-    long y,x,w,h;
+    x86_reg y;
+    long x,w,h;
     w=width/2; h=height/2;
 #if HAVE_MMX
     __asm__ volatile(
@@ -2604,7 +2605,8 @@
                                         long srcStride1, long srcStride2,
                                         long srcStride3, long dstStride)
 {
-    long y,x,w,h;
+    x86_reg x;
+    long y,w,h;
     w=width/2; h=height;
     for (y=0;y<h;y++){
     const uint8_t* yp=src1+srcStride1*y;
--- a/libswscale/swscale.c	Wed Mar 18 17:02:29 2009 +0000
+++ b/libswscale/swscale.c	Wed Mar 18 17:07:30 2009 +0000
@@ -1466,13 +1466,13 @@
 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
 {
     uint8_t *fragmentA;
-    long imm8OfPShufW1A;
-    long imm8OfPShufW2A;
-    long fragmentLengthA;
+    x86_reg imm8OfPShufW1A;
+    x86_reg imm8OfPShufW2A;
+    x86_reg fragmentLengthA;
     uint8_t *fragmentB;
-    long imm8OfPShufW1B;
-    long imm8OfPShufW2B;
-    long fragmentLengthB;
+    x86_reg imm8OfPShufW1B;
+    x86_reg imm8OfPShufW2B;
+    x86_reg fragmentLengthB;
     int fragmentPos;
 
     int xpos, i;
--- a/libswscale/swscale_template.c	Wed Mar 18 17:02:29 2009 +0000
+++ b/libswscale/swscale_template.c	Wed Mar 18 17:07:30 2009 +0000
@@ -961,7 +961,7 @@
         long p= uDest ? 3 : 1;
         uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
         uint8_t *dst[3]= {dest, uDest, vDest};
-        long counter[3] = {dstW, chrDstW, chrDstW};
+        x86_reg counter[3] = {dstW, chrDstW, chrDstW};
 
         if (c->flags & SWS_ACCURATE_RND){
             while(p--){
@@ -1024,7 +1024,7 @@
                                        uint8_t *dest, long dstW, long dstY)
 {
 #if HAVE_MMX
-    long dummy=0;
+    x86_reg dummy=0;
     if(!(c->flags & SWS_BITEXACT)){
         if (c->flags & SWS_ACCURATE_RND){
             switch(c->dstFormat){
@@ -1515,7 +1515,7 @@
     "movq                %%mm0, (%2, %%"REG_a") \n\t"
     "add                    $8, %%"REG_a"       \n\t"
     " js                    1b                  \n\t"
-    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
+    : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
     : "%"REG_a
     );
 #else
@@ -1546,7 +1546,7 @@
     "movd                %%mm1, (%2, %%"REG_a") \n\t"
     "add                    $4, %%"REG_a"       \n\t"
     " js                    1b                  \n\t"
-    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
+    : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
     : "%"REG_a
     );
 #else
@@ -1576,7 +1576,7 @@
     "movq              %%mm0, (%2, %%"REG_a")   \n\t"
     "add                  $8, %%"REG_a"         \n\t"
     " js                  1b                    \n\t"
-    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
+    : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
     : "%"REG_a
     );
 #else
@@ -1607,7 +1607,7 @@
     "movd                %%mm1, (%2, %%"REG_a") \n\t"
     "add                    $4, %%"REG_a"       \n\t"
     " js                    1b                  \n\t"
-    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
+    : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
     : "%"REG_a
     );
 #else
@@ -1731,7 +1731,7 @@
         "add                        $4, %%"REG_a"   \n\t"
         " js                        1b              \n\t"
     : "+r" (src)
-    : "r" (dst+width), "g" (-width)
+    : "r" (dst+width), "g" ((x86_reg)-width)
     : "%"REG_a
     );
 }
@@ -1789,7 +1789,7 @@
         "add                        $4, %%"REG_a"   \n\t"
         " js                        1b              \n\t"
     : "+r" (src)
-    : "r" (dstU+width), "r" (dstV+width), "g" (-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
+    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
     : "%"REG_a
     );
 }
@@ -1951,7 +1951,7 @@
     assert(filterSize % 4 == 0 && filterSize>0);
     if (filterSize==4) // Always true for upscaling, sometimes for down, too.
     {
-        long counter= -2*dstW;
+        x86_reg counter= -2*dstW;
         filter-= counter*2;
         filterPos-= counter/2;
         dst-= counter/2;
@@ -1997,7 +1997,7 @@
     }
     else if (filterSize==8)
     {
-        long counter= -2*dstW;
+        x86_reg counter= -2*dstW;
         filter-= counter*4;
         filterPos-= counter/2;
         dst-= counter/2;
@@ -2055,7 +2055,7 @@
     else
     {
         uint8_t *offset = src+filterSize;
-        long counter= -2*dstW;
+        x86_reg counter= -2*dstW;
         //filter-= counter*filterSize/2;
         filterPos-= counter/2;
         dst-= counter/2;
@@ -2098,7 +2098,7 @@
 
         : "+r" (counter), "+r" (filter)
         : "m" (filterPos), "m" (dst), "m"(offset),
-          "m" (src), "r" (filterSize*2)
+          "m" (src), "r" ((x86_reg)filterSize*2)
         : "%"REG_a, "%"REG_c, "%"REG_d
         );
     }
@@ -2289,7 +2289,7 @@
         else
         {
 #endif /* HAVE_MMX2 */
-        long xInc_shr16 = xInc >> 16;
+        x86_reg xInc_shr16 = xInc >> 16;
         uint16_t xInc_mask = xInc & 0xffff;
         //NO MMX just normal asm ...
         __asm__ volatile(
@@ -2575,7 +2575,7 @@
         else
         {
 #endif /* HAVE_MMX2 */
-            long xInc_shr16 = (long) (xInc >> 16);
+            x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
             uint16_t xInc_mask = xInc & 0xffff;
             __asm__ volatile(
             "xor %%"REG_a", %%"REG_a"               \n\t" // i
@@ -2613,7 +2613,7 @@
 /* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
    which is needed to support GCC 4.0. */
 #if ARCH_X86_64 && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-            :: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
+            :: "m" (src1), "m" (dst), "g" ((x86_reg)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
 #else
             :: "m" (src1), "m" (dst), "m" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
 #endif
--- a/libswscale/yuv2rgb.c	Wed Mar 18 17:02:29 2009 +0000
+++ b/libswscale/yuv2rgb.c	Wed Mar 18 17:07:30 2009 +0000
@@ -33,6 +33,7 @@
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
+#include "libavutil/x86_cpu.h"
 
 #define DITHER1XBPP // only for MMX
 
--- a/libswscale/yuv2rgb_template.c	Wed Mar 18 17:02:29 2009 +0000
+++ b/libswscale/yuv2rgb_template.c	Wed Mar 18 17:07:30 2009 +0000
@@ -137,7 +137,7 @@
         uint8_t *py = src[0] + y*srcStride[0];                \
         uint8_t *pu = src[1] + (y>>1)*srcStride[1];           \
         uint8_t *pv = src[2] + (y>>1)*srcStride[2];           \
-        long index= -h_size/2;                                \
+        x86_reg index= -h_size/2;                                \
 
 #define YUV2RGB_INIT                                                       \
         /* This MMX assembly code deals with a SINGLE scan line at a time, \