changeset 27513:2550d0c5bcb6

Fix accurate rounding mode on x86_64. Fixes issue222.
author michael
date Sun, 07 Sep 2008 21:06:21 +0000
parents c1019e4096ca
children fa98d451036f
files libswscale/swscale.c libswscale/swscale_internal.h libswscale/swscale_template.c
diffstat 3 files changed, 34 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/libswscale/swscale.c	Sun Sep 07 18:22:29 2008 +0000
+++ b/libswscale/swscale.c	Sun Sep 07 21:06:21 2008 +0000
@@ -1293,7 +1293,7 @@
     filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
     assert(filterSize > 0);
     filter= av_malloc(filterSize*dstW*sizeof(double));
-    if (filterSize >= MAX_FILTER_SIZE || !filter)
+    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
         goto error;
     *outFilterSize= filterSize;
 
--- a/libswscale/swscale_internal.h	Sun Sep 07 18:22:29 2008 +0000
+++ b/libswscale/swscale_internal.h	Sun Sep 07 21:06:21 2008 +0000
@@ -29,6 +29,8 @@
 
 #include "libavutil/avutil.h"
 
+#define STR(s)         AV_TOSTRING(s) //AV_STINGIFY is too long
+
 #define MAX_FILTER_SIZE 256
 
 #define VOFW 2048
@@ -40,6 +42,16 @@
 #define ALT32_CORR   1
 #endif
 
+#ifdef ARCH_X86_64
+#   define APCK_PTR2 8
+#   define APCK_COEF 16
+#   define APCK_SIZE 24
+#else
+#   define APCK_PTR2 4
+#   define APCK_COEF 8
+#   define APCK_SIZE 16
+#endif
+
 typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
              int srcSliceH, uint8_t* dst[], int dstStride[]);
 
--- a/libswscale/swscale_template.c	Sun Sep 07 18:22:29 2008 +0000
+++ b/libswscale/swscale_template.c	Sun Sep 07 21:06:21 2008 +0000
@@ -119,19 +119,19 @@
     "1:                                                 \n\t"\
     "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
     "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
-    "mov                       4(%%"REG_d"), %%"REG_S"  \n\t"\
+    "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
     "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
     "movq                             %%mm0, %%mm3      \n\t"\
     "punpcklwd                        %%mm1, %%mm0      \n\t"\
     "punpckhwd                        %%mm1, %%mm3      \n\t"\
-    "movq                      8(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
+    "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
     "pmaddwd                          %%mm1, %%mm0      \n\t"\
     "pmaddwd                          %%mm1, %%mm3      \n\t"\
     "paddd                            %%mm0, %%mm4      \n\t"\
     "paddd                            %%mm3, %%mm5      \n\t"\
     "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
-    "mov                      16(%%"REG_d"), %%"REG_S"  \n\t"\
-    "add                                $16, %%"REG_d"  \n\t"\
+    "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
+    "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
     "test                         %%"REG_S", %%"REG_S"  \n\t"\
     "movq                             %%mm2, %%mm0      \n\t"\
     "punpcklwd                        %%mm3, %%mm2      \n\t"\
@@ -271,19 +271,19 @@
     "2:                                             \n\t"\
     "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
     "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
-    "mov                4(%%"REG_d"), %%"REG_S"     \n\t"\
+    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
     "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
     "movq                      %%mm0, %%mm3         \n\t"\
     "punpcklwd                 %%mm1, %%mm0         \n\t"\
     "punpckhwd                 %%mm1, %%mm3         \n\t"\
-    "movq               8(%%"REG_d"), %%mm1         \n\t" /* filterCoeff */\
+    "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
     "pmaddwd                   %%mm1, %%mm0         \n\t"\
     "pmaddwd                   %%mm1, %%mm3         \n\t"\
     "paddd                     %%mm0, %%mm4         \n\t"\
     "paddd                     %%mm3, %%mm5         \n\t"\
     "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
-    "mov               16(%%"REG_d"), %%"REG_S"     \n\t"\
-    "add                         $16, %%"REG_d"     \n\t"\
+    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
+    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
     "test                  %%"REG_S", %%"REG_S"     \n\t"\
     "movq                      %%mm2, %%mm0         \n\t"\
     "punpcklwd                 %%mm3, %%mm2         \n\t"\
@@ -315,19 +315,19 @@
     "2:                                             \n\t"\
     "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
     "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
-    "mov                4(%%"REG_d"), %%"REG_S"     \n\t"\
+    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
     "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
     "movq                      %%mm0, %%mm3         \n\t"\
     "punpcklwd                 %%mm4, %%mm0         \n\t"\
     "punpckhwd                 %%mm4, %%mm3         \n\t"\
-    "movq               8(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
+    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
     "pmaddwd                   %%mm4, %%mm0         \n\t"\
     "pmaddwd                   %%mm4, %%mm3         \n\t"\
     "paddd                     %%mm0, %%mm1         \n\t"\
     "paddd                     %%mm3, %%mm5         \n\t"\
     "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
-    "mov               16(%%"REG_d"), %%"REG_S"     \n\t"\
-    "add                         $16, %%"REG_d"     \n\t"\
+    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
+    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
     "test                  %%"REG_S", %%"REG_S"     \n\t"\
     "movq                      %%mm2, %%mm0         \n\t"\
     "punpcklwd                 %%mm3, %%mm2         \n\t"\
@@ -3180,18 +3180,19 @@
 #ifdef HAVE_MMX
             int i;
         if (flags & SWS_ACCURATE_RND){
+            int s= APCK_SIZE / 8;
             for (i=0; i<vLumFilterSize; i+=2){
-                lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i  ];
-                lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)];
-                lumMmxFilter[2*i+2]=
-                lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i    ]
+                *(void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
+                *(void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
+                          lumMmxFilter[s*i+APCK_COEF/4  ]=
+                          lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
                     + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
             }
             for (i=0; i<vChrFilterSize; i+=2){
-                chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i  ];
-                chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)];
-                chrMmxFilter[2*i+2]=
-                chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i    ]
+                *(void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
+                *(void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrSrcPtr[i+(vChrFilterSize>1)];
+                          chrMmxFilter[s*i+APCK_COEF/4  ]=
+                          chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
                     + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
             }
         }else{