Mercurial > mplayer.hg
changeset 27513:2550d0c5bcb6
Fix accurate rounding mode on x86_64.
Fixes issue222.
author | michael |
---|---|
date | Sun, 07 Sep 2008 21:06:21 +0000 (2008-09-07) |
parents | c1019e4096ca |
children | fa98d451036f |
files | libswscale/swscale.c libswscale/swscale_internal.h libswscale/swscale_template.c |
diffstat | 3 files changed, 34 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/libswscale/swscale.c Sun Sep 07 18:22:29 2008 +0000 +++ b/libswscale/swscale.c Sun Sep 07 21:06:21 2008 +0000 @@ -1293,7 +1293,7 @@ filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1)); assert(filterSize > 0); filter= av_malloc(filterSize*dstW*sizeof(double)); - if (filterSize >= MAX_FILTER_SIZE || !filter) + if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) goto error; *outFilterSize= filterSize;
--- a/libswscale/swscale_internal.h Sun Sep 07 18:22:29 2008 +0000 +++ b/libswscale/swscale_internal.h Sun Sep 07 21:06:21 2008 +0000 @@ -29,6 +29,8 @@ #include "libavutil/avutil.h" +#define STR(s) AV_TOSTRING(s) //AV_STINGIFY is too long + #define MAX_FILTER_SIZE 256 #define VOFW 2048 @@ -40,6 +42,16 @@ #define ALT32_CORR 1 #endif +#ifdef ARCH_X86_64 +# define APCK_PTR2 8 +# define APCK_COEF 16 +# define APCK_SIZE 24 +#else +# define APCK_PTR2 4 +# define APCK_COEF 8 +# define APCK_SIZE 16 +#endif + typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]);
--- a/libswscale/swscale_template.c Sun Sep 07 18:22:29 2008 +0000 +++ b/libswscale/swscale_template.c Sun Sep 07 21:06:21 2008 +0000 @@ -119,19 +119,19 @@ "1: \n\t"\ "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ - "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\ "movq %%mm0, %%mm3 \n\t"\ "punpcklwd %%mm1, %%mm0 \n\t"\ "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ + "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ "pmaddwd %%mm1, %%mm0 \n\t"\ "pmaddwd %%mm1, %%mm3 \n\t"\ "paddd %%mm0, %%mm4 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\ "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\ - "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ - "add $16, %%"REG_d" \n\t"\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ "movq %%mm2, %%mm0 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\ @@ -271,19 +271,19 @@ "2: \n\t"\ "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ - "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ "movq %%mm0, %%mm3 \n\t"\ "punpcklwd %%mm1, %%mm0 \n\t"\ "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ + "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ "pmaddwd %%mm1, %%mm0 \n\t"\ "pmaddwd %%mm1, %%mm3 \n\t"\ "paddd %%mm0, %%mm4 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\ "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ - "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ - "add $16, %%"REG_d" \n\t"\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ "movq %%mm2, %%mm0 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\ @@ -315,19 +315,19 @@ "2: \n\t"\ "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ - "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ + "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ "movq %%mm0, %%mm3 \n\t"\ "punpcklwd %%mm4, %%mm0 \n\t"\ "punpckhwd %%mm4, %%mm3 \n\t"\ - "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ + "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ "pmaddwd %%mm4, %%mm0 \n\t"\ "pmaddwd %%mm4, %%mm3 \n\t"\ "paddd %%mm0, %%mm1 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ - "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ - "add $16, %%"REG_d" \n\t"\ + "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ + "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\ "movq %%mm2, %%mm0 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\ @@ -3180,18 +3180,19 @@ #ifdef HAVE_MMX int i; if (flags & SWS_ACCURATE_RND){ + int s= APCK_SIZE / 8; for (i=0; i<vLumFilterSize; i+=2){ - lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ]; - lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)]; - lumMmxFilter[2*i+2]= - lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ] + *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; + *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; + lumMmxFilter[s*i+APCK_COEF/4 ]= + lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); } for (i=0; i<vChrFilterSize; i+=2){ - chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ]; - chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)]; - chrMmxFilter[2*i+2]= - chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ] + *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ]; + *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)]; + chrMmxFilter[s*i+APCK_COEF/4 ]= + chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); } }else{