Mercurial > mplayer.hg
changeset 11122:c552fe6acbaa
rounding fixes
author | michael |
---|---|
date | Wed, 15 Oct 2003 11:21:54 +0000 |
parents | 554b829860c3 |
children | b41af00e8c17 |
files | postproc/swscale.c postproc/swscale_internal.h postproc/swscale_template.c |
diffstat | 3 files changed, 27 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/postproc/swscale.c Wed Oct 15 00:08:55 2003 +0000
+++ b/postproc/swscale.c Wed Oct 15 11:21:54 2003 +0000
@@ -222,7 +222,7 @@
  int i;
  for(i=0; i<dstW; i++)
  {
- int val=0;
+ int val=1<<18;
  int j;
  for(j=0; j<lumFilterSize; j++)
  val += lumSrc[j][i] * lumFilter[j];
@@ -233,8 +233,8 @@
  if(uDest != NULL)
  for(i=0; i<chrDstW; i++)
  {
- int u=0;
- int v=0;
+ int u=1<<18;
+ int v=1<<18;
  int j;
  for(j=0; j<chrFilterSize; j++)
  {
@@ -251,10 +251,10 @@
 #define YSCALE_YUV_2_PACKEDX_C(type) \
  for(i=0; i<(dstW>>1); i++){\
  int j;\
- int Y1=0;\
- int Y2=0;\
- int U=0;\
- int V=0;\
+ int Y1=1<<18;\
+ int Y2=1<<18;\
+ int U=1<<18;\
+ int V=1<<18;\
  type *r, *b, *g;\
  const int i2= 2*i;\
  \
@@ -621,8 +621,8 @@
  int acc=0;
  for(i=0; i<dstW-1; i+=2){
  int j;
- int Y1=0;
- int Y2=0;
+ int Y1=1<<18;
+ int Y2=1<<18;
 
  for(j=0; j<lumFilterSize; j++)
  {
@@ -1093,7 +1093,7 @@
  scale/= sum;
  for(j=0; j<*outFilterSize; j++)
  {
- (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
+ (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale + 0.5);
  }
  }
 
@@ -1772,6 +1772,7 @@
  c->srcFormat= srcFormat;
  c->origDstFormat= origDstFormat;
  c->origSrcFormat= origSrcFormat;
+ c->vRounder= 4* 0x0001000100010001ULL;
 
  usesFilter=0;
  if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
```
```diff
--- a/postproc/swscale_internal.h Wed Oct 15 00:08:55 2003 +0000
+++ b/postproc/swscale_internal.h Wed Oct 15 11:21:54 2003 +0000
@@ -109,10 +109,11 @@
 #define Y_OFFSET "8*8"
 #define U_OFFSET "9*8"
 #define V_OFFSET "10*8"
-#define LUM_MMX_FILTER_OFFSET "11*8"
-#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
-#define DSTW_OFFSET "11*8+4*4*256*2"
-#define ESP_OFFSET "11*8+4*4*256*2+4"
+#define VROUNDER_OFFSET "11*8"
+#define LUM_MMX_FILTER_OFFSET "12*8"
+#define CHR_MMX_FILTER_OFFSET "12*8+4*4*256"
+#define DSTW_OFFSET "12*8+4*4*256*2"
+#define ESP_OFFSET "12*8+4*4*256*2+4"
 
  uint64_t redDither __attribute__((aligned(8)));
  uint64_t greenDither __attribute__((aligned(8)));
@@ -126,6 +127,7 @@
  uint64_t yOffset __attribute__((aligned(8)));
  uint64_t uOffset __attribute__((aligned(8)));
  uint64_t vOffset __attribute__((aligned(8)));
+ uint64_t vRounder __attribute__((aligned(8)));
  int32_t lumMmxFilter[4*MAX_FILTER_SIZE];
  int32_t chrMmxFilter[4*MAX_FILTER_SIZE];
  int dstW;
```
```diff
--- a/postproc/swscale_template.c Wed Oct 15 00:08:55 2003 +0000
+++ b/postproc/swscale_template.c Wed Oct 15 11:21:54 2003 +0000
@@ -61,8 +61,8 @@
 
 #define YSCALEYUV2YV12X(x, offset) \
  "xorl %%eax, %%eax \n\t"\
- "pxor %%mm3, %%mm3 \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
  "leal " offset "(%0), %%edx \n\t"\
  "movl (%%edx), %%esi \n\t"\
  ".balign 16 \n\t" /* FIXME Unroll? */\
@@ -84,8 +84,8 @@
  MOVNTQ(%%mm3, (%1, %%eax))\
  "addl $8, %%eax \n\t"\
  "cmpl %2, %%eax \n\t"\
- "pxor %%mm3, %%mm3 \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
  "leal " offset "(%0), %%edx \n\t"\
  "movl (%%edx), %%esi \n\t"\
  "jb 1b \n\t"
@@ -117,8 +117,8 @@
  "1: \n\t"\
  "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
  "movl (%%edx), %%esi \n\t"\
- "pxor %%mm3, %%mm3 \n\t"\
- "pxor %%mm4, %%mm4 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
+ "movq %%mm3, %%mm4 \n\t"\
  ".balign 16 \n\t"\
  "2: \n\t"\
  "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
@@ -135,8 +135,8 @@
  \
  "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
  "movl (%%edx), %%esi \n\t"\
- "pxor %%mm1, %%mm1 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
+ "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
+ "movq %%mm1, %%mm7 \n\t"\
  ".balign 16 \n\t"\
  "2: \n\t"\
  "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
@@ -2611,6 +2611,8 @@
  const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
  const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
 
+//printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
+// dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
  //handle holes (FAST_BILINEAR & weird filters)
  if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
  if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
```