comparison postproc/swscale_template.c @ 11122:c552fe6acbaa

rounding fixes
author michael
date Wed, 15 Oct 2003 11:21:54 +0000
parents 6e35326c742f
children 21e5cb258a95
comparison
equal deleted inserted replaced
11121:554b829860c3 11122:c552fe6acbaa
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" 59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
60 #endif 60 #endif
61 61
62 #define YSCALEYUV2YV12X(x, offset) \ 62 #define YSCALEYUV2YV12X(x, offset) \
63 "xorl %%eax, %%eax \n\t"\ 63 "xorl %%eax, %%eax \n\t"\
64 "pxor %%mm3, %%mm3 \n\t"\ 64 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
65 "pxor %%mm4, %%mm4 \n\t"\ 65 "movq %%mm3, %%mm4 \n\t"\
66 "leal " offset "(%0), %%edx \n\t"\ 66 "leal " offset "(%0), %%edx \n\t"\
67 "movl (%%edx), %%esi \n\t"\ 67 "movl (%%edx), %%esi \n\t"\
68 ".balign 16 \n\t" /* FIXME Unroll? */\ 68 ".balign 16 \n\t" /* FIXME Unroll? */\
69 "1: \n\t"\ 69 "1: \n\t"\
70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ 70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
82 "psraw $3, %%mm4 \n\t"\ 82 "psraw $3, %%mm4 \n\t"\
83 "packuswb %%mm4, %%mm3 \n\t"\ 83 "packuswb %%mm4, %%mm3 \n\t"\
84 MOVNTQ(%%mm3, (%1, %%eax))\ 84 MOVNTQ(%%mm3, (%1, %%eax))\
85 "addl $8, %%eax \n\t"\ 85 "addl $8, %%eax \n\t"\
86 "cmpl %2, %%eax \n\t"\ 86 "cmpl %2, %%eax \n\t"\
87 "pxor %%mm3, %%mm3 \n\t"\ 87 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
88 "pxor %%mm4, %%mm4 \n\t"\ 88 "movq %%mm3, %%mm4 \n\t"\
89 "leal " offset "(%0), %%edx \n\t"\ 89 "leal " offset "(%0), %%edx \n\t"\
90 "movl (%%edx), %%esi \n\t"\ 90 "movl (%%edx), %%esi \n\t"\
91 "jb 1b \n\t" 91 "jb 1b \n\t"
92 92
93 #define YSCALEYUV2YV121 \ 93 #define YSCALEYUV2YV121 \
115 ".balign 16 \n\t"\ 115 ".balign 16 \n\t"\
116 "nop \n\t"\ 116 "nop \n\t"\
117 "1: \n\t"\ 117 "1: \n\t"\
118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ 118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
119 "movl (%%edx), %%esi \n\t"\ 119 "movl (%%edx), %%esi \n\t"\
120 "pxor %%mm3, %%mm3 \n\t"\ 120 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
121 "pxor %%mm4, %%mm4 \n\t"\ 121 "movq %%mm3, %%mm4 \n\t"\
122 ".balign 16 \n\t"\ 122 ".balign 16 \n\t"\
123 "2: \n\t"\ 123 "2: \n\t"\
124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ 124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ 125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\
126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ 126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\
133 "testl %%esi, %%esi \n\t"\ 133 "testl %%esi, %%esi \n\t"\
134 " jnz 2b \n\t"\ 134 " jnz 2b \n\t"\
135 \ 135 \
136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ 136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
137 "movl (%%edx), %%esi \n\t"\ 137 "movl (%%edx), %%esi \n\t"\
138 "pxor %%mm1, %%mm1 \n\t"\ 138 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
139 "pxor %%mm7, %%mm7 \n\t"\ 139 "movq %%mm1, %%mm7 \n\t"\
140 ".balign 16 \n\t"\ 140 ".balign 16 \n\t"\
141 "2: \n\t"\ 141 "2: \n\t"\
142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ 142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ 143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\
144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ 144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\
2609 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input 2609 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2610 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input 2610 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2611 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input 2611 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
2612 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input 2612 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
2613 2613
2614 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
2615 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
2614 //handle holes (FAST_BILINEAR & weird filters) 2616 //handle holes (FAST_BILINEAR & weird filters)
2615 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; 2617 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2616 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; 2618 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2617 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); 2619 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
2618 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) 2620 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)