Mercurial > mplayer.hg
comparison postproc/swscale_template.c @ 11122:c552fe6acbaa
rounding fixes
author | michael |
---|---|
date | Wed, 15 Oct 2003 11:21:54 +0000 |
parents | 6e35326c742f |
children | 21e5cb258a95 |
comparison (legend: equal, deleted, inserted, replaced)
11121:554b829860c3 | 11122:c552fe6acbaa |
---|---|
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" | 59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
60 #endif | 60 #endif |
61 | 61 |
62 #define YSCALEYUV2YV12X(x, offset) \ | 62 #define YSCALEYUV2YV12X(x, offset) \ |
63 "xorl %%eax, %%eax \n\t"\ | 63 "xorl %%eax, %%eax \n\t"\ |
64 "pxor %%mm3, %%mm3 \n\t"\ | 64 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ |
65 "pxor %%mm4, %%mm4 \n\t"\ | 65 "movq %%mm3, %%mm4 \n\t"\ |
66 "leal " offset "(%0), %%edx \n\t"\ | 66 "leal " offset "(%0), %%edx \n\t"\ |
67 "movl (%%edx), %%esi \n\t"\ | 67 "movl (%%edx), %%esi \n\t"\ |
68 ".balign 16 \n\t" /* FIXME Unroll? */\ | 68 ".balign 16 \n\t" /* FIXME Unroll? */\ |
69 "1: \n\t"\ | 69 "1: \n\t"\ |
70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ | 70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
82 "psraw $3, %%mm4 \n\t"\ | 82 "psraw $3, %%mm4 \n\t"\ |
83 "packuswb %%mm4, %%mm3 \n\t"\ | 83 "packuswb %%mm4, %%mm3 \n\t"\ |
84 MOVNTQ(%%mm3, (%1, %%eax))\ | 84 MOVNTQ(%%mm3, (%1, %%eax))\ |
85 "addl $8, %%eax \n\t"\ | 85 "addl $8, %%eax \n\t"\ |
86 "cmpl %2, %%eax \n\t"\ | 86 "cmpl %2, %%eax \n\t"\ |
87 "pxor %%mm3, %%mm3 \n\t"\ | 87 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ |
88 "pxor %%mm4, %%mm4 \n\t"\ | 88 "movq %%mm3, %%mm4 \n\t"\ |
89 "leal " offset "(%0), %%edx \n\t"\ | 89 "leal " offset "(%0), %%edx \n\t"\ |
90 "movl (%%edx), %%esi \n\t"\ | 90 "movl (%%edx), %%esi \n\t"\ |
91 "jb 1b \n\t" | 91 "jb 1b \n\t" |
92 | 92 |
93 #define YSCALEYUV2YV121 \ | 93 #define YSCALEYUV2YV121 \ |
115 ".balign 16 \n\t"\ | 115 ".balign 16 \n\t"\ |
116 "nop \n\t"\ | 116 "nop \n\t"\ |
117 "1: \n\t"\ | 117 "1: \n\t"\ |
118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ | 118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
119 "movl (%%edx), %%esi \n\t"\ | 119 "movl (%%edx), %%esi \n\t"\ |
120 "pxor %%mm3, %%mm3 \n\t"\ | 120 "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\ |
121 "pxor %%mm4, %%mm4 \n\t"\ | 121 "movq %%mm3, %%mm4 \n\t"\ |
122 ".balign 16 \n\t"\ | 122 ".balign 16 \n\t"\ |
123 "2: \n\t"\ | 123 "2: \n\t"\ |
124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ | 124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ | 125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ |
126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ | 126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ |
133 "testl %%esi, %%esi \n\t"\ | 133 "testl %%esi, %%esi \n\t"\ |
134 " jnz 2b \n\t"\ | 134 " jnz 2b \n\t"\ |
135 \ | 135 \ |
136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ | 136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
137 "movl (%%edx), %%esi \n\t"\ | 137 "movl (%%edx), %%esi \n\t"\ |
138 "pxor %%mm1, %%mm1 \n\t"\ | 138 "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\ |
139 "pxor %%mm7, %%mm7 \n\t"\ | 139 "movq %%mm1, %%mm7 \n\t"\ |
140 ".balign 16 \n\t"\ | 140 ".balign 16 \n\t"\ |
141 "2: \n\t"\ | 141 "2: \n\t"\ |
142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ | 142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ | 143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ |
144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ | 144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ |
2609 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input | 2609 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input |
2610 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input | 2610 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input |
2611 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input | 2611 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input |
2612 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input | 2612 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input |
2613 | 2613 |
2614 //printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n", | |
2615 // dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample); | |
2614 //handle holes (FAST_BILINEAR & weird filters) | 2616 //handle holes (FAST_BILINEAR & weird filters) |
2615 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; | 2617 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; |
2616 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; | 2618 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; |
2617 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); | 2619 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); |
2618 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) | 2620 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) |