comparison libswscale/swscale_template.c @ 27513:2550d0c5bcb6

Fix accurate rounding mode on x86_64. Fixes issue222.
author michael
date Sun, 07 Sep 2008 21:06:21 +0000
parents c1019e4096ca
children 9355343a7721
comparison
equal deleted inserted replaced
27512:c1019e4096ca 27513:2550d0c5bcb6
117 "mov (%%"REG_d"), %%"REG_S" \n\t"\ 117 "mov (%%"REG_d"), %%"REG_S" \n\t"\
118 ASMALIGN(4) \ 118 ASMALIGN(4) \
119 "1: \n\t"\ 119 "1: \n\t"\
120 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ 120 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
121 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ 121 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
122 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ 122 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
123 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\ 123 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
124 "movq %%mm0, %%mm3 \n\t"\ 124 "movq %%mm0, %%mm3 \n\t"\
125 "punpcklwd %%mm1, %%mm0 \n\t"\ 125 "punpcklwd %%mm1, %%mm0 \n\t"\
126 "punpckhwd %%mm1, %%mm3 \n\t"\ 126 "punpckhwd %%mm1, %%mm3 \n\t"\
127 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ 127 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
128 "pmaddwd %%mm1, %%mm0 \n\t"\ 128 "pmaddwd %%mm1, %%mm0 \n\t"\
129 "pmaddwd %%mm1, %%mm3 \n\t"\ 129 "pmaddwd %%mm1, %%mm3 \n\t"\
130 "paddd %%mm0, %%mm4 \n\t"\ 130 "paddd %%mm0, %%mm4 \n\t"\
131 "paddd %%mm3, %%mm5 \n\t"\ 131 "paddd %%mm3, %%mm5 \n\t"\
132 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\ 132 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
133 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ 133 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
134 "add $16, %%"REG_d" \n\t"\ 134 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
135 "test %%"REG_S", %%"REG_S" \n\t"\ 135 "test %%"REG_S", %%"REG_S" \n\t"\
136 "movq %%mm2, %%mm0 \n\t"\ 136 "movq %%mm2, %%mm0 \n\t"\
137 "punpcklwd %%mm3, %%mm2 \n\t"\ 137 "punpcklwd %%mm3, %%mm2 \n\t"\
138 "punpckhwd %%mm3, %%mm0 \n\t"\ 138 "punpckhwd %%mm3, %%mm0 \n\t"\
139 "pmaddwd %%mm1, %%mm2 \n\t"\ 139 "pmaddwd %%mm1, %%mm2 \n\t"\
269 "pxor %%mm7, %%mm7 \n\t"\ 269 "pxor %%mm7, %%mm7 \n\t"\
270 ASMALIGN(4)\ 270 ASMALIGN(4)\
271 "2: \n\t"\ 271 "2: \n\t"\
272 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ 272 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
273 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ 273 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
274 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ 274 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
275 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ 275 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
276 "movq %%mm0, %%mm3 \n\t"\ 276 "movq %%mm0, %%mm3 \n\t"\
277 "punpcklwd %%mm1, %%mm0 \n\t"\ 277 "punpcklwd %%mm1, %%mm0 \n\t"\
278 "punpckhwd %%mm1, %%mm3 \n\t"\ 278 "punpckhwd %%mm1, %%mm3 \n\t"\
279 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ 279 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
280 "pmaddwd %%mm1, %%mm0 \n\t"\ 280 "pmaddwd %%mm1, %%mm0 \n\t"\
281 "pmaddwd %%mm1, %%mm3 \n\t"\ 281 "pmaddwd %%mm1, %%mm3 \n\t"\
282 "paddd %%mm0, %%mm4 \n\t"\ 282 "paddd %%mm0, %%mm4 \n\t"\
283 "paddd %%mm3, %%mm5 \n\t"\ 283 "paddd %%mm3, %%mm5 \n\t"\
284 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ 284 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
285 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ 285 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
286 "add $16, %%"REG_d" \n\t"\ 286 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
287 "test %%"REG_S", %%"REG_S" \n\t"\ 287 "test %%"REG_S", %%"REG_S" \n\t"\
288 "movq %%mm2, %%mm0 \n\t"\ 288 "movq %%mm2, %%mm0 \n\t"\
289 "punpcklwd %%mm3, %%mm2 \n\t"\ 289 "punpcklwd %%mm3, %%mm2 \n\t"\
290 "punpckhwd %%mm3, %%mm0 \n\t"\ 290 "punpckhwd %%mm3, %%mm0 \n\t"\
291 "pmaddwd %%mm1, %%mm2 \n\t"\ 291 "pmaddwd %%mm1, %%mm2 \n\t"\
313 "pxor %%mm6, %%mm6 \n\t"\ 313 "pxor %%mm6, %%mm6 \n\t"\
314 ASMALIGN(4)\ 314 ASMALIGN(4)\
315 "2: \n\t"\ 315 "2: \n\t"\
316 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ 316 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
317 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ 317 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
318 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ 318 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
319 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ 319 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
320 "movq %%mm0, %%mm3 \n\t"\ 320 "movq %%mm0, %%mm3 \n\t"\
321 "punpcklwd %%mm4, %%mm0 \n\t"\ 321 "punpcklwd %%mm4, %%mm0 \n\t"\
322 "punpckhwd %%mm4, %%mm3 \n\t"\ 322 "punpckhwd %%mm4, %%mm3 \n\t"\
323 "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ 323 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
324 "pmaddwd %%mm4, %%mm0 \n\t"\ 324 "pmaddwd %%mm4, %%mm0 \n\t"\
325 "pmaddwd %%mm4, %%mm3 \n\t"\ 325 "pmaddwd %%mm4, %%mm3 \n\t"\
326 "paddd %%mm0, %%mm1 \n\t"\ 326 "paddd %%mm0, %%mm1 \n\t"\
327 "paddd %%mm3, %%mm5 \n\t"\ 327 "paddd %%mm3, %%mm5 \n\t"\
328 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ 328 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
329 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ 329 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
330 "add $16, %%"REG_d" \n\t"\ 330 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
331 "test %%"REG_S", %%"REG_S" \n\t"\ 331 "test %%"REG_S", %%"REG_S" \n\t"\
332 "movq %%mm2, %%mm0 \n\t"\ 332 "movq %%mm2, %%mm0 \n\t"\
333 "punpcklwd %%mm3, %%mm2 \n\t"\ 333 "punpcklwd %%mm3, %%mm2 \n\t"\
334 "punpckhwd %%mm3, %%mm0 \n\t"\ 334 "punpckhwd %%mm3, %%mm0 \n\t"\
335 "pmaddwd %%mm4, %%mm2 \n\t"\ 335 "pmaddwd %%mm4, %%mm2 \n\t"\
3178 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; 3178 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
3179 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; 3179 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
3180 #ifdef HAVE_MMX 3180 #ifdef HAVE_MMX
3181 int i; 3181 int i;
3182 if (flags & SWS_ACCURATE_RND){ 3182 if (flags & SWS_ACCURATE_RND){
3183 int s= APCK_SIZE / 8;
3183 for (i=0; i<vLumFilterSize; i+=2){ 3184 for (i=0; i<vLumFilterSize; i+=2){
3184 lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ]; 3185 *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
3185 lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)]; 3186 *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
3186 lumMmxFilter[2*i+2]= 3187 lumMmxFilter[s*i+APCK_COEF/4 ]=
3187 lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ] 3188 lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
3188 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); 3189 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
3189 } 3190 }
3190 for (i=0; i<vChrFilterSize; i+=2){ 3191 for (i=0; i<vChrFilterSize; i+=2){
3191 chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ]; 3192 *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
3192 chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)]; 3193 *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
3193 chrMmxFilter[2*i+2]= 3194 chrMmxFilter[s*i+APCK_COEF/4 ]=
3194 chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ] 3195 chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
3195 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); 3196 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
3196 } 3197 }
3197 }else{ 3198 }else{
3198 for (i=0; i<vLumFilterSize; i++) 3199 for (i=0; i<vLumFilterSize; i++)
3199 { 3200 {