Mercurial > mplayer.hg
comparison libswscale/swscale_template.c @ 27513:2550d0c5bcb6
Fix accurate rounding mode on x86_64.
Fixes issue222.
author | michael |
---|---|
date | Sun, 07 Sep 2008 21:06:21 +0000 |
parents | c1019e4096ca |
children | 9355343a7721 |
comparison
equal
deleted
inserted
replaced
27512:c1019e4096ca | 27513:2550d0c5bcb6 |
---|---|
117 "mov (%%"REG_d"), %%"REG_S" \n\t"\ | 117 "mov (%%"REG_d"), %%"REG_S" \n\t"\ |
118 ASMALIGN(4) \ | 118 ASMALIGN(4) \ |
119 "1: \n\t"\ | 119 "1: \n\t"\ |
120 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ | 120 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ |
121 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ | 121 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ |
122 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ | 122 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ |
123 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\ | 123 "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\ |
124 "movq %%mm0, %%mm3 \n\t"\ | 124 "movq %%mm0, %%mm3 \n\t"\ |
125 "punpcklwd %%mm1, %%mm0 \n\t"\ | 125 "punpcklwd %%mm1, %%mm0 \n\t"\ |
126 "punpckhwd %%mm1, %%mm3 \n\t"\ | 126 "punpckhwd %%mm1, %%mm3 \n\t"\ |
127 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ | 127 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ |
128 "pmaddwd %%mm1, %%mm0 \n\t"\ | 128 "pmaddwd %%mm1, %%mm0 \n\t"\ |
129 "pmaddwd %%mm1, %%mm3 \n\t"\ | 129 "pmaddwd %%mm1, %%mm3 \n\t"\ |
130 "paddd %%mm0, %%mm4 \n\t"\ | 130 "paddd %%mm0, %%mm4 \n\t"\ |
131 "paddd %%mm3, %%mm5 \n\t"\ | 131 "paddd %%mm3, %%mm5 \n\t"\ |
132 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\ | 132 "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\ |
133 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ | 133 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ |
134 "add $16, %%"REG_d" \n\t"\ | 134 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ |
135 "test %%"REG_S", %%"REG_S" \n\t"\ | 135 "test %%"REG_S", %%"REG_S" \n\t"\ |
136 "movq %%mm2, %%mm0 \n\t"\ | 136 "movq %%mm2, %%mm0 \n\t"\ |
137 "punpcklwd %%mm3, %%mm2 \n\t"\ | 137 "punpcklwd %%mm3, %%mm2 \n\t"\ |
138 "punpckhwd %%mm3, %%mm0 \n\t"\ | 138 "punpckhwd %%mm3, %%mm0 \n\t"\ |
139 "pmaddwd %%mm1, %%mm2 \n\t"\ | 139 "pmaddwd %%mm1, %%mm2 \n\t"\ |
269 "pxor %%mm7, %%mm7 \n\t"\ | 269 "pxor %%mm7, %%mm7 \n\t"\ |
270 ASMALIGN(4)\ | 270 ASMALIGN(4)\ |
271 "2: \n\t"\ | 271 "2: \n\t"\ |
272 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ | 272 "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ |
273 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ | 273 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ |
274 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ | 274 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ |
275 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ | 275 "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ |
276 "movq %%mm0, %%mm3 \n\t"\ | 276 "movq %%mm0, %%mm3 \n\t"\ |
277 "punpcklwd %%mm1, %%mm0 \n\t"\ | 277 "punpcklwd %%mm1, %%mm0 \n\t"\ |
278 "punpckhwd %%mm1, %%mm3 \n\t"\ | 278 "punpckhwd %%mm1, %%mm3 \n\t"\ |
279 "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ | 279 "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ |
280 "pmaddwd %%mm1, %%mm0 \n\t"\ | 280 "pmaddwd %%mm1, %%mm0 \n\t"\ |
281 "pmaddwd %%mm1, %%mm3 \n\t"\ | 281 "pmaddwd %%mm1, %%mm3 \n\t"\ |
282 "paddd %%mm0, %%mm4 \n\t"\ | 282 "paddd %%mm0, %%mm4 \n\t"\ |
283 "paddd %%mm3, %%mm5 \n\t"\ | 283 "paddd %%mm3, %%mm5 \n\t"\ |
284 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ | 284 "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ |
285 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ | 285 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ |
286 "add $16, %%"REG_d" \n\t"\ | 286 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ |
287 "test %%"REG_S", %%"REG_S" \n\t"\ | 287 "test %%"REG_S", %%"REG_S" \n\t"\ |
288 "movq %%mm2, %%mm0 \n\t"\ | 288 "movq %%mm2, %%mm0 \n\t"\ |
289 "punpcklwd %%mm3, %%mm2 \n\t"\ | 289 "punpcklwd %%mm3, %%mm2 \n\t"\ |
290 "punpckhwd %%mm3, %%mm0 \n\t"\ | 290 "punpckhwd %%mm3, %%mm0 \n\t"\ |
291 "pmaddwd %%mm1, %%mm2 \n\t"\ | 291 "pmaddwd %%mm1, %%mm2 \n\t"\ |
313 "pxor %%mm6, %%mm6 \n\t"\ | 313 "pxor %%mm6, %%mm6 \n\t"\ |
314 ASMALIGN(4)\ | 314 ASMALIGN(4)\ |
315 "2: \n\t"\ | 315 "2: \n\t"\ |
316 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ | 316 "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ |
317 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ | 317 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ |
318 "mov 4(%%"REG_d"), %%"REG_S" \n\t"\ | 318 "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ |
319 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ | 319 "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ |
320 "movq %%mm0, %%mm3 \n\t"\ | 320 "movq %%mm0, %%mm3 \n\t"\ |
321 "punpcklwd %%mm4, %%mm0 \n\t"\ | 321 "punpcklwd %%mm4, %%mm0 \n\t"\ |
322 "punpckhwd %%mm4, %%mm3 \n\t"\ | 322 "punpckhwd %%mm4, %%mm3 \n\t"\ |
323 "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ | 323 "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ |
324 "pmaddwd %%mm4, %%mm0 \n\t"\ | 324 "pmaddwd %%mm4, %%mm0 \n\t"\ |
325 "pmaddwd %%mm4, %%mm3 \n\t"\ | 325 "pmaddwd %%mm4, %%mm3 \n\t"\ |
326 "paddd %%mm0, %%mm1 \n\t"\ | 326 "paddd %%mm0, %%mm1 \n\t"\ |
327 "paddd %%mm3, %%mm5 \n\t"\ | 327 "paddd %%mm3, %%mm5 \n\t"\ |
328 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ | 328 "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ |
329 "mov 16(%%"REG_d"), %%"REG_S" \n\t"\ | 329 "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ |
330 "add $16, %%"REG_d" \n\t"\ | 330 "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ |
331 "test %%"REG_S", %%"REG_S" \n\t"\ | 331 "test %%"REG_S", %%"REG_S" \n\t"\ |
332 "movq %%mm2, %%mm0 \n\t"\ | 332 "movq %%mm2, %%mm0 \n\t"\ |
333 "punpcklwd %%mm3, %%mm2 \n\t"\ | 333 "punpcklwd %%mm3, %%mm2 \n\t"\ |
334 "punpckhwd %%mm3, %%mm0 \n\t"\ | 334 "punpckhwd %%mm3, %%mm0 \n\t"\ |
335 "pmaddwd %%mm4, %%mm2 \n\t"\ | 335 "pmaddwd %%mm4, %%mm2 \n\t"\ |
3178 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | 3178 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
3179 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | 3179 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
3180 #ifdef HAVE_MMX | 3180 #ifdef HAVE_MMX |
3181 int i; | 3181 int i; |
3182 if (flags & SWS_ACCURATE_RND){ | 3182 if (flags & SWS_ACCURATE_RND){ |
| | 3183 int s= APCK_SIZE / 8; |
3183 for (i=0; i<vLumFilterSize; i+=2){ | 3184 for (i=0; i<vLumFilterSize; i+=2){ |
3184 lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ]; | 3185 *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; |
3185 lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)]; | 3186 *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; |
3186 lumMmxFilter[2*i+2]= | 3187 lumMmxFilter[s*i+APCK_COEF/4 ]= |
3187 lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ] | 3188 lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] |
3188 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); | 3189 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); |
3189 } | 3190 } |
3190 for (i=0; i<vChrFilterSize; i+=2){ | 3191 for (i=0; i<vChrFilterSize; i+=2){ |
3191 chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ]; | 3192 *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ]; |
3192 chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)]; | 3193 *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)]; |
3193 chrMmxFilter[2*i+2]= | 3194 chrMmxFilter[s*i+APCK_COEF/4 ]= |
3194 chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ] | 3195 chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] |
3195 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); | 3196 + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); |
3196 } | 3197 } |
3197 }else{ | 3198 }else{ |
3198 for (i=0; i<vLumFilterSize; i++) | 3199 for (i=0; i<vLumFilterSize; i++) |
3199 { | 3200 { |