comparison postproc/swscale.c @ 2800:7847d6b7ad3d

.balign or we'll align by 64kb on some architectures
author michael
date Sat, 10 Nov 2001 20:39:23 +0000
parents 0d7fd1655a89
children e71ae0213431
comparison
equal deleted inserted replaced
2799:0d7fd1655a89 2800:7847d6b7ad3d
141 "punpcklwd %%mm6, %%mm6 \n\t"\ 141 "punpcklwd %%mm6, %%mm6 \n\t"\
142 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ 142 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
143 "punpcklwd %%mm5, %%mm5 \n\t"\ 143 "punpcklwd %%mm5, %%mm5 \n\t"\
144 "punpcklwd %%mm5, %%mm5 \n\t"\ 144 "punpcklwd %%mm5, %%mm5 \n\t"\
145 "xorl %%eax, %%eax \n\t"\ 145 "xorl %%eax, %%eax \n\t"\
146 ".align 16 \n\t"\ 146 ".balign 16 \n\t"\
147 "1: \n\t"\ 147 "1: \n\t"\
148 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ 148 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
149 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ 149 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
150 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ 150 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\
151 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ 151 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\
195 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ 195 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
196 "punpcklwd %%mm5, %%mm5 \n\t"\ 196 "punpcklwd %%mm5, %%mm5 \n\t"\
197 "punpcklwd %%mm5, %%mm5 \n\t"\ 197 "punpcklwd %%mm5, %%mm5 \n\t"\
198 "movq %%mm5, asm_uvalpha1 \n\t"\ 198 "movq %%mm5, asm_uvalpha1 \n\t"\
199 "xorl %%eax, %%eax \n\t"\ 199 "xorl %%eax, %%eax \n\t"\
200 ".align 16 \n\t"\ 200 ".balign 16 \n\t"\
201 "1: \n\t"\ 201 "1: \n\t"\
202 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ 202 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
203 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ 203 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
204 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ 204 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
205 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ 205 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
260 "packuswb %%mm3, %%mm4 \n\t"\ 260 "packuswb %%mm3, %%mm4 \n\t"\
261 "pxor %%mm7, %%mm7 \n\t" 261 "pxor %%mm7, %%mm7 \n\t"
262 262
263 #define YSCALEYUV2RGB1 \ 263 #define YSCALEYUV2RGB1 \
264 "xorl %%eax, %%eax \n\t"\ 264 "xorl %%eax, %%eax \n\t"\
265 ".align 16 \n\t"\ 265 ".balign 16 \n\t"\
266 "1: \n\t"\ 266 "1: \n\t"\
267 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ 267 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\
268 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ 268 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
269 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ 269 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
270 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ 270 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
309 "pxor %%mm7, %%mm7 \n\t" 309 "pxor %%mm7, %%mm7 \n\t"
310 310
311 // do vertical chrominance interpolation 311 // do vertical chrominance interpolation
312 #define YSCALEYUV2RGB1b \ 312 #define YSCALEYUV2RGB1b \
313 "xorl %%eax, %%eax \n\t"\ 313 "xorl %%eax, %%eax \n\t"\
314 ".align 16 \n\t"\ 314 ".balign 16 \n\t"\
315 "1: \n\t"\ 315 "1: \n\t"\
316 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ 316 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
317 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ 317 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
318 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ 318 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
319 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ 319 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1308 //NO MMX just normal asm ... 1308 //NO MMX just normal asm ...
1309 asm volatile( 1309 asm volatile(
1310 "xorl %%eax, %%eax \n\t" // i 1310 "xorl %%eax, %%eax \n\t" // i
1311 "xorl %%ebx, %%ebx \n\t" // xx 1311 "xorl %%ebx, %%ebx \n\t" // xx
1312 "xorl %%ecx, %%ecx \n\t" // 2*xalpha 1312 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
1313 ".align 16 \n\t" 1313 ".balign 16 \n\t"
1314 "1: \n\t" 1314 "1: \n\t"
1315 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] 1315 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx]
1316 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] 1316 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1]
1317 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] 1317 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
1318 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha 1318 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
1440 #endif 1440 #endif
1441 asm volatile( 1441 asm volatile(
1442 "xorl %%eax, %%eax \n\t" // i 1442 "xorl %%eax, %%eax \n\t" // i
1443 "xorl %%ebx, %%ebx \n\t" // xx 1443 "xorl %%ebx, %%ebx \n\t" // xx
1444 "xorl %%ecx, %%ecx \n\t" // 2*xalpha 1444 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
1445 ".align 16 \n\t" 1445 ".balign 16 \n\t"
1446 "1: \n\t" 1446 "1: \n\t"
1447 "movl %0, %%esi \n\t" 1447 "movl %0, %%esi \n\t"
1448 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx] 1448 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx]
1449 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1] 1449 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1]
1450 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] 1450 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]