Mercurial > mplayer.hg
comparison postproc/swscale.c @ 2800:7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
author | michael |
---|---|
date | Sat, 10 Nov 2001 20:39:23 +0000 |
parents | 0d7fd1655a89 |
children | e71ae0213431 |
comparison
equal
deleted
inserted
replaced
2799:0d7fd1655a89 | 2800:7847d6b7ad3d |
---|---|
141 "punpcklwd %%mm6, %%mm6 \n\t"\ | 141 "punpcklwd %%mm6, %%mm6 \n\t"\ |
142 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | 142 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
143 "punpcklwd %%mm5, %%mm5 \n\t"\ | 143 "punpcklwd %%mm5, %%mm5 \n\t"\ |
144 "punpcklwd %%mm5, %%mm5 \n\t"\ | 144 "punpcklwd %%mm5, %%mm5 \n\t"\ |
145 "xorl %%eax, %%eax \n\t"\ | 145 "xorl %%eax, %%eax \n\t"\ |
146 ".align 16 \n\t"\ | 146 ".balign 16 \n\t"\ |
147 "1: \n\t"\ | 147 "1: \n\t"\ |
148 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ | 148 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
149 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ | 149 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
150 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 150 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
151 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 151 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
195 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | 195 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
196 "punpcklwd %%mm5, %%mm5 \n\t"\ | 196 "punpcklwd %%mm5, %%mm5 \n\t"\ |
197 "punpcklwd %%mm5, %%mm5 \n\t"\ | 197 "punpcklwd %%mm5, %%mm5 \n\t"\ |
198 "movq %%mm5, asm_uvalpha1 \n\t"\ | 198 "movq %%mm5, asm_uvalpha1 \n\t"\ |
199 "xorl %%eax, %%eax \n\t"\ | 199 "xorl %%eax, %%eax \n\t"\ |
200 ".align 16 \n\t"\ | 200 ".balign 16 \n\t"\ |
201 "1: \n\t"\ | 201 "1: \n\t"\ |
202 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 202 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
203 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 203 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
204 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | 204 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
205 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | 205 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
260 "packuswb %%mm3, %%mm4 \n\t"\ | 260 "packuswb %%mm3, %%mm4 \n\t"\ |
261 "pxor %%mm7, %%mm7 \n\t" | 261 "pxor %%mm7, %%mm7 \n\t" |
262 | 262 |
263 #define YSCALEYUV2RGB1 \ | 263 #define YSCALEYUV2RGB1 \ |
264 "xorl %%eax, %%eax \n\t"\ | 264 "xorl %%eax, %%eax \n\t"\ |
265 ".align 16 \n\t"\ | 265 ".balign 16 \n\t"\ |
266 "1: \n\t"\ | 266 "1: \n\t"\ |
267 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | 267 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
268 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | 268 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
269 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | 269 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
270 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | 270 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
309 "pxor %%mm7, %%mm7 \n\t" | 309 "pxor %%mm7, %%mm7 \n\t" |
310 | 310 |
311 // do vertical chrominance interpolation | 311 // do vertical chrominance interpolation |
312 #define YSCALEYUV2RGB1b \ | 312 #define YSCALEYUV2RGB1b \ |
313 "xorl %%eax, %%eax \n\t"\ | 313 "xorl %%eax, %%eax \n\t"\ |
314 ".align 16 \n\t"\ | 314 ".balign 16 \n\t"\ |
315 "1: \n\t"\ | 315 "1: \n\t"\ |
316 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 316 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
317 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 317 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
318 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | 318 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
319 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | 319 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
1308 //NO MMX just normal asm ... | 1308 //NO MMX just normal asm ... |
1309 asm volatile( | 1309 asm volatile( |
1310 "xorl %%eax, %%eax \n\t" // i | 1310 "xorl %%eax, %%eax \n\t" // i |
1311 "xorl %%ebx, %%ebx \n\t" // xx | 1311 "xorl %%ebx, %%ebx \n\t" // xx |
1312 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | 1312 "xorl %%ecx, %%ecx \n\t" // 2*xalpha |
1313 ".align 16 \n\t" | 1313 ".balign 16 \n\t" |
1314 "1: \n\t" | 1314 "1: \n\t" |
1315 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | 1315 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] |
1316 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | 1316 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] |
1317 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | 1317 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |
1318 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | 1318 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha |
1440 #endif | 1440 #endif |
1441 asm volatile( | 1441 asm volatile( |
1442 "xorl %%eax, %%eax \n\t" // i | 1442 "xorl %%eax, %%eax \n\t" // i |
1443 "xorl %%ebx, %%ebx \n\t" // xx | 1443 "xorl %%ebx, %%ebx \n\t" // xx |
1444 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | 1444 "xorl %%ecx, %%ecx \n\t" // 2*xalpha |
1445 ".align 16 \n\t" | 1445 ".balign 16 \n\t" |
1446 "1: \n\t" | 1446 "1: \n\t" |
1447 "movl %0, %%esi \n\t" | 1447 "movl %0, %%esi \n\t" |
1448 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx] | 1448 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx] |
1449 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1] | 1449 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1] |
1450 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | 1450 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] |