Mercurial > mplayer.hg
comparison postproc/swscale_template.c @ 9417:5eea6d903b4c
cleanup
author | michael |
---|---|
date | Thu, 13 Feb 2003 21:38:43 +0000 |
parents | 04c6fd75ed96 |
children | 53f03173e48f |
9416:83fe90af3e16 (old) | 9417:5eea6d903b4c (new) |
---|---|
343 "packuswb %%mm0, %%mm2 \n\t"\ | 343 "packuswb %%mm0, %%mm2 \n\t"\ |
344 "packuswb %%mm6, %%mm5 \n\t"\ | 344 "packuswb %%mm6, %%mm5 \n\t"\ |
345 "packuswb %%mm3, %%mm4 \n\t"\ | 345 "packuswb %%mm3, %%mm4 \n\t"\ |
346 "pxor %%mm7, %%mm7 \n\t" | 346 "pxor %%mm7, %%mm7 \n\t" |
347 | 347 |
348 #define YSCALEYUV2PACKED1 \ | 348 #define YSCALEYUV2PACKED1(index, c) \ |
349 "xorl %%eax, %%eax \n\t"\ | 349 "xorl "#index", "#index" \n\t"\ |
350 ".balign 16 \n\t"\ | 350 ".balign 16 \n\t"\ |
351 "1: \n\t"\ | 351 "1: \n\t"\ |
352 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | 352 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
353 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | 353 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
354 "psraw $7, %%mm3 \n\t" \ | 354 "psraw $7, %%mm3 \n\t" \ |
355 "psraw $7, %%mm4 \n\t" \ | 355 "psraw $7, %%mm4 \n\t" \ |
356 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | 356 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
357 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | 357 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
358 "psraw $7, %%mm1 \n\t" \ | 358 "psraw $7, %%mm1 \n\t" \ |
359 "psraw $7, %%mm7 \n\t" \ | 359 "psraw $7, %%mm7 \n\t" \ |
360 | 360 |
361 #define YSCALEYUV2RGB1 \ | 361 #define YSCALEYUV2RGB1(index, c) \ |
362 "xorl %%eax, %%eax \n\t"\ | 362 "xorl "#index", "#index" \n\t"\ |
363 ".balign 16 \n\t"\ | 363 ".balign 16 \n\t"\ |
364 "1: \n\t"\ | 364 "1: \n\t"\ |
365 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | 365 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
366 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | 366 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
367 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | 367 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
368 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | 368 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
369 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ | 369 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
370 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | 370 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ |
371 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | 371 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
372 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | 372 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
373 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ | 373 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ |
374 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | 374 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ |
375 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ | 375 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
376 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | 376 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
377 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | 377 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
378 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | 378 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
379 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | 379 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
380 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ | 380 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ |
381 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | 381 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ |
382 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | 382 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ |
383 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | 383 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ |
384 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | 384 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ |
385 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | 385 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ |
386 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ | 386 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
387 "paddw %%mm3, %%mm4 \n\t"\ | 387 "paddw %%mm3, %%mm4 \n\t"\ |
388 "movq %%mm2, %%mm0 \n\t"\ | 388 "movq %%mm2, %%mm0 \n\t"\ |
389 "movq %%mm5, %%mm6 \n\t"\ | 389 "movq %%mm5, %%mm6 \n\t"\ |
390 "movq %%mm4, %%mm3 \n\t"\ | 390 "movq %%mm4, %%mm3 \n\t"\ |
404 "packuswb %%mm0, %%mm2 \n\t"\ | 404 "packuswb %%mm0, %%mm2 \n\t"\ |
405 "packuswb %%mm6, %%mm5 \n\t"\ | 405 "packuswb %%mm6, %%mm5 \n\t"\ |
406 "packuswb %%mm3, %%mm4 \n\t"\ | 406 "packuswb %%mm3, %%mm4 \n\t"\ |
407 "pxor %%mm7, %%mm7 \n\t" | 407 "pxor %%mm7, %%mm7 \n\t" |
408 | 408 |
409 #define YSCALEYUV2PACKED1b \ | 409 #define YSCALEYUV2PACKED1b(index, c) \ |
410 "xorl %%eax, %%eax \n\t"\ | 410 "xorl "#index", "#index" \n\t"\ |
411 ".balign 16 \n\t"\ | 411 ".balign 16 \n\t"\ |
412 "1: \n\t"\ | 412 "1: \n\t"\ |
413 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 413 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
414 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 414 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
415 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | 415 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
416 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | 416 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
417 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ | 417 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
418 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | 418 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ |
419 "psrlw $8, %%mm3 \n\t" \ | 419 "psrlw $8, %%mm3 \n\t" \ |
420 "psrlw $8, %%mm4 \n\t" \ | 420 "psrlw $8, %%mm4 \n\t" \ |
421 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | 421 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
422 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | 422 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
423 "psraw $7, %%mm1 \n\t" \ | 423 "psraw $7, %%mm1 \n\t" \ |
424 "psraw $7, %%mm7 \n\t" | 424 "psraw $7, %%mm7 \n\t" |
425 | 425 |
426 // do vertical chrominance interpolation | 426 // do vertical chrominance interpolation |
427 #define YSCALEYUV2RGB1b \ | 427 #define YSCALEYUV2RGB1b(index, c) \ |
428 "xorl %%eax, %%eax \n\t"\ | 428 "xorl "#index", "#index" \n\t"\ |
429 ".balign 16 \n\t"\ | 429 ".balign 16 \n\t"\ |
430 "1: \n\t"\ | 430 "1: \n\t"\ |
431 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | 431 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
432 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | 432 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
433 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | 433 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
434 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | 434 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
435 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ | 435 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
436 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | 436 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ |
437 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ | 437 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ |
438 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ | 438 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ |
439 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ | 439 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
440 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | 440 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ |
441 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | 441 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
442 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | 442 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
443 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ | 443 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ |
444 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | 444 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ |
445 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ | 445 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
446 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | 446 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
447 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | 447 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ |
448 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | 448 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
449 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | 449 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
450 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ | 450 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ |
451 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | 451 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ |
452 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | 452 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ |
453 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | 453 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ |
454 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | 454 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ |
455 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | 455 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ |
456 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ | 456 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
457 "paddw %%mm3, %%mm4 \n\t"\ | 457 "paddw %%mm3, %%mm4 \n\t"\ |
458 "movq %%mm2, %%mm0 \n\t"\ | 458 "movq %%mm2, %%mm0 \n\t"\ |
459 "movq %%mm5, %%mm6 \n\t"\ | 459 "movq %%mm5, %%mm6 \n\t"\ |
460 "movq %%mm4, %%mm3 \n\t"\ | 460 "movq %%mm4, %%mm3 \n\t"\ |
1257 * YV12 to RGB without scaling or interpolating | 1257 * YV12 to RGB without scaling or interpolating |
1258 */ | 1258 */ |
1259 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, | 1259 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, |
1260 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) | 1260 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) |
1261 { | 1261 { |
1262 #ifdef HAVE_MMX | |
1263 int uvalpha1=uvalpha^4095; | |
1264 #endif | |
1265 const int yalpha1=0; | 1262 const int yalpha1=0; |
1266 int i; | 1263 int i; |
1267 | 1264 |
1268 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 | 1265 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 |
1269 const int yalpha= 4096; //FIXME ... | 1266 const int yalpha= 4096; //FIXME ... |
1279 { | 1276 { |
1280 switch(dstFormat) | 1277 switch(dstFormat) |
1281 { | 1278 { |
1282 case IMGFMT_BGR32: | 1279 case IMGFMT_BGR32: |
1283 asm volatile( | 1280 asm volatile( |
1284 YSCALEYUV2RGB1 | 1281 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1285 WRITEBGR32(%4, %5, %%eax) | 1282 "movl %4, %%esp \n\t" |
1286 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1283 YSCALEYUV2RGB1(%%eax, %5) |
1287 "m" (yalpha1), "m" (uvalpha1) | 1284 WRITEBGR32(%%esp, 8280(%5), %%eax) |
1285 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1286 | |
1287 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1288 "r" (&c->redDither) | |
1288 : "%eax" | 1289 : "%eax" |
1289 ); | 1290 ); |
1290 return; | 1291 return; |
1291 case IMGFMT_BGR24: | 1292 case IMGFMT_BGR24: |
1292 asm volatile( | 1293 asm volatile( |
1293 "movl %4, %%ebx \n\t" | 1294 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1294 YSCALEYUV2RGB1 | 1295 "movl %4, %%esp \n\t" |
1295 WRITEBGR24(%%ebx, %5, %%eax) | 1296 YSCALEYUV2RGB1(%%eax, %5) |
1296 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), | 1297 WRITEBGR24(%%esp, 8280(%5), %%eax) |
1297 "m" (yalpha1), "m" (uvalpha1) | 1298 "movl "ESP_OFFSET"(%5), %%esp \n\t" |
1298 : "%eax", "%ebx" | 1299 |
1300 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1301 "r" (&c->redDither) | |
1302 : "%eax" | |
1299 ); | 1303 ); |
1300 return; | 1304 return; |
1301 case IMGFMT_BGR15: | 1305 case IMGFMT_BGR15: |
1302 asm volatile( | 1306 asm volatile( |
1303 YSCALEYUV2RGB1 | 1307 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1308 "movl %4, %%esp \n\t" | |
1309 YSCALEYUV2RGB1(%%eax, %5) | |
1304 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1310 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1305 #ifdef DITHER1XBPP | 1311 #ifdef DITHER1XBPP |
1306 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | 1312 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1307 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | 1313 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" |
1308 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | 1314 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1309 #endif | 1315 #endif |
1310 WRITEBGR15(%4, %5, %%eax) | 1316 WRITEBGR15(%%esp, 8280(%5), %%eax) |
1311 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1317 "movl "ESP_OFFSET"(%5), %%esp \n\t" |
1312 "m" (yalpha1), "m" (uvalpha1) | 1318 |
1319 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1320 "r" (&c->redDither) | |
1313 : "%eax" | 1321 : "%eax" |
1314 ); | 1322 ); |
1315 return; | 1323 return; |
1316 case IMGFMT_BGR16: | 1324 case IMGFMT_BGR16: |
1317 asm volatile( | 1325 asm volatile( |
1318 YSCALEYUV2RGB1 | 1326 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1327 "movl %4, %%esp \n\t" | |
1328 YSCALEYUV2RGB1(%%eax, %5) | |
1319 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1329 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1320 #ifdef DITHER1XBPP | 1330 #ifdef DITHER1XBPP |
1321 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | 1331 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1322 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | 1332 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" |
1323 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | 1333 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1324 #endif | 1334 #endif |
1325 | 1335 |
1326 WRITEBGR16(%4, %5, %%eax) | 1336 WRITEBGR16(%%esp, 8280(%5), %%eax) |
1327 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1337 "movl "ESP_OFFSET"(%5), %%esp \n\t" |
1328 "m" (yalpha1), "m" (uvalpha1) | 1338 |
1339 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1340 "r" (&c->redDither) | |
1329 : "%eax" | 1341 : "%eax" |
1330 ); | 1342 ); |
1331 return; | 1343 return; |
1332 case IMGFMT_YUY2: | 1344 case IMGFMT_YUY2: |
1333 asm volatile( | 1345 asm volatile( |
1334 YSCALEYUV2PACKED1 | 1346 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1335 WRITEYUY2(%4, %5, %%eax) | 1347 "movl %4, %%esp \n\t" |
1336 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1348 YSCALEYUV2PACKED1(%%eax, %5) |
1337 "m" (yalpha1), "m" (uvalpha1) | 1349 WRITEYUY2(%%esp, 8280(%5), %%eax) |
1350 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1351 | |
1352 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1353 "r" (&c->redDither) | |
1338 : "%eax" | 1354 : "%eax" |
1339 ); | 1355 ); |
1340 return; | 1356 return; |
1341 } | 1357 } |
1342 } | 1358 } |
1344 { | 1360 { |
1345 switch(dstFormat) | 1361 switch(dstFormat) |
1346 { | 1362 { |
1347 case IMGFMT_BGR32: | 1363 case IMGFMT_BGR32: |
1348 asm volatile( | 1364 asm volatile( |
1349 YSCALEYUV2RGB1b | 1365 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1350 WRITEBGR32(%4, %5, %%eax) | 1366 "movl %4, %%esp \n\t" |
1351 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1367 YSCALEYUV2RGB1b(%%eax, %5) |
1352 "m" (yalpha1), "m" (uvalpha1) | 1368 WRITEBGR32(%%esp, 8280(%5), %%eax) |
1369 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1370 | |
1371 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1372 "r" (&c->redDither) | |
1353 : "%eax" | 1373 : "%eax" |
1354 ); | 1374 ); |
1355 return; | 1375 return; |
1356 case IMGFMT_BGR24: | 1376 case IMGFMT_BGR24: |
1357 asm volatile( | 1377 asm volatile( |
1358 "movl %4, %%ebx \n\t" | 1378 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1359 YSCALEYUV2RGB1b | 1379 "movl %4, %%esp \n\t" |
1360 WRITEBGR24(%%ebx, %5, %%eax) | 1380 YSCALEYUV2RGB1b(%%eax, %5) |
1361 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), | 1381 WRITEBGR24(%%esp, 8280(%5), %%eax) |
1362 "m" (yalpha1), "m" (uvalpha1) | 1382 "movl "ESP_OFFSET"(%5), %%esp \n\t" |
1363 : "%eax", "%ebx" | 1383 |
1384 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1385 "r" (&c->redDither) | |
1386 : "%eax" | |
1364 ); | 1387 ); |
1365 return; | 1388 return; |
1366 case IMGFMT_BGR15: | 1389 case IMGFMT_BGR15: |
1367 asm volatile( | 1390 asm volatile( |
1368 YSCALEYUV2RGB1b | 1391 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1392 "movl %4, %%esp \n\t" | |
1393 YSCALEYUV2RGB1b(%%eax, %5) | |
1369 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1394 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1370 #ifdef DITHER1XBPP | 1395 #ifdef DITHER1XBPP |
1371 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | 1396 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1372 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | 1397 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" |
1373 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | 1398 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1374 #endif | 1399 #endif |
1375 WRITEBGR15(%4, %5, %%eax) | 1400 WRITEBGR15(%%esp, 8280(%5), %%eax) |
1376 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1401 "movl "ESP_OFFSET"(%5), %%esp \n\t" |
1377 "m" (yalpha1), "m" (uvalpha1) | 1402 |
1403 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1404 "r" (&c->redDither) | |
1378 : "%eax" | 1405 : "%eax" |
1379 ); | 1406 ); |
1380 return; | 1407 return; |
1381 case IMGFMT_BGR16: | 1408 case IMGFMT_BGR16: |
1382 asm volatile( | 1409 asm volatile( |
1383 YSCALEYUV2RGB1b | 1410 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1411 "movl %4, %%esp \n\t" | |
1412 YSCALEYUV2RGB1b(%%eax, %5) | |
1384 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1413 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1385 #ifdef DITHER1XBPP | 1414 #ifdef DITHER1XBPP |
1386 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" | 1415 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1387 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | 1416 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" |
1388 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | 1417 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1389 #endif | 1418 #endif |
1390 | 1419 |
1391 WRITEBGR16(%4, %5, %%eax) | 1420 WRITEBGR16(%%esp, 8280(%5), %%eax) |
1392 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1421 "movl "ESP_OFFSET"(%5), %%esp \n\t" |
1393 "m" (yalpha1), "m" (uvalpha1) | 1422 |
1423 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1424 "r" (&c->redDither) | |
1394 : "%eax" | 1425 : "%eax" |
1395 ); | 1426 ); |
1396 return; | 1427 return; |
1397 case IMGFMT_YUY2: | 1428 case IMGFMT_YUY2: |
1398 asm volatile( | 1429 asm volatile( |
1399 YSCALEYUV2PACKED1b | 1430 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1400 WRITEYUY2(%4, %5, %%eax) | 1431 "movl %4, %%esp \n\t" |
1401 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | 1432 YSCALEYUV2PACKED1b(%%eax, %5) |
1402 "m" (yalpha1), "m" (uvalpha1) | 1433 WRITEYUY2(%%esp, 8280(%5), %%eax) |
1434 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1435 | |
1436 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1437 "r" (&c->redDither) | |
1403 : "%eax" | 1438 : "%eax" |
1404 ); | 1439 ); |
1405 return; | 1440 return; |
1406 } | 1441 } |
1407 } | 1442 } |
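Throughout this hunk each output case now brackets the conversion with a save and restore of `%esp`: the old code passed `dest` in a register, but with `buf0`, `buf1`, `uvbuf0`, `uvbuf1` and `&c->redDither` all wanting registers plus `%eax` as the loop index, ia32 runs out, so `dest` becomes a memory operand (`"m"`) that is loaded into `%esp` for the duration of the block while the real stack pointer is parked in the context at `ESP_OFFSET`. (The `8280(%5)` operand handed to the `WRITE*` macros is likewise a field addressed off `&c->redDither`, presumably the destination width that used to be passed as `%5`.) Below is a minimal, hypothetical sketch of just that framing; `ESP_OFFSET` is assumed, as in the real headers, to expand to a string literal naming a scratch slot in `SwsContext`, and the stand-in definition is for this sketch only:

```c
#include <stdint.h>

#ifndef ESP_OFFSET
#define ESP_OFFSET "0"   /* stand-in; the real offset lives in swscale's headers */
#endif

/* Hypothetical helper (not from the changeset) showing the shape of the trick. */
static void borrow_esp_sketch(uint8_t *dest, void *ctx)
{
    asm volatile(
        "movl %%esp, "ESP_OFFSET"(%1)   \n\t"  /* park the real stack pointer     */
        "movl %0, %%esp                 \n\t"  /* borrow %esp as the dest pointer */
        /* ...the YSCALEYUV2*1(%%eax, %1) conversion and a WRITE* store macro     */
        /* would run here, writing through %esp instead of a general register...  */
        "movl "ESP_OFFSET"(%1), %%esp   \n\t"  /* restore before leaving the asm  */
        :: "m" (dest), "r" (ctx)
        : "memory");
}
```

Nothing may touch the stack between the two moves (no calls, no push/pop), which the pure MMX store sequences satisfy.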
2760 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | 2795 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); |
2761 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | 2796 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); |
2762 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | 2797 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB |
2763 { | 2798 { |
2764 int chrAlpha= vChrFilter[2*dstY+1]; | 2799 int chrAlpha= vChrFilter[2*dstY+1]; |
2765 | |
2766 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), | 2800 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), |
2767 dest, dstW, chrAlpha, dstFormat, flags, dstY); | 2801 dest, dstW, chrAlpha, dstFormat, flags, dstY); |
2768 } | 2802 } |
2769 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | 2803 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB |
2770 { | 2804 { |
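For context on the caller: the unscaled-RGB fast path above takes `chrAlpha = vChrFilter[2*dstY+1]`, the weight of the second chroma line, and passes it to `yuv2packed1()` as `uvalpha`; inside the function (the selecting condition sits in a part of the file not shown in this hunk) that weight chooses between the two macro families from the first hunk: the `...1` variants read `uvbuf0` alone, the `...1b` variants average `uvbuf0` and `uvbuf1` ("do vertical chrominance interpolation"). A hedged plain-C restatement of that difference, mirroring the `psraw $7` versus `paddw`/`psrlw $8` sequences of the packed variants:

```c
#include <stdint.h>

/* Illustrative only: swscale's intermediate line buffers hold 16-bit samples,
 * so a single chroma line is brought back down with >>7, while the sum of two
 * lines is brought down with >>8. */
static inline int chroma_nearest(const int16_t *uvbuf0, int i)
{
    return uvbuf0[i] >> 7;                   /* YSCALEYUV2PACKED1:  psraw $7        */
}

static inline int chroma_interp(const int16_t *uvbuf0,
                                const int16_t *uvbuf1, int i)
{
    return (uvbuf0[i] + uvbuf1[i]) >> 8;     /* YSCALEYUV2PACKED1b: paddw, psrlw $8 */
}
```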