comparison postproc/swscale_template.c @ 9417:5eea6d903b4c

cleanup
author michael
date Thu, 13 Feb 2003 21:38:43 +0000
parents 04c6fd75ed96
children 53f03173e48f
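The pattern running through the macro hunks below: YSCALEYUV2PACKED1, YSCALEYUV2RGB1 and their 1b variants gain (index, c) parameters, and the preprocessor's stringize operator ("#index", "#c") splices the caller's register names into the asm templates instead of hard-coding %%eax. A minimal, self-contained sketch of the mechanism (LOAD_UV is a made-up name, not from the patch):

    #include <stdio.h>

    /* "#index" turns the macro argument into a string literal; C then
       concatenates adjacent string literals, so the register name ends
       up inside the asm template at preprocessing time. */
    #define LOAD_UV(index) \
            "movq (%2, "#index"), %%mm3     \n\t" \
            "movq 4096(%2, "#index"), %%mm4 \n\t"

    int main(void)
    {
            /* prints exactly the text the compiler would see in asm() */
            fputs(LOAD_UV(%%eax), stdout);
            return 0;
    }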
343 "packuswb %%mm0, %%mm2 \n\t"\ 343 "packuswb %%mm0, %%mm2 \n\t"\
344 "packuswb %%mm6, %%mm5 \n\t"\ 344 "packuswb %%mm6, %%mm5 \n\t"\
345 "packuswb %%mm3, %%mm4 \n\t"\ 345 "packuswb %%mm3, %%mm4 \n\t"\
346 "pxor %%mm7, %%mm7 \n\t" 346 "pxor %%mm7, %%mm7 \n\t"
347 347
348 #define YSCALEYUV2PACKED1 \ 348 #define YSCALEYUV2PACKED1(index, c) \
349 "xorl %%eax, %%eax \n\t"\ 349 "xorl "#index", "#index" \n\t"\
350 ".balign 16 \n\t"\ 350 ".balign 16 \n\t"\
351 "1: \n\t"\ 351 "1: \n\t"\
352 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ 352 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
353 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ 353 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
354 "psraw $7, %%mm3 \n\t" \ 354 "psraw $7, %%mm3 \n\t" \
355 "psraw $7, %%mm4 \n\t" \ 355 "psraw $7, %%mm4 \n\t" \
356 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ 356 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
357 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ 357 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
358 "psraw $7, %%mm1 \n\t" \ 358 "psraw $7, %%mm1 \n\t" \
359 "psraw $7, %%mm7 \n\t" \ 359 "psraw $7, %%mm7 \n\t" \
360 360
361 #define YSCALEYUV2RGB1 \ 361 #define YSCALEYUV2RGB1(index, c) \
362 "xorl %%eax, %%eax \n\t"\ 362 "xorl "#index", "#index" \n\t"\
363 ".balign 16 \n\t"\ 363 ".balign 16 \n\t"\
364 "1: \n\t"\ 364 "1: \n\t"\
365 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ 365 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
366 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ 366 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
367 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ 367 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
368 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ 368 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
369 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ 369 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
370 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ 370 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
371 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ 371 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
372 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ 372 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
373 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ 373 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
374 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ 374 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
375 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ 375 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
376 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ 376 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
377 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ 377 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
378 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ 378 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
379 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ 379 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
380 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ 380 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
381 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ 381 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
382 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ 382 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
383 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ 383 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
384 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ 384 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
385 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ 385 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
386 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ 386 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
387 "paddw %%mm3, %%mm4 \n\t"\ 387 "paddw %%mm3, %%mm4 \n\t"\
388 "movq %%mm2, %%mm0 \n\t"\ 388 "movq %%mm2, %%mm0 \n\t"\
389 "movq %%mm5, %%mm6 \n\t"\ 389 "movq %%mm5, %%mm6 \n\t"\
390 "movq %%mm4, %%mm3 \n\t"\ 390 "movq %%mm4, %%mm3 \n\t"\
404 "packuswb %%mm0, %%mm2 \n\t"\ 404 "packuswb %%mm0, %%mm2 \n\t"\
405 "packuswb %%mm6, %%mm5 \n\t"\ 405 "packuswb %%mm6, %%mm5 \n\t"\
406 "packuswb %%mm3, %%mm4 \n\t"\ 406 "packuswb %%mm3, %%mm4 \n\t"\
407 "pxor %%mm7, %%mm7 \n\t" 407 "pxor %%mm7, %%mm7 \n\t"
408 408
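The other half of the cleanup, first visible in YSCALEYUV2RGB1 above: constants that used to be process-global (w400, w80, yCoeff, ugCoeff, ... reached through MANGLE) become slots inside the SwsContext, addressed as "U_OFFSET"("#c") and so on, i.e. a stringified byte offset glued to whatever register holds the context. A hedged sketch of the shape (field names and offsets are illustrative, not the real SwsContext layout):

    #include <stdio.h>
    #include <stdint.h>

    /* Each context instance carries its own packed-word constants, so
       two contexts with different colorspace parameters can coexist,
       which a single shared set of globals cannot support. */
    typedef struct {
            uint64_t uOffset;   /* byte offset 0: (U-128) bias, 4x int16 */
            uint64_t vOffset;   /* byte offset 8 */
            uint64_t ugCoeff;   /* byte offset 16: U -> G coefficient */
    } MiniCtx;

    #define U_OFFSET "0"        /* stringified offset of uOffset above */
    #define SUB_U_BIAS(c) "psubw "U_OFFSET"("#c"), %%mm3 \n\t"

    int main(void)
    {
            /* prints: psubw 0(%%edx), %%mm3 -- a plain memory operand
               relative to the context register, no global symbol needed */
            fputs(SUB_U_BIAS(%%edx), stdout);
            return 0;
    }

A side benefit: since the asm no longer names any global symbol, the MANGLE wrapper (which papers over leading-underscore symbol naming across platforms) is no longer needed in these macros.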
409 #define YSCALEYUV2PACKED1b \ 409 #define YSCALEYUV2PACKED1b(index, c) \
410 "xorl %%eax, %%eax \n\t"\ 410 "xorl "#index", "#index" \n\t"\
411 ".balign 16 \n\t"\ 411 ".balign 16 \n\t"\
412 "1: \n\t"\ 412 "1: \n\t"\
413 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ 413 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
414 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ 414 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
415 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ 415 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
416 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ 416 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
417 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ 417 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
418 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ 418 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
419 "psrlw $8, %%mm3 \n\t" \ 419 "psrlw $8, %%mm3 \n\t" \
420 "psrlw $8, %%mm4 \n\t" \ 420 "psrlw $8, %%mm4 \n\t" \
421 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ 421 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
422 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ 422 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
423 "psraw $7, %%mm1 \n\t" \ 423 "psraw $7, %%mm1 \n\t" \
424 "psraw $7, %%mm7 \n\t" 424 "psraw $7, %%mm7 \n\t"
425 425
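The 1b variants realize the vertical chrominance interpolation named in the comment below by summing the two chroma lines and shifting: paddw followed by psrlw $5 computes (uvbuf0[i] + uvbuf1[i]) >> 5, the average of the two lines folded together with the >>4 scaling used by the non-interpolating variant. The FIXME concerns the paddw: packed word addition wraps rather than saturates, so large chroma values can overflow before the shift. A minimal scalar sketch of the per-word arithmetic (the real code handles four words per instruction):

    #include <stdint.h>

    static inline uint16_t avg_chroma(uint16_t a, uint16_t b)
    {
            /* paddw wraps at 16 bits; the uint16_t truncation of the sum
               reproduces that hazard, which the macro's FIXME warns about */
            return (uint16_t)(a + b) >> 5;
    }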
426 // do vertical chrominance interpolation 426 // do vertical chrominance interpolation
427 #define YSCALEYUV2RGB1b \ 427 #define YSCALEYUV2RGB1b(index, c) \
428 "xorl %%eax, %%eax \n\t"\ 428 "xorl "#index", "#index" \n\t"\
429 ".balign 16 \n\t"\ 429 ".balign 16 \n\t"\
430 "1: \n\t"\ 430 "1: \n\t"\
431 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ 431 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
432 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ 432 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
433 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ 433 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
434 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ 434 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
435 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ 435 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
436 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ 436 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
437 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ 437 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
438 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ 438 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
439 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ 439 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
440 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ 440 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
441 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ 441 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
442 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ 442 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
443 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ 443 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
444 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ 444 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
445 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ 445 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
446 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ 446 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
447 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ 447 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
448 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ 448 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
449 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ 449 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
450 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ 450 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
451 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ 451 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
452 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ 452 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
453 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ 453 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
454 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ 454 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
455 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ 455 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
456 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ 456 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
457 "paddw %%mm3, %%mm4 \n\t"\ 457 "paddw %%mm3, %%mm4 \n\t"\
458 "movq %%mm2, %%mm0 \n\t"\ 458 "movq %%mm2, %%mm0 \n\t"\
459 "movq %%mm5, %%mm6 \n\t"\ 459 "movq %%mm5, %%mm6 \n\t"\
460 "movq %%mm4, %%mm3 \n\t"\ 460 "movq %%mm4, %%mm3 \n\t"\
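The rewritten function bodies below bracket each case with a stack-pointer swap: ia32 does not have enough general registers for buf0, buf1, uvbuf0, uvbuf1, dest and the context pointer at once, so the real %esp is parked in the context at ESP_OFFSET and %esp itself carries the destination pointer through the loop. (The 8280(%5) operand handed to the WRITE* macros is likewise a fixed slot in the context; that literal offset comes from the patch and is not reconstructed here.) A stripped-down, self-contained sketch of the pattern, with an illustrative ESP_OFFSET and context layout:

    #include <stdint.h>

    #define ESP_OFFSET "0"      /* illustrative slot for the saved %esp */
    typedef struct { uint32_t espSave; } MiniCtx;

    static void store_through_esp(uint8_t *dest, MiniCtx *c)
    {
            asm volatile(
                    "movl %%esp, "ESP_OFFSET"(%1)  \n\t" /* stash the real stack pointer */
                    "movl %0, %%esp                \n\t" /* reuse %esp as the dest pointer */
                    /* ... inner loop stores pixels through (%%esp) ... */
                    "movl "ESP_OFFSET"(%1), %%esp  \n\t" /* restore before leaving the asm */
                    :: "m" (dest), "r" (c)
                    : "memory");
    }

Two consequences are visible in the diff: dest is now passed as an "m" operand in every case (no register is left to hold it), and the window in which %esp points into the pixel buffer is fragile, since an asynchronous signal delivered then would push its frame through the borrowed stack pointer.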
1257 * YV12 to RGB without scaling or interpolating 1257 * YV12 to RGB without scaling or interpolating
1258 */ 1258 */
1259 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, 1259 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
1260 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) 1260 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1261 { 1261 {
1262 #ifdef HAVE_MMX
1263 int uvalpha1=uvalpha^4095;
1264 #endif
1265 const int yalpha1=0; 1262 const int yalpha1=0;
1266 int i; 1263 int i;
1267 1264
1268 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 1265 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
1269 const int yalpha= 4096; //FIXME ... 1266 const int yalpha= 4096; //FIXME ...
1279 { 1276 {
1280 switch(dstFormat) 1277 switch(dstFormat)
1281 { 1278 {
1282 case IMGFMT_BGR32: 1279 case IMGFMT_BGR32:
1283 asm volatile( 1280 asm volatile(
1284 YSCALEYUV2RGB1 1281 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1285 WRITEBGR32(%4, %5, %%eax) 1282 "movl %4, %%esp \n\t"
1286 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1283 YSCALEYUV2RGB1(%%eax, %5)
1287 "m" (yalpha1), "m" (uvalpha1) 1284 WRITEBGR32(%%esp, 8280(%5), %%eax)
1285 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1286
1287 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1288 "r" (&c->redDither)
1288 : "%eax" 1289 : "%eax"
1289 ); 1290 );
1290 return; 1291 return;
1291 case IMGFMT_BGR24: 1292 case IMGFMT_BGR24:
1292 asm volatile( 1293 asm volatile(
1293 "movl %4, %%ebx \n\t" 1294 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1294 YSCALEYUV2RGB1 1295 "movl %4, %%esp \n\t"
1295 WRITEBGR24(%%ebx, %5, %%eax) 1296 YSCALEYUV2RGB1(%%eax, %5)
1296 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), 1297 WRITEBGR24(%%esp, 8280(%5), %%eax)
1297 "m" (yalpha1), "m" (uvalpha1) 1298 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1298 : "%eax", "%ebx" 1299
1300 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1301 "r" (&c->redDither)
1302 : "%eax"
1299 ); 1303 );
1300 return; 1304 return;
1301 case IMGFMT_BGR15: 1305 case IMGFMT_BGR15:
1302 asm volatile( 1306 asm volatile(
1303 YSCALEYUV2RGB1 1307 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1308 "movl %4, %%esp \n\t"
1309 YSCALEYUV2RGB1(%%eax, %5)
1304 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ 1310 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1305 #ifdef DITHER1XBPP 1311 #ifdef DITHER1XBPP
1306 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" 1312 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1307 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" 1313 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1308 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" 1314 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1309 #endif 1315 #endif
1310 WRITEBGR15(%4, %5, %%eax) 1316 WRITEBGR15(%%esp, 8280(%5), %%eax)
1311 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1317 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1312 "m" (yalpha1), "m" (uvalpha1) 1318
1319 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1320 "r" (&c->redDither)
1313 : "%eax" 1321 : "%eax"
1314 ); 1322 );
1315 return; 1323 return;
1316 case IMGFMT_BGR16: 1324 case IMGFMT_BGR16:
1317 asm volatile( 1325 asm volatile(
1318 YSCALEYUV2RGB1 1326 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1327 "movl %4, %%esp \n\t"
1328 YSCALEYUV2RGB1(%%eax, %5)
1319 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ 1329 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1320 #ifdef DITHER1XBPP 1330 #ifdef DITHER1XBPP
1321 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" 1331 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1322 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" 1332 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1323 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" 1333 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1324 #endif 1334 #endif
1325 1335
1326 WRITEBGR16(%4, %5, %%eax) 1336 WRITEBGR16(%%esp, 8280(%5), %%eax)
1327 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1337 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1328 "m" (yalpha1), "m" (uvalpha1) 1338
1339 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1340 "r" (&c->redDither)
1329 : "%eax" 1341 : "%eax"
1330 ); 1342 );
1331 return; 1343 return;
1332 case IMGFMT_YUY2: 1344 case IMGFMT_YUY2:
1333 asm volatile( 1345 asm volatile(
1334 YSCALEYUV2PACKED1 1346 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1335 WRITEYUY2(%4, %5, %%eax) 1347 "movl %4, %%esp \n\t"
1336 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1348 YSCALEYUV2PACKED1(%%eax, %5)
1337 "m" (yalpha1), "m" (uvalpha1) 1349 WRITEYUY2(%%esp, 8280(%5), %%eax)
1350 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1351
1352 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1353 "r" (&c->redDither)
1338 : "%eax" 1354 : "%eax"
1339 ); 1355 );
1340 return; 1356 return;
1341 } 1357 }
1342 } 1358 }
1344 { 1360 {
1345 switch(dstFormat) 1361 switch(dstFormat)
1346 { 1362 {
1347 case IMGFMT_BGR32: 1363 case IMGFMT_BGR32:
1348 asm volatile( 1364 asm volatile(
1349 YSCALEYUV2RGB1b 1365 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1350 WRITEBGR32(%4, %5, %%eax) 1366 "movl %4, %%esp \n\t"
1351 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1367 YSCALEYUV2RGB1b(%%eax, %5)
1352 "m" (yalpha1), "m" (uvalpha1) 1368 WRITEBGR32(%%esp, 8280(%5), %%eax)
1369 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1370
1371 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1372 "r" (&c->redDither)
1353 : "%eax" 1373 : "%eax"
1354 ); 1374 );
1355 return; 1375 return;
1356 case IMGFMT_BGR24: 1376 case IMGFMT_BGR24:
1357 asm volatile( 1377 asm volatile(
1358 "movl %4, %%ebx \n\t" 1378 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1359 YSCALEYUV2RGB1b 1379 "movl %4, %%esp \n\t"
1360 WRITEBGR24(%%ebx, %5, %%eax) 1380 YSCALEYUV2RGB1b(%%eax, %5)
1361 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), 1381 WRITEBGR24(%%esp, 8280(%5), %%eax)
1362 "m" (yalpha1), "m" (uvalpha1) 1382 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1363 : "%eax", "%ebx" 1383
1384 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1385 "r" (&c->redDither)
1386 : "%eax"
1364 ); 1387 );
1365 return; 1388 return;
1366 case IMGFMT_BGR15: 1389 case IMGFMT_BGR15:
1367 asm volatile( 1390 asm volatile(
1368 YSCALEYUV2RGB1b 1391 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1392 "movl %4, %%esp \n\t"
1393 YSCALEYUV2RGB1b(%%eax, %5)
1369 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ 1394 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1370 #ifdef DITHER1XBPP 1395 #ifdef DITHER1XBPP
1371 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" 1396 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1372 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" 1397 "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1373 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" 1398 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1374 #endif 1399 #endif
1375 WRITEBGR15(%4, %5, %%eax) 1400 WRITEBGR15(%%esp, 8280(%5), %%eax)
1376 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1401 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1377 "m" (yalpha1), "m" (uvalpha1) 1402
1403 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1404 "r" (&c->redDither)
1378 : "%eax" 1405 : "%eax"
1379 ); 1406 );
1380 return; 1407 return;
1381 case IMGFMT_BGR16: 1408 case IMGFMT_BGR16:
1382 asm volatile( 1409 asm volatile(
1383 YSCALEYUV2RGB1b 1410 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1411 "movl %4, %%esp \n\t"
1412 YSCALEYUV2RGB1b(%%eax, %5)
1384 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ 1413 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1385 #ifdef DITHER1XBPP 1414 #ifdef DITHER1XBPP
1386 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" 1415 "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1387 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" 1416 "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1388 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" 1417 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1389 #endif 1418 #endif
1390 1419
1391 WRITEBGR16(%4, %5, %%eax) 1420 WRITEBGR16(%%esp, 8280(%5), %%eax)
1392 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1421 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1393 "m" (yalpha1), "m" (uvalpha1) 1422
1423 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1424 "r" (&c->redDither)
1394 : "%eax" 1425 : "%eax"
1395 ); 1426 );
1396 return; 1427 return;
1397 case IMGFMT_YUY2: 1428 case IMGFMT_YUY2:
1398 asm volatile( 1429 asm volatile(
1399 YSCALEYUV2PACKED1b 1430 "movl %%esp, "ESP_OFFSET"(%5) \n\t"
1400 WRITEYUY2(%4, %5, %%eax) 1431 "movl %4, %%esp \n\t"
1401 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 1432 YSCALEYUV2PACKED1b(%%eax, %5)
1402 "m" (yalpha1), "m" (uvalpha1) 1433 WRITEYUY2(%%esp, 8280(%5), %%eax)
1434 "movl "ESP_OFFSET"(%5), %%esp \n\t"
1435
1436 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1437 "r" (&c->redDither)
1403 : "%eax" 1438 : "%eax"
1404 ); 1439 );
1405 return; 1440 return;
1406 } 1441 }
1407 } 1442 }
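Also worth noting in the BGR24 cases: the old code loaded dest into %%ebx ("movl %4, %%ebx") and had to declare %%ebx clobbered, while the new code routes dest through %%esp and clobbers only %%eax. Since i386 PIC code reserves %%ebx for the GOT pointer, shedding that clobber is a plausible motivation beyond raw register pressure, though the log says only "cleanup", so this reading is an inference.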
2760 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); 2795 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
2761 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); 2796 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
2762 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB 2797 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
2763 { 2798 {
2764 int chrAlpha= vChrFilter[2*dstY+1]; 2799 int chrAlpha= vChrFilter[2*dstY+1];
2765
2766 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), 2800 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
2767 dest, dstW, chrAlpha, dstFormat, flags, dstY); 2801 dest, dstW, chrAlpha, dstFormat, flags, dstY);
2768 } 2802 }
2769 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB 2803 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
2770 { 2804 {