comparison libswscale/swscale_template.c @ 28650:585b5f224fcd

make MMX registers parametrized in the WRITEBGR32 macro, patch by Cédric Schieli (cschieli at gmail youknowwhat)
author stefang
date Sat, 21 Feb 2009 10:49:52 +0000
parents 6cdce79a0b9a
children 7241319c2d93
comparison
equal deleted inserted replaced
28649:7ec20a2f2067 28650:585b5f224fcd
624 "packuswb %%mm6, %%mm5 \n\t"\ 624 "packuswb %%mm6, %%mm5 \n\t"\
625 "packuswb %%mm3, %%mm4 \n\t"\ 625 "packuswb %%mm3, %%mm4 \n\t"\
626 "pxor %%mm7, %%mm7 \n\t" 626 "pxor %%mm7, %%mm7 \n\t"
627 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) 627 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
628 628
629 #define REAL_WRITEBGR32(dst, dstw, index) \ 629 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
630 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ 630 "movq "#b", "#q2" \n\t" /* B */\
631 "movq %%mm2, %%mm1 \n\t" /* B */\ 631 "movq "#r", "#t" \n\t" /* R */\
632 "movq %%mm5, %%mm6 \n\t" /* R */\ 632 "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
633 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ 633 "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
634 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ 634 "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
635 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ 635 "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
636 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ 636 "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
637 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ 637 "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
638 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ 638 "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
639 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ 639 "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
640 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ 640 "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
641 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ 641 "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
642 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ 642 \
643 \ 643 MOVNTQ( q0, (dst, index, 4))\
644 MOVNTQ(%%mm0, (dst, index, 4))\ 644 MOVNTQ( b, 8(dst, index, 4))\
645 MOVNTQ(%%mm2, 8(dst, index, 4))\ 645 MOVNTQ( q2, 16(dst, index, 4))\
646 MOVNTQ(%%mm1, 16(dst, index, 4))\ 646 MOVNTQ( q3, 24(dst, index, 4))\
647 MOVNTQ(%%mm3, 24(dst, index, 4))\
648 \ 647 \
649 "add $8, "#index" \n\t"\ 648 "add $8, "#index" \n\t"\
650 "cmp "#dstw", "#index" \n\t"\ 649 "cmp "#dstw", "#index" \n\t"\
651 " jb 1b \n\t" 650 " jb 1b \n\t"
652 #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index) 651 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
653 652
654 #define REAL_WRITERGB16(dst, dstw, index) \ 653 #define REAL_WRITERGB16(dst, dstw, index) \
655 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ 654 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
656 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ 655 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
657 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ 656 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
1012 if (c->flags & SWS_ACCURATE_RND){ 1011 if (c->flags & SWS_ACCURATE_RND){
1013 switch(c->dstFormat){ 1012 switch(c->dstFormat){
1014 case PIX_FMT_RGB32: 1013 case PIX_FMT_RGB32:
1015 YSCALEYUV2PACKEDX_ACCURATE 1014 YSCALEYUV2PACKEDX_ACCURATE
1016 YSCALEYUV2RGBX 1015 YSCALEYUV2RGBX
1017 WRITEBGR32(%4, %5, %%REGa) 1016 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1018 1017
1019 YSCALEYUV2PACKEDX_END 1018 YSCALEYUV2PACKEDX_END
1020 return; 1019 return;
1021 case PIX_FMT_BGR24: 1020 case PIX_FMT_BGR24:
1022 YSCALEYUV2PACKEDX_ACCURATE 1021 YSCALEYUV2PACKEDX_ACCURATE
1074 switch(c->dstFormat) 1073 switch(c->dstFormat)
1075 { 1074 {
1076 case PIX_FMT_RGB32: 1075 case PIX_FMT_RGB32:
1077 YSCALEYUV2PACKEDX 1076 YSCALEYUV2PACKEDX
1078 YSCALEYUV2RGBX 1077 YSCALEYUV2RGBX
1079 WRITEBGR32(%4, %5, %%REGa) 1078 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1080 YSCALEYUV2PACKEDX_END 1079 YSCALEYUV2PACKEDX_END
1081 return; 1080 return;
1082 case PIX_FMT_BGR24: 1081 case PIX_FMT_BGR24:
1083 YSCALEYUV2PACKEDX 1082 YSCALEYUV2PACKEDX
1084 YSCALEYUV2RGBX 1083 YSCALEYUV2RGBX
1169 __asm__ volatile( 1168 __asm__ volatile(
1170 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" 1169 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1171 "mov %4, %%"REG_b" \n\t" 1170 "mov %4, %%"REG_b" \n\t"
1172 "push %%"REG_BP" \n\t" 1171 "push %%"REG_BP" \n\t"
1173 YSCALEYUV2RGB(%%REGBP, %5) 1172 YSCALEYUV2RGB(%%REGBP, %5)
1174 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) 1173 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1175 "pop %%"REG_BP" \n\t" 1174 "pop %%"REG_BP" \n\t"
1176 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" 1175 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1177 1176
1178 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), 1177 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1179 "a" (&c->redDither) 1178 "a" (&c->redDither)
1281 __asm__ volatile( 1280 __asm__ volatile(
1282 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" 1281 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1283 "mov %4, %%"REG_b" \n\t" 1282 "mov %4, %%"REG_b" \n\t"
1284 "push %%"REG_BP" \n\t" 1283 "push %%"REG_BP" \n\t"
1285 YSCALEYUV2RGB1(%%REGBP, %5) 1284 YSCALEYUV2RGB1(%%REGBP, %5)
1286 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) 1285 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1287 "pop %%"REG_BP" \n\t" 1286 "pop %%"REG_BP" \n\t"
1288 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" 1287 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1289 1288
1290 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), 1289 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1291 "a" (&c->redDither) 1290 "a" (&c->redDither)
1370 __asm__ volatile( 1369 __asm__ volatile(
1371 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" 1370 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1372 "mov %4, %%"REG_b" \n\t" 1371 "mov %4, %%"REG_b" \n\t"
1373 "push %%"REG_BP" \n\t" 1372 "push %%"REG_BP" \n\t"
1374 YSCALEYUV2RGB1b(%%REGBP, %5) 1373 YSCALEYUV2RGB1b(%%REGBP, %5)
1375 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) 1374 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1376 "pop %%"REG_BP" \n\t" 1375 "pop %%"REG_BP" \n\t"
1377 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" 1376 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1378 1377
1379 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), 1378 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1380 "a" (&c->redDither) 1379 "a" (&c->redDither)