Mercurial > mplayer.hg
comparison libswscale/swscale_template.c @ 28650:585b5f224fcd
make MMX registers parametrized in the WRITEBGR32 macro,
patch by Cédric Schieli (cschieli at gmail youknowwhat)
author | stefang |
---|---|
date | Sat, 21 Feb 2009 10:49:52 +0000 |
parents | 6cdce79a0b9a |
children | 7241319c2d93 |
comparison
equal
deleted
inserted
replaced
28649:7ec20a2f2067 | 28650:585b5f224fcd |
---|---|
624 "packuswb %%mm6, %%mm5 \n\t"\ | 624 "packuswb %%mm6, %%mm5 \n\t"\ |
625 "packuswb %%mm3, %%mm4 \n\t"\ | 625 "packuswb %%mm3, %%mm4 \n\t"\ |
626 "pxor %%mm7, %%mm7 \n\t" | 626 "pxor %%mm7, %%mm7 \n\t" |
627 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) | 627 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c) |
628 | 628 |
629 #define REAL_WRITEBGR32(dst, dstw, index) \ | 629 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \ |
630 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | 630 "movq "#b", "#q2" \n\t" /* B */\ |
631 "movq %%mm2, %%mm1 \n\t" /* B */\ | 631 "movq "#r", "#t" \n\t" /* R */\ |
632 "movq %%mm5, %%mm6 \n\t" /* R */\ | 632 "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\ |
633 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | 633 "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\ |
634 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | 634 "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\ |
635 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | 635 "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\ |
636 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | 636 "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\ |
637 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | 637 "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\ |
638 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | 638 "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\ |
639 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | 639 "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\ |
640 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | 640 "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\ |
641 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | 641 "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\ |
642 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | 642 \ |
643 \ | 643 MOVNTQ( q0, (dst, index, 4))\ |
644 MOVNTQ(%%mm0, (dst, index, 4))\ | 644 MOVNTQ( b, 8(dst, index, 4))\ |
645 MOVNTQ(%%mm2, 8(dst, index, 4))\ | 645 MOVNTQ( q2, 16(dst, index, 4))\ |
646 MOVNTQ(%%mm1, 16(dst, index, 4))\ | 646 MOVNTQ( q3, 24(dst, index, 4))\ |
647 MOVNTQ(%%mm3, 24(dst, index, 4))\ | |
648 \ | 647 \ |
649 "add $8, "#index" \n\t"\ | 648 "add $8, "#index" \n\t"\ |
650 "cmp "#dstw", "#index" \n\t"\ | 649 "cmp "#dstw", "#index" \n\t"\ |
651 " jb 1b \n\t" | 650 " jb 1b \n\t" |
652 #define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index) | 651 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) |
653 | 652 |
654 #define REAL_WRITERGB16(dst, dstw, index) \ | 653 #define REAL_WRITERGB16(dst, dstw, index) \ |
655 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ | 654 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
656 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ | 655 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ |
657 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | 656 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ |
1012 if (c->flags & SWS_ACCURATE_RND){ | 1011 if (c->flags & SWS_ACCURATE_RND){ |
1013 switch(c->dstFormat){ | 1012 switch(c->dstFormat){ |
1014 case PIX_FMT_RGB32: | 1013 case PIX_FMT_RGB32: |
1015 YSCALEYUV2PACKEDX_ACCURATE | 1014 YSCALEYUV2PACKEDX_ACCURATE |
1016 YSCALEYUV2RGBX | 1015 YSCALEYUV2RGBX |
1017 WRITEBGR32(%4, %5, %%REGa) | 1016 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
1018 | 1017 |
1019 YSCALEYUV2PACKEDX_END | 1018 YSCALEYUV2PACKEDX_END |
1020 return; | 1019 return; |
1021 case PIX_FMT_BGR24: | 1020 case PIX_FMT_BGR24: |
1022 YSCALEYUV2PACKEDX_ACCURATE | 1021 YSCALEYUV2PACKEDX_ACCURATE |
1074 switch(c->dstFormat) | 1073 switch(c->dstFormat) |
1075 { | 1074 { |
1076 case PIX_FMT_RGB32: | 1075 case PIX_FMT_RGB32: |
1077 YSCALEYUV2PACKEDX | 1076 YSCALEYUV2PACKEDX |
1078 YSCALEYUV2RGBX | 1077 YSCALEYUV2RGBX |
1079 WRITEBGR32(%4, %5, %%REGa) | 1078 WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
1080 YSCALEYUV2PACKEDX_END | 1079 YSCALEYUV2PACKEDX_END |
1081 return; | 1080 return; |
1082 case PIX_FMT_BGR24: | 1081 case PIX_FMT_BGR24: |
1083 YSCALEYUV2PACKEDX | 1082 YSCALEYUV2PACKEDX |
1084 YSCALEYUV2RGBX | 1083 YSCALEYUV2RGBX |
1169 __asm__ volatile( | 1168 __asm__ volatile( |
1170 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | 1169 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
1171 "mov %4, %%"REG_b" \n\t" | 1170 "mov %4, %%"REG_b" \n\t" |
1172 "push %%"REG_BP" \n\t" | 1171 "push %%"REG_BP" \n\t" |
1173 YSCALEYUV2RGB(%%REGBP, %5) | 1172 YSCALEYUV2RGB(%%REGBP, %5) |
1174 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) | 1173 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
1175 "pop %%"REG_BP" \n\t" | 1174 "pop %%"REG_BP" \n\t" |
1176 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | 1175 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
1177 | 1176 |
1178 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | 1177 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), |
1179 "a" (&c->redDither) | 1178 "a" (&c->redDither) |
1281 __asm__ volatile( | 1280 __asm__ volatile( |
1282 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | 1281 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
1283 "mov %4, %%"REG_b" \n\t" | 1282 "mov %4, %%"REG_b" \n\t" |
1284 "push %%"REG_BP" \n\t" | 1283 "push %%"REG_BP" \n\t" |
1285 YSCALEYUV2RGB1(%%REGBP, %5) | 1284 YSCALEYUV2RGB1(%%REGBP, %5) |
1286 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) | 1285 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
1287 "pop %%"REG_BP" \n\t" | 1286 "pop %%"REG_BP" \n\t" |
1288 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | 1287 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
1289 | 1288 |
1290 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | 1289 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), |
1291 "a" (&c->redDither) | 1290 "a" (&c->redDither) |
1370 __asm__ volatile( | 1369 __asm__ volatile( |
1371 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" | 1370 "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" |
1372 "mov %4, %%"REG_b" \n\t" | 1371 "mov %4, %%"REG_b" \n\t" |
1373 "push %%"REG_BP" \n\t" | 1372 "push %%"REG_BP" \n\t" |
1374 YSCALEYUV2RGB1b(%%REGBP, %5) | 1373 YSCALEYUV2RGB1b(%%REGBP, %5) |
1375 WRITEBGR32(%%REGb, 8280(%5), %%REGBP) | 1374 WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) |
1376 "pop %%"REG_BP" \n\t" | 1375 "pop %%"REG_BP" \n\t" |
1377 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" | 1376 "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" |
1378 | 1377 |
1379 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), | 1378 :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), |
1380 "a" (&c->redDither) | 1379 "a" (&c->redDither) |