Mercurial > mplayer.hg
comparison postproc/swscale.c @ 2748:01dbf100b4f8
better dithering
author | michael |
---|---|
date | Tue, 06 Nov 2001 18:42:22 +0000 |
parents | c483fc9bf0c4 |
children | 9ef09e232505 |
comparison
equal
deleted
inserted
replaced
2747:d94e94466dde | 2748:01dbf100b4f8 |
---|---|
69 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL; | 69 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL; |
70 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL; | 70 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL; |
71 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; | 71 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; |
72 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; | 72 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; |
73 | 73 |
74 static uint64_t __attribute__((aligned(8))) b16Dither= 0x0004000400040004LL; | 74 static uint64_t __attribute__((aligned(8))) b5Dither; |
75 static uint64_t __attribute__((aligned(8))) b16Dither1=0x0004000400040004LL; | 75 static uint64_t __attribute__((aligned(8))) g5Dither; |
76 static uint64_t __attribute__((aligned(8))) b16Dither2=0x0602060206020602LL; | 76 static uint64_t __attribute__((aligned(8))) g6Dither; |
77 static uint64_t __attribute__((aligned(8))) g16Dither= 0x0002000200020002LL; | 77 static uint64_t __attribute__((aligned(8))) r5Dither; |
78 static uint64_t __attribute__((aligned(8))) g16Dither1=0x0002000200020002LL; | 78 |
79 static uint64_t __attribute__((aligned(8))) g16Dither2=0x0301030103010301LL; | 79 static uint64_t __attribute__((aligned(8))) dither4[2]={ |
80 0x0103010301030103LL, | |
81 0x0200020002000200LL,}; | |
82 | |
83 static uint64_t __attribute__((aligned(8))) dither8[2]={ | |
84 0x0602060206020602LL, | |
85 0x0004000400040004LL,}; | |
80 | 86 |
81 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL; | 87 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL; |
82 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL; | 88 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL; |
83 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL; | 89 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL; |
84 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL; | 90 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL; |
595 | 601 |
596 #ifdef HAVE_MMX | 602 #ifdef HAVE_MMX |
597 void in_asm_used_var_warning_killer() | 603 void in_asm_used_var_warning_killer() |
598 { | 604 { |
599 int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ | 605 int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ |
600 bm00001111+bm00000111+bm11111000+b16Dither+b16Dither1+b16Dither2+g16Dither+g16Dither1+ | 606 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ |
601 g16Dither2+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ | |
602 M24A+M24B+M24C; | 607 M24A+M24B+M24C; |
603 if(i) i=0; | 608 if(i) i=0; |
604 } | 609 } |
605 #endif | 610 #endif |
606 | 611 |
721 { | 726 { |
722 asm volatile( | 727 asm volatile( |
723 | 728 |
724 FULL_YSCALEYUV2RGB | 729 FULL_YSCALEYUV2RGB |
725 #ifdef DITHER1XBPP | 730 #ifdef DITHER1XBPP |
726 "paddusb b16Dither, %%mm1 \n\t" | 731 "paddusb g5Dither, %%mm1 \n\t" |
727 "paddusb b16Dither, %%mm0 \n\t" | 732 "paddusb r5Dither, %%mm0 \n\t" |
728 "paddusb b16Dither, %%mm3 \n\t" | 733 "paddusb b5Dither, %%mm3 \n\t" |
729 #endif | 734 #endif |
730 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G | 735 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
731 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B | 736 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
732 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R | 737 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
733 | 738 |
755 { | 760 { |
756 asm volatile( | 761 asm volatile( |
757 | 762 |
758 FULL_YSCALEYUV2RGB | 763 FULL_YSCALEYUV2RGB |
759 #ifdef DITHER1XBPP | 764 #ifdef DITHER1XBPP |
760 "paddusb g16Dither, %%mm1 \n\t" | 765 "paddusb g6Dither, %%mm1 \n\t" |
761 "paddusb b16Dither, %%mm0 \n\t" | 766 "paddusb r5Dither, %%mm0 \n\t" |
762 "paddusb b16Dither, %%mm3 \n\t" | 767 "paddusb b5Dither, %%mm3 \n\t" |
763 #endif | 768 #endif |
764 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G | 769 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
765 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B | 770 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
766 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R | 771 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
767 | 772 |
864 { | 869 { |
865 asm volatile( | 870 asm volatile( |
866 YSCALEYUV2RGB | 871 YSCALEYUV2RGB |
867 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 872 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
868 #ifdef DITHER1XBPP | 873 #ifdef DITHER1XBPP |
869 "paddusb b16Dither, %%mm2 \n\t" | 874 "paddusb b5Dither, %%mm2 \n\t" |
870 "paddusb b16Dither, %%mm4 \n\t" | 875 "paddusb g5Dither, %%mm4 \n\t" |
871 "paddusb b16Dither, %%mm5 \n\t" | 876 "paddusb r5Dither, %%mm5 \n\t" |
872 #endif | 877 #endif |
873 | 878 |
874 WRITEBGR15 | 879 WRITEBGR15 |
875 | 880 |
876 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), | 881 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), |
882 { | 887 { |
883 asm volatile( | 888 asm volatile( |
884 YSCALEYUV2RGB | 889 YSCALEYUV2RGB |
885 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 890 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
886 #ifdef DITHER1XBPP | 891 #ifdef DITHER1XBPP |
887 "paddusb g16Dither, %%mm2 \n\t" | 892 "paddusb b5Dither, %%mm2 \n\t" |
888 "paddusb b16Dither, %%mm4 \n\t" | 893 "paddusb g6Dither, %%mm4 \n\t" |
889 "paddusb b16Dither, %%mm5 \n\t" | 894 "paddusb r5Dither, %%mm5 \n\t" |
890 #endif | 895 #endif |
891 | 896 |
892 WRITEBGR16 | 897 WRITEBGR16 |
893 | 898 |
894 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), | 899 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), |
1046 { | 1051 { |
1047 asm volatile( | 1052 asm volatile( |
1048 YSCALEYUV2RGB1 | 1053 YSCALEYUV2RGB1 |
1049 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1054 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1050 #ifdef DITHER1XBPP | 1055 #ifdef DITHER1XBPP |
1051 "paddusb b16Dither, %%mm2 \n\t" | 1056 "paddusb b5Dither, %%mm2 \n\t" |
1052 "paddusb b16Dither, %%mm4 \n\t" | 1057 "paddusb g5Dither, %%mm4 \n\t" |
1053 "paddusb b16Dither, %%mm5 \n\t" | 1058 "paddusb r5Dither, %%mm5 \n\t" |
1054 #endif | 1059 #endif |
1055 WRITEBGR15 | 1060 WRITEBGR15 |
1056 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), | 1061 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), |
1057 "m" (yalpha1), "m" (uvalpha1) | 1062 "m" (yalpha1), "m" (uvalpha1) |
1058 : "%eax" | 1063 : "%eax" |
1062 { | 1067 { |
1063 asm volatile( | 1068 asm volatile( |
1064 YSCALEYUV2RGB1 | 1069 YSCALEYUV2RGB1 |
1065 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1070 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1066 #ifdef DITHER1XBPP | 1071 #ifdef DITHER1XBPP |
1067 "paddusb g16Dither, %%mm2 \n\t" | 1072 "paddusb b5Dither, %%mm2 \n\t" |
1068 "paddusb b16Dither, %%mm4 \n\t" | 1073 "paddusb g6Dither, %%mm4 \n\t" |
1069 "paddusb b16Dither, %%mm5 \n\t" | 1074 "paddusb r5Dither, %%mm5 \n\t" |
1070 #endif | 1075 #endif |
1071 | 1076 |
1072 WRITEBGR16 | 1077 WRITEBGR16 |
1073 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), | 1078 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), |
1074 "m" (yalpha1), "m" (uvalpha1) | 1079 "m" (yalpha1), "m" (uvalpha1) |
1103 { | 1108 { |
1104 asm volatile( | 1109 asm volatile( |
1105 YSCALEYUV2RGB1b | 1110 YSCALEYUV2RGB1b |
1106 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1111 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1107 #ifdef DITHER1XBPP | 1112 #ifdef DITHER1XBPP |
1108 "paddusb b16Dither, %%mm2 \n\t" | 1113 "paddusb b5Dither, %%mm2 \n\t" |
1109 "paddusb b16Dither, %%mm4 \n\t" | 1114 "paddusb g5Dither, %%mm4 \n\t" |
1110 "paddusb b16Dither, %%mm5 \n\t" | 1115 "paddusb r5Dither, %%mm5 \n\t" |
1111 #endif | 1116 #endif |
1112 WRITEBGR15 | 1117 WRITEBGR15 |
1113 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), | 1118 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), |
1114 "m" (yalpha1), "m" (uvalpha1) | 1119 "m" (yalpha1), "m" (uvalpha1) |
1115 : "%eax" | 1120 : "%eax" |
1119 { | 1124 { |
1120 asm volatile( | 1125 asm volatile( |
1121 YSCALEYUV2RGB1b | 1126 YSCALEYUV2RGB1b |
1122 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | 1127 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1123 #ifdef DITHER1XBPP | 1128 #ifdef DITHER1XBPP |
1124 "paddusb g16Dither, %%mm2 \n\t" | 1129 "paddusb b5Dither, %%mm2 \n\t" |
1125 "paddusb b16Dither, %%mm4 \n\t" | 1130 "paddusb g6Dither, %%mm4 \n\t" |
1126 "paddusb b16Dither, %%mm5 \n\t" | 1131 "paddusb r5Dither, %%mm5 \n\t" |
1127 #endif | 1132 #endif |
1128 | 1133 |
1129 WRITEBGR16 | 1134 WRITEBGR16 |
1130 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), | 1135 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), |
1131 "m" (yalpha1), "m" (uvalpha1) | 1136 "m" (yalpha1), "m" (uvalpha1) |
1766 hcscale(uvbuf1, dstUVw, src1, src2, srcWidth, s_xinc2); | 1771 hcscale(uvbuf1, dstUVw, src1, src2, srcWidth, s_xinc2); |
1767 | 1772 |
1768 // the min() is required to avoid reusing lines which were not available | 1773 // the min() is required to avoid reusing lines which were not available |
1769 s_last_y1pos= MIN(y1, y/2+h/2-1); | 1774 s_last_y1pos= MIN(y1, y/2+h/2-1); |
1770 } | 1775 } |
1776 #ifdef HAVE_MMX | |
1777 b5Dither= dither8[s_ypos&1]; | |
1778 g6Dither= dither4[s_ypos&1]; | |
1779 g5Dither= dither8[s_ypos&1]; | |
1780 r5Dither= dither8[(s_ypos+1)&1]; | |
1781 #endif | |
1771 | 1782 |
1772 if(dstbpp==12) //YV12 | 1783 if(dstbpp==12) //YV12 |
1773 yuv2yuv(buf0, buf1, uvbuf0, uvbuf1, dest, uDest, vDest, dstw, yalpha, uvalpha); | 1784 yuv2yuv(buf0, buf1, uvbuf0, uvbuf1, dest, uDest, vDest, dstw, yalpha, uvalpha); |
1774 else if(ABS(s_yinc - 0x10000) < 10) | 1785 else if(ABS(s_yinc - 0x10000) < 10) |
1775 yuv2rgb1(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); | 1786 yuv2rgb1(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); |
1776 else | 1787 else |
1777 yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); | 1788 yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); |
1778 | |
1779 #ifdef HAVE_MMX | |
1780 b16Dither= b16Dither1; | |
1781 b16Dither1= b16Dither2; | |
1782 b16Dither2= b16Dither; | |
1783 | |
1784 g16Dither= g16Dither1; | |
1785 g16Dither1= g16Dither2; | |
1786 g16Dither2= g16Dither; | |
1787 #endif | |
1788 } | 1789 } |
1789 | 1790 |
1790 #ifdef HAVE_MMX | 1791 #ifdef HAVE_MMX |
1791 __asm __volatile(SFENCE:::"memory"); | 1792 __asm __volatile(SFENCE:::"memory"); |
1792 __asm __volatile(EMMS:::"memory"); | 1793 __asm __volatile(EMMS:::"memory"); |