comparison postproc/swscale.c @ 2748:01dbf100b4f8

better dithering
author michael
date Tue, 06 Nov 2001 18:42:22 +0000
parents c483fc9bf0c4
children 9ef09e232505
comparing 2747:d94e94466dde with 2748:01dbf100b4f8
@@ -69,16 +69,22 @@
 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
 
-static uint64_t __attribute__((aligned(8))) b16Dither= 0x0004000400040004LL;
-static uint64_t __attribute__((aligned(8))) b16Dither1=0x0004000400040004LL;
-static uint64_t __attribute__((aligned(8))) b16Dither2=0x0602060206020602LL;
-static uint64_t __attribute__((aligned(8))) g16Dither= 0x0002000200020002LL;
-static uint64_t __attribute__((aligned(8))) g16Dither1=0x0002000200020002LL;
-static uint64_t __attribute__((aligned(8))) g16Dither2=0x0301030103010301LL;
+static uint64_t __attribute__((aligned(8))) b5Dither;
+static uint64_t __attribute__((aligned(8))) g5Dither;
+static uint64_t __attribute__((aligned(8))) g6Dither;
+static uint64_t __attribute__((aligned(8))) r5Dither;
+
+static uint64_t __attribute__((aligned(8))) dither4[2]={
+	0x0103010301030103LL,
+	0x0200020002000200LL,};
+
+static uint64_t __attribute__((aligned(8))) dither8[2]={
+	0x0602060206020602LL,
+	0x0004000400040004LL,};
 
 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
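Note on this hunk: the six rotating b16Dither*/g16Dither* constants are replaced by four per-line dither words (b5Dither, g5Dither, g6Dither, r5Dither) plus two row tables. dither4 holds the two rows of a 2x2 ordered-dither pattern for a channel that keeps 6 of 8 bits (green in RGB565), dither8 the rows for channels that keep only 5 bits. Below is a minimal scalar sketch of the idea, not code from the changeset; the helper names (sat_add_u8, rgb565_dither, bias5, bias6) and the exact pixel-to-byte phase are illustrative assumptions.

    #include <stdint.h>

    /* per-pixel biases: bytes of dither8[0..1] and dither4[0..1] */
    static const uint8_t bias5[2][2] = { {2, 6}, {4, 0} };
    static const uint8_t bias6[2][2] = { {3, 1}, {0, 2} };

    /* what paddusb does for a single byte: add with unsigned saturation */
    static inline uint8_t sat_add_u8(uint8_t a, uint8_t b)
    {
        unsigned s = (unsigned)a + b;
        return s > 255 ? 255 : (uint8_t)s;
    }

    /* 8-bit R,G,B -> ordered-dithered RGB565 for the pixel at (x, y) */
    static inline uint16_t rgb565_dither(uint8_t r, uint8_t g, uint8_t b, int x, int y)
    {
        uint8_t r5 = sat_add_u8(r, bias5[y & 1][x & 1]) >> 3;   /* keep 5 bits */
        uint8_t g6 = sat_add_u8(g, bias6[y & 1][x & 1]) >> 2;   /* keep 6 bits */
        uint8_t b5 = sat_add_u8(b, bias5[y & 1][x & 1]) >> 3;
        return (uint16_t)((r5 << 11) | (g6 << 5) | b5);
    }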
@@ -595,12 +601,11 @@
 
 #ifdef HAVE_MMX
 void in_asm_used_var_warning_killer()
 {
 int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
-bm00001111+bm00000111+bm11111000+b16Dither+b16Dither1+b16Dither2+g16Dither+g16Dither1+
-g16Dither2+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+
+bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+
 M24A+M24B+M24C;
 if(i) i=0;
 }
 #endif
 
@@ -721,13 +726,13 @@
 {
 asm volatile(
 
 FULL_YSCALEYUV2RGB
 #ifdef DITHER1XBPP
-"paddusb b16Dither, %%mm1 \n\t"
-"paddusb b16Dither, %%mm0 \n\t"
-"paddusb b16Dither, %%mm3 \n\t"
+"paddusb g5Dither, %%mm1 \n\t"
+"paddusb r5Dither, %%mm0 \n\t"
+"paddusb b5Dither, %%mm3 \n\t"
 #endif
 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
 
@@ -755,13 +760,13 @@
 {
 asm volatile(
 
 FULL_YSCALEYUV2RGB
 #ifdef DITHER1XBPP
-"paddusb g16Dither, %%mm1 \n\t"
-"paddusb b16Dither, %%mm0 \n\t"
-"paddusb b16Dither, %%mm3 \n\t"
+"paddusb g6Dither, %%mm1 \n\t"
+"paddusb r5Dither, %%mm0 \n\t"
+"paddusb b5Dither, %%mm3 \n\t"
 #endif
 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
 
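Note on the DITHER1XBPP hunks: in every output path the per-line dither word is added with paddusb (unsigned saturating byte add) just before the packed bytes are shifted down and merged into 15/16 bpp pixels. Blue and red always take the 5-bit constants (b5Dither, r5Dither); green takes g5Dither when packing to 15 bpp and the smaller-amplitude g6Dither when packing to 16 bpp, since RGB565 keeps one extra green bit. A minimal sketch of that single step, assuming MMX intrinsics from <mmintrin.h>; add_dither8 is an illustrative helper name, not something defined in swscale.c.

    #include <mmintrin.h>

    /* equivalent of one "paddusb <dither>, %%mmN": add the per-line dither
     * bytes onto eight packed 8-bit samples with unsigned saturation */
    static inline __m64 add_dither8(__m64 channel8, __m64 dither_row)
    {
        return _mm_adds_pu8(channel8, dither_row);
    }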
@@ -864,13 +869,13 @@
 {
 asm volatile(
 YSCALEYUV2RGB
 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-"paddusb b16Dither, %%mm2 \n\t"
-"paddusb b16Dither, %%mm4 \n\t"
-"paddusb b16Dither, %%mm5 \n\t"
+"paddusb b5Dither, %%mm2 \n\t"
+"paddusb g5Dither, %%mm4 \n\t"
+"paddusb r5Dither, %%mm5 \n\t"
 #endif
 
 WRITEBGR15
 
 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
@@ -882,13 +887,13 @@
 {
 asm volatile(
 YSCALEYUV2RGB
 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-"paddusb g16Dither, %%mm2 \n\t"
-"paddusb b16Dither, %%mm4 \n\t"
-"paddusb b16Dither, %%mm5 \n\t"
+"paddusb b5Dither, %%mm2 \n\t"
+"paddusb g6Dither, %%mm4 \n\t"
+"paddusb r5Dither, %%mm5 \n\t"
 #endif
 
 WRITEBGR16
 
 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
@@ -1046,13 +1051,13 @@
 {
 asm volatile(
 YSCALEYUV2RGB1
 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-"paddusb b16Dither, %%mm2 \n\t"
-"paddusb b16Dither, %%mm4 \n\t"
-"paddusb b16Dither, %%mm5 \n\t"
+"paddusb b5Dither, %%mm2 \n\t"
+"paddusb g5Dither, %%mm4 \n\t"
+"paddusb r5Dither, %%mm5 \n\t"
 #endif
 WRITEBGR15
 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
 "m" (yalpha1), "m" (uvalpha1)
 : "%eax"
@@ -1062,13 +1067,13 @@
 {
 asm volatile(
 YSCALEYUV2RGB1
 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-"paddusb g16Dither, %%mm2 \n\t"
-"paddusb b16Dither, %%mm4 \n\t"
-"paddusb b16Dither, %%mm5 \n\t"
+"paddusb b5Dither, %%mm2 \n\t"
+"paddusb g6Dither, %%mm4 \n\t"
+"paddusb r5Dither, %%mm5 \n\t"
 #endif
 
 WRITEBGR16
 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
 "m" (yalpha1), "m" (uvalpha1)
@@ -1103,13 +1108,13 @@
 {
 asm volatile(
 YSCALEYUV2RGB1b
 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-"paddusb b16Dither, %%mm2 \n\t"
-"paddusb b16Dither, %%mm4 \n\t"
-"paddusb b16Dither, %%mm5 \n\t"
+"paddusb b5Dither, %%mm2 \n\t"
+"paddusb g5Dither, %%mm4 \n\t"
+"paddusb r5Dither, %%mm5 \n\t"
 #endif
 WRITEBGR15
 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
 "m" (yalpha1), "m" (uvalpha1)
 : "%eax"
@@ -1119,13 +1124,13 @@
 {
 asm volatile(
 YSCALEYUV2RGB1b
 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
-"paddusb g16Dither, %%mm2 \n\t"
-"paddusb b16Dither, %%mm4 \n\t"
-"paddusb b16Dither, %%mm5 \n\t"
+"paddusb b5Dither, %%mm2 \n\t"
+"paddusb g6Dither, %%mm4 \n\t"
+"paddusb r5Dither, %%mm5 \n\t"
 #endif
 
 WRITEBGR16
 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
 "m" (yalpha1), "m" (uvalpha1)
@@ -1766,27 +1771,23 @@
 hcscale(uvbuf1, dstUVw, src1, src2, srcWidth, s_xinc2);
 
 // the min() is required to avoid reuseing lines which where not available
 s_last_y1pos= MIN(y1, y/2+h/2-1);
 }
+#ifdef HAVE_MMX
+b5Dither= dither8[s_ypos&1];
+g6Dither= dither4[s_ypos&1];
+g5Dither= dither8[s_ypos&1];
+r5Dither= dither8[(s_ypos+1)&1];
+#endif
 
 if(dstbpp==12) //YV12
 yuv2yuv(buf0, buf1, uvbuf0, uvbuf1, dest, uDest, vDest, dstw, yalpha, uvalpha);
 else if(ABS(s_yinc - 0x10000) < 10)
 yuv2rgb1(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp);
 else
 yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp);
-
-#ifdef HAVE_MMX
-b16Dither= b16Dither1;
-b16Dither1= b16Dither2;
-b16Dither2= b16Dither;
-
-g16Dither= g16Dither1;
-g16Dither1= g16Dither2;
-g16Dither2= g16Dither;
-#endif
 }
 
 #ifdef HAVE_MMX
 __asm __volatile(SFENCE:::"memory");
 __asm __volatile(EMMS:::"memory");
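Note on the last hunk: instead of rotating the old b16Dither*/g16Dither* values after each rendered line, the dither row is now chosen before the line is drawn from the output line's vertical position (s_ypos & 1), with the red channel taking the opposite row phase, so the pattern is tied to the screen line rather than to render order. A small self-contained check of the effect, not code from the changeset (the bias values are the bytes of dither8, pixel phase illustrative): a flat 8-bit value of 163 would truncate to a constant 20 at 5 bits, but with the 2x2 biases it maps to 20, 21, 20, 20, averaging about 20.25 against the ideal 163/8 = 20.375.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint8_t bias[2][2] = { {2, 6}, {4, 0} };  /* bytes of dither8 */
        for (int y = 0; y < 2; y++)
            for (int x = 0; x < 2; x++)
                printf("(%d,%d): %d\n", x, y, (163 + bias[y][x]) >> 3);
        return 0;
    }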