Mercurial > mplayer.hg
annotate libvo/osd.c @ 2833:1b6c207c0410
Enable MMX stuff
I don't know why Michael Niedermayer disabled it, but here are
my benchmarks over 25 frames of TIMER+SUBTITLE rendering (cycle counts measured with RDTSC):
Non-MMX stuff:
total=306142159
MMX stuff:
total=159534150
author | nick |
---|---|
date | Sun, 11 Nov 2001 14:42:10 +0000 |
parents | 004ee19ebfcf |
children | 86fdf7897315 |
rev | line source |
---|---|
326 | 1 // Generic alpha renderers for all YUV modes and RGB depths. |
2 // These are "reference implementations", should be optimized later (MMX, etc) | |
3 | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
4 //#define FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
5 //#define FAST_OSD_TABLE |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
6 |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
7 #include "config.h" |
622 | 8 #include "osd.h" |
2798
ee2cd36a81a2
Code cleanup - emms is not required when MMX block is commented out.
nick
parents:
2578
diff
changeset
|
9 #include "../mmx_defs.h" |
2833 | 10 //#define ENABLE_PROFILE |
11 #include "../my_profile.h" | |
622 | 12 |
/*
 * Blend an 8-bit OSD bitmap onto a plane that stores one byte per pixel.
 * (The name says YV12; only a single plane is touched here.)
 *
 *   w, h       size of the area to draw, in pixels
 *   src        bitmap values that are added to the attenuated destination
 *   srca       per-pixel multiplier; 0 means "leave this pixel untouched"
 *   srcstride  bytes between consecutive rows of both src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between consecutive destination rows
 *
 * Default build:  dst = ((dst * srca) >> 8) + src, stored back as a byte
 *                 (no saturation - a sum above 255 wraps).
 * FAST_OSD build: dst = src wherever srca is non-zero (no blending).
 */
void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        register int x;
        /* One pixel per iteration in both modes.  The FAST_OSD variant used
         * to halve w and handle two pixels per pass, which skipped the last
         * column whenever w was odd. */
        for(x=0;x<w;x++){
            if(!srca[x]) continue;
#ifdef FAST_OSD
            dstbase[x]=src[x];
#else
            dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
#endif
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
    return;
}
34 | |
/*
 * Blend an 8-bit OSD bitmap onto a packed buffer with two bytes per pixel.
 * Only the first byte of every pixel (offset 2*x) is modified; the byte at
 * offset 2*x+1 is never read or written.
 *
 *   w, h       size of the area to draw, in pixels
 *   src        bitmap values that are added to the attenuated destination
 *   srca       per-pixel multiplier; 0 means "leave this pixel untouched"
 *   srcstride  bytes between consecutive rows of both src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between consecutive destination rows
 *
 * Default build:  dst = ((dst * srca) >> 8) + src, stored back as a byte
 *                 (no saturation - a sum above 255 wraps).
 * FAST_OSD build: dst = src wherever srca is non-zero (no blending).
 */
void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        register int x;
        /* One pixel per iteration in both modes.  The FAST_OSD variant used
         * to halve w and handle two pixels per pass, which skipped the last
         * column whenever w was odd. */
        for(x=0;x<w;x++){
            if(!srca[x]) continue;
#ifdef FAST_OSD
            dstbase[2*x]=src[x];
#else
            dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
#endif
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
    return;
}
56 | |
/*
 * Blend an 8-bit OSD bitmap onto a packed buffer with three bytes per pixel.
 * The same src/srca value is applied to all three bytes of a pixel, so the
 * overlay is grey-scale.
 *
 *   w, h       size of the area to draw, in pixels
 *   src        bitmap values that are added to the attenuated destination
 *   srca       per-pixel multiplier; 0 means "leave this pixel untouched"
 *   srcstride  bytes between consecutive rows of both src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between consecutive destination rows
 *
 * Default build:  c = ((c * srca) >> 8) + src for each of the three bytes,
 *                 stored back as a byte (no saturation).
 * FAST_OSD build: all three bytes are set to src wherever srca is non-zero.
 */
void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int row;
    for(row=0;row<h;row++){
        int col;
        for(col=0;col<w;col++){
            unsigned char *px;
            if(!srca[col]) continue;   /* fully transparent: keep background */
            px=dstbase+3*col;          /* 24bpp: three bytes per pixel */
#ifdef FAST_OSD
            px[0]=px[1]=px[2]=src[col];
#else
            px[0]=((px[0]*srca[col])>>8)+src[col];
            px[1]=((px[1]*srca[col])>>8)+src[col];
            px[2]=((px[2]*srca[col])>>8)+src[col];
#endif
        }
        /* advance all three buffers to the next row */
        dstbase+=dststride;
        srca+=srcstride;
        src+=srcstride;
    }
    return;
}
80 | |
/*
 * Blend an 8-bit OSD bitmap onto a packed buffer with four bytes per pixel.
 *
 *   w, h       size of the area to draw, in pixels
 *   src        bitmap values that are added to the attenuated destination
 *   srca       per-pixel multiplier; 0 means "leave this pixel untouched"
 *              in the scalar code paths
 *   srcstride  bytes between consecutive rows of both src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between consecutive destination rows
 *
 * Three implementations are selected at compile time:
 *
 *   ARCH_X86 + HAVE_MMX  MMX loop, two pixels per iteration.  It differs
 *                        slightly from the reference formula: it multiplies
 *                        by (srca-1) modulo 256, does not skip pixels whose
 *                        srca is 0, and updates all four bytes of a pixel.
 *   ARCH_X86 only        per-pixel scalar asm, same result as the C code.
 *   otherwise            plain C: c = ((c * srca) >> 8) + src on bytes 0..2.
 *
 * FAST_OSD is honoured only by the plain C path.  Both asm variants index
 * with %eax next to pointer operands, so they assemble for 32-bit x86 only.
 */
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    PROFILE_START();
    for(y=0;y<h;y++){
        register int x;
#if defined(ARCH_X86) && defined(HAVE_MMX)
        /* The asm loop is a do-while that consumes two pixels per pass, so
         * hand it only the even part of the row and never enter it for an
         * empty row.  Previously an odd w made it write one pixel past the
         * requested width and w<=0 still wrote two pixels. */
        const int w_pairs = w & ~1;
        x=0;
        if(w_pairs>0){
            asm volatile(
                "pxor %%mm7, %%mm7 \n\t"
                "xorl %%eax, %%eax \n\t"
                "pcmpeqb %%mm6, %%mm6 \n\t" // F..F
                ".balign 16\n\t"
                "1: \n\t"
                "movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase
                "movq %%mm0, %%mm1 \n\t"
                "punpcklbw %%mm7, %%mm0 \n\t"
                "punpckhbw %%mm7, %%mm1 \n\t"
                "movd (%1, %%eax), %%mm2 \n\t" // srca ABCD0000
                "paddb %%mm6, %%mm2 \n\t" // srca-1 in every byte
                "punpcklbw %%mm2, %%mm2 \n\t" // srca AABBCCDD
                "punpcklbw %%mm2, %%mm2 \n\t" // srca AAAABBBB
                "movq %%mm2, %%mm3 \n\t"
                "punpcklbw %%mm7, %%mm2 \n\t" // srca 0A0A0A0A
                "punpckhbw %%mm7, %%mm3 \n\t" // srca 0B0B0B0B
                "pmullw %%mm2, %%mm0 \n\t"
                "pmullw %%mm3, %%mm1 \n\t"
                "psrlw $8, %%mm0 \n\t"
                "psrlw $8, %%mm1 \n\t"
                "packuswb %%mm1, %%mm0 \n\t"
                "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000
                "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD
                "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB
                "paddb %%mm2, %%mm0 \n\t"
                "movq %%mm0, (%0, %%eax, 4) \n\t"
                "addl $2, %%eax \n\t"
                "cmpl %3, %%eax \n\t"
                " jb 1b \n\t"

                :: "r" (dstbase), "r" (srca), "r" (src), "r" (w_pairs)
                /* "memory": the asm stores to dstbase behind the compiler's back */
                : "%eax", "memory"
            );
            x=w_pairs;
        }
        /* NOTE(review): each movd above loads four bytes of srca/src although
         * only two are used, so the last pass still reads two bytes beyond
         * the even part of the row - confirm the bitmaps are padded. */
        /* Remaining pixel of an odd-width row: reference C formula. */
        for(;x<w;x++){
            if(srca[x]){
                dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
                dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
                dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
            }
        }
#elif defined(ARCH_X86)
        for(x=0;x<w;x++){
            if(srca[x]){
                /* Computes c*srca + (src<<8) for three bytes and stores bits
                 * 8..15 of each result, i.e. ((c*srca)>>8)+src as a byte. */
                asm volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"
                    "movzbl 2(%0), %%edx\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"
                    "imull %1, %%edx\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"
                    "addl %2, %%edx\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"
                    "movb %%dh, 2(%0)\n\t"

                    :
                    :"r" (&dstbase[4*x]),
                     "r" ((unsigned)srca[x]),
                     "r" (((unsigned)src[x])<<8)
                    /* "memory": the asm stores to dstbase behind the compiler's back */
                    :"%eax", "%ecx", "%edx", "memory"
                );
            }
        }
#else /*non x86 arch*/
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
#else
                dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
                dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
                dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
#endif
            }
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
    /* Leave MMX state only when the MMX loop above was actually compiled in
     * (same condition as the MMX branch, not HAVE_MMX alone). */
#if defined(ARCH_X86) && defined(HAVE_MMX)
    asm volatile(EMMS:::"memory");
#endif
    PROFILE_END("vo_draw_alpha_rgb32");
    return;
}
174 | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
#ifdef FAST_OSD_TABLE
/* Grey level (0..255) -> ready-made 15bpp / 16bpp pixel, filled in by
 * vo_draw_alpha_init() and read by the rgb15/rgb16 FAST_OSD renderers. */
static unsigned short fast_osd_15bpp_table[256];
static unsigned short fast_osd_16bpp_table[256];
#endif

/*
 * One-time setup for the OSD renderers.
 * With FAST_OSD_TABLE it precomputes, for every 8-bit grey level, the pixel
 * that has this level in all colour fields (5/5/5 bits for the 15bpp table,
 * 5/6/5 bits for the 16bpp table).  Without FAST_OSD_TABLE it does nothing.
 */
void vo_draw_alpha_init(){
#ifdef FAST_OSD_TABLE
    int level;
    for(level=0;level<256;level++){
        const int c5=level>>3;   /* level reduced to 5 bits */
        const int c6=level>>2;   /* level reduced to 6 bits */
        fast_osd_15bpp_table[level]=(c5<<10)|(c5<<5)|c5;
        fast_osd_16bpp_table[level]=(c5<<11)|(c6<<5)|c5;
    }
#endif
}
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
189 |
/*
 * Blend an 8-bit OSD bitmap onto a buffer of 16-bit pixels with three 5-bit
 * colour fields (bits 0-4, 5-9 and 10-14).
 *
 *   w, h       size of the area to draw, in pixels
 *   src        bitmap values that are added to the attenuated destination
 *   srca       per-pixel multiplier; 0 means "leave this pixel untouched"
 *   srcstride  bytes between consecutive rows of both src and srca
 *   dstbase    top-left destination byte; accessed as unsigned short
 *   dststride  bytes between consecutive destination rows
 *
 * Default build:  every field is scaled to 8 bits, blended as
 *                 ((c * srca) >> 8) + src, saturated at 255 and reduced to
 *                 5 bits again.
 * FAST_OSD build: the pixel is replaced by the grey level src, taken from
 *                 fast_osd_15bpp_table when FAST_OSD_TABLE is defined.
 */
void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        register unsigned short *dst = (unsigned short*) dstbase;
        register int x;
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x]=fast_osd_15bpp_table[src[x]];
#else
                register unsigned int a=src[x]>>3;
                dst[x]=(a<<10)|(a<<5)|a;
#endif
#else
                const unsigned int pix=dst[x];
                const unsigned int a=srca[x];
                const unsigned int s=src[x];
                /* (c5*a)>>5 equals ((c5<<3)*a)>>8: blend on the 8-bit scale */
                unsigned int r=(((pix&0x1F)*a)>>5)+s;
                unsigned int g=((((pix>>5)&0x1F)*a)>>5)+s;
                unsigned int b=((((pix>>10)&0x1F)*a)>>5)+s;
                /* Saturate: a sum above 255 would give a value wider than
                 * 5 bits and spill into the neighbouring colour field when
                 * the pixel is reassembled. */
                if(r>255) r=255;
                if(g>255) g=255;
                if(b>255) b=255;
                dst[x]=((b>>3)<<10)|((g>>3)<<5)|(r>>3);
#endif
            }
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
    return;
}
221 | |
/*
 * Blend an 8-bit OSD bitmap onto a buffer of 16-bit pixels with 5/6/5-bit
 * colour fields (bits 0-4, 5-10 and 11-15).
 *
 *   w, h       size of the area to draw, in pixels
 *   src        bitmap values that are added to the attenuated destination
 *   srca       per-pixel multiplier; 0 means "leave this pixel untouched"
 *   srcstride  bytes between consecutive rows of both src and srca
 *   dstbase    top-left destination byte; accessed as unsigned short
 *   dststride  bytes between consecutive destination rows
 *
 * Default build:  every field is scaled to 8 bits, blended as
 *                 ((c * srca) >> 8) + src, saturated at 255 and reduced to
 *                 its field width again.
 * FAST_OSD build: the pixel is replaced by the grey level src, taken from
 *                 fast_osd_16bpp_table when FAST_OSD_TABLE is defined.
 */
void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        register unsigned short *dst = (unsigned short*) dstbase;
        register int x;
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x]=fast_osd_16bpp_table[src[x]];
#else
                dst[x]=((src[x]>>3)<<11)|((src[x]>>2)<<5)|(src[x]>>3);
#endif
#else
                const unsigned int pix=dst[x];
                const unsigned int a=srca[x];
                const unsigned int s=src[x];
                /* (c5*a)>>5 and (c6*a)>>6 both equal the 8-bit-scale
                 * product ((c<<k)*a)>>8 for the respective field width */
                unsigned int r=(((pix&0x1F)*a)>>5)+s;
                unsigned int g=((((pix>>5)&0x3F)*a)>>6)+s;
                unsigned int b=((((pix>>11)&0x1F)*a)>>5)+s;
                /* Saturate: a sum above 255 would give a value wider than
                 * its field and spill into the neighbouring colour field
                 * when the pixel is reassembled. */
                if(r>255) r=255;
                if(g>255) g=255;
                if(b>255) b=255;
                dst[x]=((b>>3)<<11)|((g>>2)<<5)|(r>>3);
#endif
            }
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
    return;
}
252 |