Mercurial > mplayer.hg
annotate libvo/osd_template.c @ 11623:ecaf7047b6e8
Patch from the author, Zoltan Hidvegi:
The filmdint filter does not handle NTSC "telecined" 15fps movies
where there is a frame break in the middle of every second NTSC frame;
it outputs only 15 frames for every 30 input frames, ignoring the io
option. You can notice this when encoding such a sequence: you will
get lots of duplicate frame / skip frame messages. The patch below
fixes this.
author | rfelker |
---|---|
date | Thu, 11 Dec 2003 04:47:42 +0000 |
parents | 14c8c762c2b7 |
children | 6f7b5123ac56 |
rev | line source |
---|---|
326 | 1 // Generic alpha renderers for all YUV modes and RGB depths. |
2846 | 2 // Optimized by Nick and Michael |
3142 | 3 // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL |
326 | 4 |
/*
 * Per-instantiation instruction-name macros used inside the inline asm
 * templates below.  They are #undef'ed first so the macros can be
 * redefined each time this block is processed.
 */
#undef PREFETCH
#undef EMMS
#undef PREFETCHW
#undef PAVGB

#ifdef HAVE_3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
#elif defined ( HAVE_MMX2 )
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
/* No prefetch instruction available.
 * NOTE(review): "/nop" presumably relies on '/' starting an assembler
 * comment so that the operand following the macro is ignored as well --
 * confirm against the assembler in use.
 * PAVGB is intentionally left undefined in this branch. */
#define PREFETCH  "/nop"
#define PREFETCHW "/nop"
#endif

#ifdef HAVE_3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif
29 | |
/*
 * Blend an 8-bit OSD bitmap onto an 8-bit-per-pixel plane (one byte per
 * destination pixel).
 *
 * C path: for every pixel whose srca byte is non-zero
 *     dst = ((dst * srca) >> 8) + src      (FAST_OSD: dst = src)
 * and pixels whose srca byte is zero are left untouched.
 *
 * MMX path: works on groups of 8 pixels and skips a group only when all
 * 8 srca bytes are zero, so it reads and writes up to the next multiple
 * of 8 bytes in every row.
 *
 *   w, h        size of the bitmap in pixels / rows
 *   src         values added to the destination
 *   srca        per-pixel multipliers, 0 = skip
 *   srcstride   byte distance between rows of both src and srca
 *   dstbase     first destination byte
 *   dststride   byte distance between destination rows
 */
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        int x;
#ifdef HAVE_MMX
        /* Build the byte-lane masks: mm4 = 00FF per word, mm5 = FF00 per word. */
        asm volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            "pcmpeqb %%mm5, %%mm5\n\t" // F..F
            "movq %%mm5, %%mm4\n\t"
            "psllw $8, %%mm5\n\t" //FF00FF00FF00
            "psrlw $8, %%mm4\n\t" //00FF00FF00FF
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=8){
            /* NOTE(review): MANGLE(bFF) is defined elsewhere; presumably a
             * vector of 0xFF bytes (i.e. srca-1 per byte) -- confirm. */
            asm volatile(
                "movl %1, %%eax\n\t"
                "orl 4%1, %%eax\n\t"
                " jz 1f\n\t"            // all 8 srca bytes zero: nothing to do
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
                "movq %1, %%mm2\n\t" //srca HGFEDCBA
                "paddb "MANGLE(bFF)", %%mm2\n\t"
                "movq %%mm2, %%mm3\n\t"
                "pand %%mm4, %%mm2\n\t" //0G0E0C0A
                "psrlw $8, %%mm3\n\t" //0H0F0D0B
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "paddb %2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
                /* The asm stores 8 bytes through operand 0, which is only
                 * declared as an input: tell the compiler memory changes. */
                : "%eax", "memory");
        }
#else
        /* One pixel per iteration in both modes, so odd widths keep
         * their last column with FAST_OSD as well. */
        for(x=0;x<w;x++){
            if(!srca[x]) continue;
#ifdef FAST_OSD
            dstbase[x]=src[x];
#else
            dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    asm volatile(EMMS:::"memory");
#endif
}
95 | |
/*
 * Blend an 8-bit OSD bitmap onto a packed destination with two bytes per
 * pixel, where byte 2*x is treated as luma and byte 2*x+1 as chroma.
 *
 * C path, for every pixel whose srca byte is non-zero:
 *     luma   = ((luma * srca) >> 8) + src
 *     chroma = (((chroma - 128) * srca) >> 8) + 128
 * With FAST_OSD only the luma byte is replaced by src.
 *
 * MMX path: works on groups of 4 pixels (8 destination bytes), skips a
 * group only when all 4 srca bytes are zero, and rewrites the luma bytes
 * only -- the chroma bytes are masked through unchanged.
 * NOTE(review): this differs from the C path, which also fades chroma.
 *
 *   w, h        size of the bitmap in pixels / rows
 *   src         values added to the luma bytes
 *   srca        per-pixel multipliers, 0 = skip
 *   srcstride   byte distance between rows of both src and srca
 *   dstbase     first destination byte
 *   dststride   byte distance between destination rows
 */
static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        int x;
#ifdef HAVE_MMX
        /* mm7 = 0, mm4 = 00FF per word (luma lanes), mm5 = FF00 per word
         * (chroma lanes). */
        asm volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            "pxor %%mm7, %%mm7\n\t"
            "pcmpeqb %%mm5, %%mm5\n\t" // F..F
            "movq %%mm5, %%mm4\n\t"
            "psllw $8, %%mm5\n\t" //FF00FF00FF00
            "psrlw $8, %%mm4\n\t" //00FF00FF00FF
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=4){
            /* NOTE(review): MANGLE(bFF) is defined elsewhere; presumably a
             * vector of 0xFF bytes (i.e. srca-1 per byte) -- confirm. */
            asm volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"            // all 4 srca bytes zero: nothing to do
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb "MANGLE(bFF)", %%mm2\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
                "pmullw %%mm2, %%mm0\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t" //U0V0U0V0
                "movd %2, %%mm2\n\t" //src 0000DCBA
                "punpcklbw %%mm7, %%mm2\n\t" //src 0D0C0B0A
                "por %%mm1, %%mm0\n\t"
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
                /* The asm stores 8 bytes through operand 0, which is only
                 * declared as an input: tell the compiler memory changes. */
                : "%eax", "memory");
        }
#else
        /* One pixel per iteration in both modes, so odd widths keep
         * their last column with FAST_OSD as well. */
        for(x=0;x<w;x++){
            if(!srca[x]) continue;
#ifdef FAST_OSD
            dstbase[2*x]=src[x];
#else
            dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
            /* fade chroma towards the neutral value 128 */
            dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    asm volatile(EMMS:::"memory");
#endif
}
162 | |
/*
 * Blend an 8-bit OSD bitmap onto a packed 3-bytes-per-pixel destination.
 * The same src/srca value is applied to all three bytes of a pixel.
 *
 * For every pixel whose srca byte is non-zero each of its three bytes
 * becomes
 *     dst = ((dst * srca) >> 8) + src
 * (non-x86 build with FAST_OSD: dst = src).
 *
 * Three implementations are selected at compile time:
 *   - ARCH_X86 + HAVE_MMX: two pixels per iteration, skipped only when
 *     both srca bytes are zero;
 *   - ARCH_X86 without MMX: scalar inline asm, one pixel per iteration;
 *   - otherwise: plain C.
 *
 *   w, h        size of the bitmap in pixels / rows
 *   src         values added to the destination bytes
 *   srca        per-pixel multipliers, 0 = skip
 *   srcstride   byte distance between rows of both src and srca
 *   dstbase     first destination byte
 *   dststride   byte distance between destination rows
 */
static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        unsigned char *dst = dstbase;
        int x;
#ifdef ARCH_X86
#ifdef HAVE_MMX
        /* mm7 = 0, mm6 = all ones (added to srca, i.e. srca-1 per byte). */
        asm volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            "pxor %%mm7, %%mm7\n\t"
            "pcmpeqb %%mm6, %%mm6\n\t" // F..F
            ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
        /* NOTE(review): for odd w the last iteration reads srca[w]/src[w]
         * and blends one pixel past the requested width; every iteration
         * also loads/stores 8 bytes for 6 bytes of pixel data. */
        for(x=0;x<w;x+=2){
            if(srca[x] || srca[x+1]){
                /* NOTE(review): mask24hl/mask24lh are defined elsewhere;
                 * presumably they keep the 6 blended bytes and restore the
                 * 2 trailing bytes from the original qword -- confirm. */
                asm volatile(
                    PREFETCHW" 32%0\n\t"
                    PREFETCH" 32%1\n\t"
                    PREFETCH" 32%2\n\t"
                    "movq %0, %%mm0\n\t" // dstbase
                    "movq %%mm0, %%mm1\n\t"
                    "movq %%mm0, %%mm5\n\t"
                    "punpcklbw %%mm7, %%mm0\n\t"
                    "punpckhbw %%mm7, %%mm1\n\t"
                    "movd %1, %%mm2\n\t" // srca ABCD0000
                    "paddb %%mm6, %%mm2\n\t"
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                    "movq %%mm2, %%mm3\n\t"
                    "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
                    "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
                    "pmullw %%mm2, %%mm0\n\t"
                    "pmullw %%mm3, %%mm1\n\t"
                    "psrlw $8, %%mm0\n\t"
                    "psrlw $8, %%mm1\n\t"
                    "packuswb %%mm1, %%mm0\n\t"
                    "movd %2, %%mm2 \n\t" // src ABCD0000
                    "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                    "paddb %%mm2, %%mm0\n\t"
                    "pand %4, %%mm5\n\t"
                    "pand %3, %%mm0\n\t"
                    "por %%mm0, %%mm5\n\t"
                    "movq %%mm5, %0\n\t"
                    :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)
                    /* stores through operand 0, which is input-only */
                    : "memory");
            }
            dst += 6;
        }
#else /* HAVE_MMX */
        for(x=0;x<w;x++){
            if(srca[x]){
                /* Computes dst*srca + (src<<8) per byte and stores bits
                 * 8..15 of the sum (%ch/%ah) back. */
                asm volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"

                    "movzbl 2(%0), %%eax\n\t"
                    "imull %1, %%eax\n\t"
                    "addl %2, %%eax\n\t"
                    "movb %%ah, 2(%0)\n\t"
                    :
                    :"r" (dst),
                     "r" ((unsigned)srca[x]),
                     "r" (((unsigned)src[x])<<8)
                    /* writes through the pointer in %0 */
                    :"%eax", "%ecx", "memory"
                    );
            }
            dst += 3;
        }
#endif /* !HAVE_MMX */
#else /*non x86 arch*/
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                dst[0]=dst[1]=dst[2]=src[x];
#else
                dst[0]=((dst[0]*srca[x])>>8)+src[x];
                dst[1]=((dst[1]*srca[x])>>8)+src[x];
                dst[2]=((dst[2]*srca[x])>>8)+src[x];
#endif
            }
            dst+=3; // 24bpp
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    asm volatile(EMMS:::"memory");
#endif
}
264 | |
/*
 * Blend an 8-bit OSD bitmap onto a packed 4-bytes-per-pixel destination.
 * The same src/srca value is applied to the colour bytes of a pixel.
 *
 * Scalar paths: for every pixel whose srca byte is non-zero, bytes 0..2
 * of the pixel become
 *     dst = ((dst * srca) >> 8) + src
 * (non-x86 build with FAST_OSD: dst = src); byte 3 is not written.
 * With WORDS_BIGENDIAN the destination pointer is advanced by one byte
 * first, so bytes 1..3 of each pixel are the ones modified.
 *
 * MMX paths operate on whole quadwords and therefore rewrite all four
 * bytes of each processed pixel:
 *   - HAVE_3DNOW: two pixels per iteration, skipped only when both srca
 *     bytes are zero;
 *   - otherwise: four pixels per iteration, skipped only when all four
 *     srca bytes are zero.
 *
 *   w, h        size of the bitmap in pixels / rows
 *   src         values added to the destination bytes
 *   srca        per-pixel multipliers, 0 = skip
 *   srcstride   byte distance between rows of both src and srca
 *   dstbase     first destination byte
 *   dststride   byte distance between destination rows
 */
static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#ifdef WORDS_BIGENDIAN
    dstbase++;
#endif
    for(y=0;y<h;y++){
        int x;
#ifdef ARCH_X86
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
        /* mm7 = 0, mm6 = all ones (added to srca, i.e. srca-1 per byte). */
        asm volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            "pxor %%mm7, %%mm7\n\t"
            "pcmpeqb %%mm6, %%mm6\n\t" // F..F
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        /* NOTE(review): for odd w the last iteration reads srca[w]/src[w]
         * and blends one pixel past the requested width. */
        for(x=0;x<w;x+=2){
            if(srca[x] || srca[x+1]){
                asm volatile(
                    PREFETCHW" 32%0\n\t"
                    PREFETCH" 32%1\n\t"
                    PREFETCH" 32%2\n\t"
                    "movq %0, %%mm0\n\t" // dstbase
                    "movq %%mm0, %%mm1\n\t"
                    "punpcklbw %%mm7, %%mm0\n\t"
                    "punpckhbw %%mm7, %%mm1\n\t"
                    "movd %1, %%mm2\n\t" // srca ABCD0000
                    "paddb %%mm6, %%mm2\n\t"
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                    "movq %%mm2, %%mm3\n\t"
                    "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
                    "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
                    "pmullw %%mm2, %%mm0\n\t"
                    "pmullw %%mm3, %%mm1\n\t"
                    "psrlw $8, %%mm0\n\t"
                    "psrlw $8, %%mm1\n\t"
                    "packuswb %%mm1, %%mm0\n\t"
                    "movd %2, %%mm2 \n\t" // src ABCD0000
                    "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                    "paddb %%mm2, %%mm0\n\t"
                    "movq %%mm0, %0\n\t"
                    :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
                    /* stores through operand 0, which is input-only */
                    : "memory");
            }
        }
#else //this is faster for intels crap
        /* mm7 = 0, mm4 = 00FF per word, mm5 = FF00 per word. */
        asm volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            "pxor %%mm7, %%mm7\n\t"
            "pcmpeqb %%mm5, %%mm5\n\t" // F..F
            "movq %%mm5, %%mm4\n\t"
            "psllw $8, %%mm5\n\t" //FF00FF00FF00
            "psrlw $8, %%mm4\n\t" //00FF00FF00FF
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=4){
            /* NOTE(review): MANGLE(bFF) is defined elsewhere; presumably a
             * vector of 0xFF bytes (i.e. srca-1 per byte) -- confirm. */
            asm volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"            // all 4 srca bytes zero: nothing to do
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb "MANGLE(bFF)", %%mm2\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
                "movq %%mm2, %%mm3\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm2, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "movd %2, %%mm2 \n\t" //src 0000DCBA
                "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
                "movq %%mm2, %%mm6\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"

                "movq 8%0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
                "pmullw %%mm3, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
                "paddb %%mm6, %%mm0\n\t"
                "movq %%mm0, 8%0\n\t"
                "1:\n\t"
                :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
                /* The asm stores 16 bytes starting at operand 0, which is
                 * only declared as an input. */
                : "%eax", "memory");
        }
#endif
#else /* HAVE_MMX */
        for(x=0;x<w;x++){
            if(srca[x]){
                /* Computes dst*srca + (src<<8) per byte and stores bits
                 * 8..15 of the sum (%ch/%ah/%dh) back. */
                asm volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"
                    "movzbl 2(%0), %%edx\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"
                    "imull %1, %%edx\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"
                    "addl %2, %%edx\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"
                    "movb %%dh, 2(%0)\n\t"

                    :
                    :"r" (&dstbase[4*x]),
                     "r" ((unsigned)srca[x]),
                     "r" (((unsigned)src[x])<<8)
                    /* writes through the pointer in %0 */
                    :"%eax", "%ecx", "%edx", "memory"
                    );
            }
        }
#endif /* HAVE_MMX */
#else /*non x86 arch*/
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
#else
                dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
                dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
                dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
#endif
            }
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    asm volatile(EMMS:::"memory");
#endif
}