Mercurial > mplayer.hg
annotate libvo/osd_template.c @ 24678:9aab9d7b51f0
Fix missing subtitles after seeking back
Subtitle packets that had been demuxed but whose start time had not
yet been reached were left in the demuxer stream after seeking.
When using the default (non-libass) subtitle rendering this could
block subtitles from appearing as long as the playback position stayed
below the original one before seek. External subtitle files were not
affected.
Fixed by making seek code free all packets from the subtitle stream.
author | uau |
---|---|
date | Thu, 04 Oct 2007 02:35:34 +0000 |
parents | 9d42ff736ea5 |
children | 7a1397677cb3 |
rev | line source |
---|---|
326 | 1 // Generic alpha renderers for all YUV modes and RGB depths. |
2846 | 2 // Optimized by Nick and Michael |
3142 | 3 // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL |
326 | 4 |
/*
 * Instruction mnemonics spliced into the inline-assembly strings below.
 * Any earlier definitions are dropped first so the selection always reflects
 * the HAVE_* macros currently in effect.
 * NOTE(review): presumably this template is included once per CPU variant
 * (see the RENAME() wrappers below) - confirm against the including file.
 */
#undef PREFETCH
#undef PREFETCHW
#undef PAVGB
#undef EMMS

#if defined(HAVE_3DNOW)
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
/* On K6 femms is faster than emms. On K7 femms is mapped directly onto emms. */
#define EMMS      "femms"
#elif defined(HAVE_MMX2)
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#define EMMS      "emms"
#else
/*
 * No prefetch instruction available.  PAVGB is intentionally left undefined.
 * NOTE(review): "/nop" presumably relies on the assembler treating '/' as the
 * start of a comment so the trailing operand is ignored - confirm.
 */
#define PREFETCH  "/nop"
#define PREFETCHW "/nop"
#define EMMS      "emms"
#endif
29 | |
30 static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ | |
326 | 31 int y; |
2846 | 32 #if defined(FAST_OSD) && !defined(HAVE_MMX) |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
33 w=w>>1; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
34 #endif |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
35 #ifdef HAVE_MMX |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
36 asm volatile( |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
37 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
38 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
39 "movq %%mm5, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
40 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
41 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
42 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
43 #endif |
326 | 44 for(y=0;y<h;y++){ |
45 register int x; | |
2846 | 46 #ifdef HAVE_MMX |
47 asm volatile( | |
48 PREFETCHW" %0\n\t" | |
49 PREFETCH" %1\n\t" | |
50 PREFETCH" %2\n\t" | |
51 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); | |
52 for(x=0;x<w;x+=8){ | |
53 asm volatile( | |
54 "movl %1, %%eax\n\t" | |
55 "orl 4%1, %%eax\n\t" | |
56 " jz 1f\n\t" | |
57 PREFETCHW" 32%0\n\t" | |
58 PREFETCH" 32%1\n\t" | |
59 PREFETCH" 32%2\n\t" | |
60 "movq %0, %%mm0\n\t" // dstbase | |
61 "movq %%mm0, %%mm1\n\t" | |
62 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y | |
63 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y | |
64 "movq %1, %%mm2\n\t" //srca HGFEDCBA | |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
65 "paddb %%mm7, %%mm2\n\t" |
2846 | 66 "movq %%mm2, %%mm3\n\t" |
67 "pand %%mm4, %%mm2\n\t" //0G0E0C0A | |
68 "psrlw $8, %%mm3\n\t" //0H0F0D0B | |
69 "pmullw %%mm2, %%mm0\n\t" | |
70 "pmullw %%mm3, %%mm1\n\t" | |
71 "psrlw $8, %%mm0\n\t" | |
72 "pand %%mm5, %%mm1\n\t" | |
73 "por %%mm1, %%mm0\n\t" | |
74 "paddb %2, %%mm0\n\t" | |
75 "movq %%mm0, %0\n\t" | |
76 "1:\n\t" | |
77 :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x]) | |
78 : "%eax"); | |
79 } | |
80 #else | |
326 | 81 for(x=0;x<w;x++){ |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
82 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
83 if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
84 if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
85 #else |
326 | 86 if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x]; |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
87 #endif |
326 | 88 } |
2846 | 89 #endif |
326 | 90 src+=srcstride; |
91 srca+=srcstride; | |
92 dstbase+=dststride; | |
93 } | |
2846 | 94 #ifdef HAVE_MMX |
95 asm volatile(EMMS:::"memory"); | |
96 #endif | |
326 | 97 return; |
98 } | |
99 | |
3142 | 100 static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 101 int y; |
2846 | 102 #if defined(FAST_OSD) && !defined(HAVE_MMX) |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
103 w=w>>1; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
104 #endif |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
105 #ifdef HAVE_MMX |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
106 asm volatile( |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
107 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
108 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
109 "movq %%mm5, %%mm6\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
110 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
111 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
112 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
113 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
114 #endif |
326 | 115 for(y=0;y<h;y++){ |
116 register int x; | |
2846 | 117 #ifdef HAVE_MMX |
118 asm volatile( | |
119 PREFETCHW" %0\n\t" | |
120 PREFETCH" %1\n\t" | |
121 PREFETCH" %2\n\t" | |
122 ::"m"(*dstbase),"m"(*srca),"m"(*src)); | |
123 for(x=0;x<w;x+=4){ | |
124 asm volatile( | |
125 "movl %1, %%eax\n\t" | |
126 "orl %%eax, %%eax\n\t" | |
127 " jz 1f\n\t" | |
128 PREFETCHW" 32%0\n\t" | |
129 PREFETCH" 32%1\n\t" | |
130 PREFETCH" 32%2\n\t" | |
131 "movq %0, %%mm0\n\t" // dstbase | |
132 "movq %%mm0, %%mm1\n\t" | |
133 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y | |
134 "movd %%eax, %%mm2\n\t" //srca 0000DCBA | |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
135 "paddb %%mm6, %%mm2\n\t" |
2846 | 136 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A |
137 "pmullw %%mm2, %%mm0\n\t" | |
138 "psrlw $8, %%mm0\n\t" | |
139 "pand %%mm5, %%mm1\n\t" //U0V0U0V0 | |
140 "movd %2, %%mm2\n\t" //src 0000DCBA | |
141 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A | |
142 "por %%mm1, %%mm0\n\t" | |
143 "paddb %%mm2, %%mm0\n\t" | |
144 "movq %%mm0, %0\n\t" | |
145 "1:\n\t" | |
146 :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x]) | |
147 : "%eax"); | |
148 } | |
149 #else | |
326 | 150 for(x=0;x<w;x++){ |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
151 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
152 if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
153 if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
154 #else |
3431
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
155 if(srca[x]) { |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
156 dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x]; |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
157 dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128; |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
158 } |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
159 #endif |
326 | 160 } |
2846 | 161 #endif |
162 src+=srcstride; | |
326 | 163 srca+=srcstride; |
164 dstbase+=dststride; | |
165 } | |
2846 | 166 #ifdef HAVE_MMX |
167 asm volatile(EMMS:::"memory"); | |
168 #endif | |
326 | 169 return; |
170 } | |
171 | |
12516 | 172 static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
173 int y; | |
174 #if defined(FAST_OSD) | |
175 w=w>>1; | |
176 #endif | |
177 for(y=0;y<h;y++){ | |
178 register int x; | |
179 for(x=0;x<w;x++){ | |
180 #ifdef FAST_OSD | |
181 if(srca[2*x+0]) dstbase[4*x+2]=src[2*x+0]; | |
182 if(srca[2*x+1]) dstbase[4*x+0]=src[2*x+1]; | |
183 #else | |
184 if(srca[x]) { | |
185 dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x]; | |
186 dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128; | |
187 } | |
188 #endif | |
189 } | |
190 src+=srcstride; | |
191 srca+=srcstride; | |
192 dstbase+=dststride; | |
193 } | |
194 } | |
195 | |
3142 | 196 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 197 int y; |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
198 #ifdef HAVE_MMX |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
199 asm volatile( |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
200 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
201 "pcmpeqb %%mm6, %%mm6\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
202 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
203 #endif |
326 | 204 for(y=0;y<h;y++){ |
205 register unsigned char *dst = dstbase; | |
206 register int x; | |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
207 #if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX)) |
2839 | 208 #ifdef HAVE_MMX |
209 asm volatile( | |
210 PREFETCHW" %0\n\t" | |
211 PREFETCH" %1\n\t" | |
212 PREFETCH" %2\n\t" | |
213 ::"m"(*dst),"m"(*srca),"m"(*src):"memory"); | |
214 for(x=0;x<w;x+=2){ | |
2843 | 215 if(srca[x] || srca[x+1]) |
2839 | 216 asm volatile( |
217 PREFETCHW" 32%0\n\t" | |
218 PREFETCH" 32%1\n\t" | |
219 PREFETCH" 32%2\n\t" | |
220 "movq %0, %%mm0\n\t" // dstbase | |
221 "movq %%mm0, %%mm1\n\t" | |
222 "movq %%mm0, %%mm5\n\t" | |
223 "punpcklbw %%mm7, %%mm0\n\t" | |
224 "punpckhbw %%mm7, %%mm1\n\t" | |
225 "movd %1, %%mm2\n\t" // srca ABCD0000 | |
226 "paddb %%mm6, %%mm2\n\t" | |
227 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD | |
228 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB | |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
229 "psrlq $8, %%mm2\n\t" // srca AAABBBB0 |
2839 | 230 "movq %%mm2, %%mm3\n\t" |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
231 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B |
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
232 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00 |
2839 | 233 "pmullw %%mm2, %%mm0\n\t" |
234 "pmullw %%mm3, %%mm1\n\t" | |
235 "psrlw $8, %%mm0\n\t" | |
236 "psrlw $8, %%mm1\n\t" | |
237 "packuswb %%mm1, %%mm0\n\t" | |
238 "movd %2, %%mm2 \n\t" // src ABCD0000 | |
239 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD | |
240 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB | |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
241 "psrlq $8, %%mm2\n\t" // src AAABBBB0 |
2839 | 242 "paddb %%mm2, %%mm0\n\t" |
243 "pand %4, %%mm5\n\t" | |
244 "pand %3, %%mm0\n\t" | |
245 "por %%mm0, %%mm5\n\t" | |
246 "movq %%mm5, %0\n\t" | |
247 :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)); | |
248 dst += 6; | |
249 } | |
250 #else /* HAVE_MMX */ | |
251 for(x=0;x<w;x++){ | |
252 if(srca[x]){ | |
253 asm volatile( | |
254 "movzbl (%0), %%ecx\n\t" | |
255 "movzbl 1(%0), %%eax\n\t" | |
256 | |
257 "imull %1, %%ecx\n\t" | |
258 "imull %1, %%eax\n\t" | |
259 | |
5139 | 260 "addl %2, %%ecx\n\t" |
2839 | 261 "addl %2, %%eax\n\t" |
262 | |
263 "movb %%ch, (%0)\n\t" | |
264 "movb %%ah, 1(%0)\n\t" | |
5139 | 265 |
266 "movzbl 2(%0), %%eax\n\t" | |
267 "imull %1, %%eax\n\t" | |
268 "addl %2, %%eax\n\t" | |
269 "movb %%ah, 2(%0)\n\t" | |
2839 | 270 : |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
12516
diff
changeset
|
271 :"D" (dst), |
2839 | 272 "r" ((unsigned)srca[x]), |
273 "r" (((unsigned)src[x])<<8) | |
5139 | 274 :"%eax", "%ecx" |
2839 | 275 ); |
276 } | |
277 dst += 3; | |
278 } | |
5139 | 279 #endif /* !HAVE_MMX */ |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
280 #else /*non x86 arch or x86_64 with MMX disabled */ |
326 | 281 for(x=0;x<w;x++){ |
282 if(srca[x]){ | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
283 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
284 dst[0]=dst[1]=dst[2]=src[x]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
285 #else |
326 | 286 dst[0]=((dst[0]*srca[x])>>8)+src[x]; |
287 dst[1]=((dst[1]*srca[x])>>8)+src[x]; | |
288 dst[2]=((dst[2]*srca[x])>>8)+src[x]; | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
289 #endif |
326 | 290 } |
291 dst+=3; // 24bpp | |
292 } | |
2839 | 293 #endif /* arch_x86 */ |
326 | 294 src+=srcstride; |
295 srca+=srcstride; | |
296 dstbase+=dststride; | |
297 } | |
2839 | 298 #ifdef HAVE_MMX |
299 asm volatile(EMMS:::"memory"); | |
300 #endif | |
326 | 301 return; |
302 } | |
303 | |
3142 | 304 static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 305 int y; |
9960 | 306 #ifdef WORDS_BIGENDIAN |
307 dstbase++; | |
308 #endif | |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
309 #ifdef HAVE_MMX |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
310 #ifdef HAVE_3DNOW |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
311 asm volatile( |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
312 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
313 "pcmpeqb %%mm6, %%mm6\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
314 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
315 #else /* HAVE_3DNOW */ |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
316 asm volatile( |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
317 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
318 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
319 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
320 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
321 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
322 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
323 #endif /* HAVE_3DNOW */ |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
324 #endif /* HAVE_MMX */ |
326 | 325 for(y=0;y<h;y++){ |
326 register int x; | |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
327 #if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX)) |
2833 | 328 #ifdef HAVE_MMX |
2846 | 329 #ifdef HAVE_3DNOW |
2835 | 330 asm volatile( |
331 PREFETCHW" %0\n\t" | |
332 PREFETCH" %1\n\t" | |
333 PREFETCH" %2\n\t" | |
2839 | 334 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); |
2835 | 335 for(x=0;x<w;x+=2){ |
2843 | 336 if(srca[x] || srca[x+1]) |
2798
ee2cd36a81a2
Code cleanup - emms is not required when MMX block is commented out.
nick
parents:
2578
diff
changeset
|
337 asm volatile( |
2835 | 338 PREFETCHW" 32%0\n\t" |
339 PREFETCH" 32%1\n\t" | |
340 PREFETCH" 32%2\n\t" | |
341 "movq %0, %%mm0\n\t" // dstbase | |
342 "movq %%mm0, %%mm1\n\t" | |
343 "punpcklbw %%mm7, %%mm0\n\t" | |
344 "punpckhbw %%mm7, %%mm1\n\t" | |
345 "movd %1, %%mm2\n\t" // srca ABCD0000 | |
346 "paddb %%mm6, %%mm2\n\t" | |
347 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD | |
348 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB | |
349 "movq %%mm2, %%mm3\n\t" | |
350 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A | |
351 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B | |
352 "pmullw %%mm2, %%mm0\n\t" | |
353 "pmullw %%mm3, %%mm1\n\t" | |
354 "psrlw $8, %%mm0\n\t" | |
355 "psrlw $8, %%mm1\n\t" | |
356 "packuswb %%mm1, %%mm0\n\t" | |
357 "movd %2, %%mm2 \n\t" // src ABCD0000 | |
358 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD | |
359 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB | |
360 "paddb %%mm2, %%mm0\n\t" | |
361 "movq %%mm0, %0\n\t" | |
362 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])); | |
363 } | |
2846 | 364 #else //this is faster for intels crap |
365 asm volatile( | |
366 PREFETCHW" %0\n\t" | |
367 PREFETCH" %1\n\t" | |
368 PREFETCH" %2\n\t" | |
369 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); | |
370 for(x=0;x<w;x+=4){ | |
371 asm volatile( | |
372 "movl %1, %%eax\n\t" | |
373 "orl %%eax, %%eax\n\t" | |
374 " jz 1f\n\t" | |
375 PREFETCHW" 32%0\n\t" | |
376 PREFETCH" 32%1\n\t" | |
377 PREFETCH" 32%2\n\t" | |
378 "movq %0, %%mm0\n\t" // dstbase | |
379 "movq %%mm0, %%mm1\n\t" | |
380 "pand %%mm4, %%mm0\n\t" //0R0B0R0B | |
381 "psrlw $8, %%mm1\n\t" //0?0G0?0G | |
382 "movd %%eax, %%mm2\n\t" //srca 0000DCBA | |
4245 | 383 "paddb "MANGLE(bFF)", %%mm2\n\t" |
2846 | 384 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA |
385 "movq %%mm2, %%mm3\n\t" | |
386 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A | |
387 "pmullw %%mm2, %%mm0\n\t" | |
388 "pmullw %%mm2, %%mm1\n\t" | |
389 "psrlw $8, %%mm0\n\t" | |
390 "pand %%mm5, %%mm1\n\t" | |
391 "por %%mm1, %%mm0\n\t" | |
392 "movd %2, %%mm2 \n\t" //src 0000DCBA | |
393 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA | |
394 "movq %%mm2, %%mm6\n\t" | |
395 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA | |
396 "paddb %%mm2, %%mm0\n\t" | |
397 "movq %%mm0, %0\n\t" | |
398 | |
399 "movq 8%0, %%mm0\n\t" // dstbase | |
400 "movq %%mm0, %%mm1\n\t" | |
401 "pand %%mm4, %%mm0\n\t" //0R0B0R0B | |
402 "psrlw $8, %%mm1\n\t" //0?0G0?0G | |
403 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C | |
404 "pmullw %%mm3, %%mm0\n\t" | |
405 "pmullw %%mm3, %%mm1\n\t" | |
406 "psrlw $8, %%mm0\n\t" | |
407 "pand %%mm5, %%mm1\n\t" | |
408 "por %%mm1, %%mm0\n\t" | |
409 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC | |
410 "paddb %%mm6, %%mm0\n\t" | |
411 "movq %%mm0, 8%0\n\t" | |
412 "1:\n\t" | |
413 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]) | |
414 : "%eax"); | |
415 } | |
416 #endif | |
2839 | 417 #else /* HAVE_MMX */ |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
418 for(x=0;x<w;x++){ |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
419 if(srca[x]){ |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
420 asm volatile( |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
421 "movzbl (%0), %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
422 "movzbl 1(%0), %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
423 "movzbl 2(%0), %%edx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
424 |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
425 "imull %1, %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
426 "imull %1, %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
427 "imull %1, %%edx\n\t" |
2578 | 428 |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
429 "addl %2, %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
430 "addl %2, %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
431 "addl %2, %%edx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
432 |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
433 "movb %%ch, (%0)\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
434 "movb %%ah, 1(%0)\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
435 "movb %%dh, 2(%0)\n\t" |
2578 | 436 |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
437 : |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
438 :"r" (&dstbase[4*x]), |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
439 "r" ((unsigned)srca[x]), |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
440 "r" (((unsigned)src[x])<<8) |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
441 :"%eax", "%ecx", "%edx" |
2578 | 442 ); |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
443 } |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
444 } |
2839 | 445 #endif /* HAVE_MMX */ |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
446 #else /*non x86 arch or x86_64 with MMX disabled */ |
326 | 447 for(x=0;x<w;x++){ |
448 if(srca[x]){ | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
449 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
450 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
451 #else |
326 | 452 dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x]; |
453 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x]; | |
454 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x]; | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
455 #endif |
326 | 456 } |
457 } | |
2798
ee2cd36a81a2
Code cleanup - emms is not required when MMX block is commented out.
nick
parents:
2578
diff
changeset
|
458 #endif /* arch_x86 */ |
326 | 459 src+=srcstride; |
460 srca+=srcstride; | |
461 dstbase+=dststride; | |
462 } | |
2833 | 463 #ifdef HAVE_MMX |
2798
ee2cd36a81a2
Code cleanup - emms is not required when MMX block is commented out.
nick
parents:
2578
diff
changeset
|
464 asm volatile(EMMS:::"memory"); |
2578 | 465 #endif |
326 | 466 return; |
467 } |