Mercurial > mplayer.hg
annotate libvo/osd_template.c @ 29259:c8dda91cf626
libdvdread4 does not use HAVE_CONFIG_H, so drop the -D flag for libdvdread4.
author | diego |
---|---|
date | Tue, 12 May 2009 08:21:53 +0000 |
parents | 7681eab10aea |
children | 0f1b5b68af32 |
rev | line source |
---|---|
28446
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
1 /* |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
2 * generic alpha renderers for all YUV modes and RGB depths |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
3 * Optimized by Nick and Michael. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
4 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
5 * This file is part of MPlayer. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
6 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
7 * MPlayer is free software; you can redistribute it and/or modify |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
8 * it under the terms of the GNU General Public License as published by |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
9 * the Free Software Foundation; either version 2 of the License, or |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
10 * (at your option) any later version. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
11 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
12 * MPlayer is distributed in the hope that it will be useful, |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
15 * GNU General Public License for more details. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
16 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
17 * You should have received a copy of the GNU General Public License along |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
18 * with MPlayer; if not, write to the Free Software Foundation, Inc., |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
20 */ |
326 | 21 |
/*
 * Per-instantiation instruction mnemonics.
 *
 * The macros are reset first and then redefined from the HAVE_* feature
 * macros that are in effect at this point.
 */
#undef PREFETCH
#undef EMMS
#undef PREFETCHW
#undef PAVGB

#if HAVE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#define EMMS      "femms"
#elif HAVE_MMX2
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#define EMMS      "emms"
#else
/* No prefetch instruction available: expand to an assembler comment. */
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
/* PAVGB is intentionally left undefined in this configuration. */
#define EMMS      "emms"
#endif
46 | |
47 static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ | |
326 | 48 int y; |
28290 | 49 #if defined(FAST_OSD) && !HAVE_MMX |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
50 w=w>>1; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
51 #endif |
28290 | 52 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
53 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
54 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
55 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
56 "movq %%mm5, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
57 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
58 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
59 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
60 #endif |
326 | 61 for(y=0;y<h;y++){ |
62 register int x; | |
28290 | 63 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
64 __asm__ volatile( |
2846 | 65 PREFETCHW" %0\n\t" |
66 PREFETCH" %1\n\t" | |
67 PREFETCH" %2\n\t" | |
68 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); | |
69 for(x=0;x<w;x+=8){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
70 __asm__ volatile( |
2846 | 71 "movl %1, %%eax\n\t" |
72 "orl 4%1, %%eax\n\t" | |
73 " jz 1f\n\t" | |
74 PREFETCHW" 32%0\n\t" | |
75 PREFETCH" 32%1\n\t" | |
76 PREFETCH" 32%2\n\t" | |
77 "movq %0, %%mm0\n\t" // dstbase | |
78 "movq %%mm0, %%mm1\n\t" | |
79 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y | |
80 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y | |
81 "movq %1, %%mm2\n\t" //srca HGFEDCBA | |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
82 "paddb %%mm7, %%mm2\n\t" |
2846 | 83 "movq %%mm2, %%mm3\n\t" |
84 "pand %%mm4, %%mm2\n\t" //0G0E0C0A | |
85 "psrlw $8, %%mm3\n\t" //0H0F0D0B | |
86 "pmullw %%mm2, %%mm0\n\t" | |
87 "pmullw %%mm3, %%mm1\n\t" | |
88 "psrlw $8, %%mm0\n\t" | |
89 "pand %%mm5, %%mm1\n\t" | |
90 "por %%mm1, %%mm0\n\t" | |
91 "paddb %2, %%mm0\n\t" | |
92 "movq %%mm0, %0\n\t" | |
93 "1:\n\t" | |
94 :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x]) | |
95 : "%eax"); | |
96 } | |
97 #else | |
326 | 98 for(x=0;x<w;x++){ |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
99 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
100 if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
101 if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
102 #else |
326 | 103 if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x]; |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
104 #endif |
326 | 105 } |
2846 | 106 #endif |
326 | 107 src+=srcstride; |
108 srca+=srcstride; | |
109 dstbase+=dststride; | |
110 } | |
28290 | 111 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
112 __asm__ volatile(EMMS:::"memory"); |
2846 | 113 #endif |
326 | 114 return; |
115 } | |
116 | |
3142 | 117 static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 118 int y; |
28290 | 119 #if defined(FAST_OSD) && !HAVE_MMX |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
120 w=w>>1; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
121 #endif |
28290 | 122 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
123 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
124 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
125 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
126 "movq %%mm5, %%mm6\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
127 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
128 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
129 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
130 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
131 #endif |
326 | 132 for(y=0;y<h;y++){ |
133 register int x; | |
28290 | 134 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
135 __asm__ volatile( |
2846 | 136 PREFETCHW" %0\n\t" |
137 PREFETCH" %1\n\t" | |
138 PREFETCH" %2\n\t" | |
139 ::"m"(*dstbase),"m"(*srca),"m"(*src)); | |
140 for(x=0;x<w;x+=4){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
141 __asm__ volatile( |
2846 | 142 "movl %1, %%eax\n\t" |
143 "orl %%eax, %%eax\n\t" | |
144 " jz 1f\n\t" | |
145 PREFETCHW" 32%0\n\t" | |
146 PREFETCH" 32%1\n\t" | |
147 PREFETCH" 32%2\n\t" | |
148 "movq %0, %%mm0\n\t" // dstbase | |
149 "movq %%mm0, %%mm1\n\t" | |
150 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y | |
151 "movd %%eax, %%mm2\n\t" //srca 0000DCBA | |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
152 "paddb %%mm6, %%mm2\n\t" |
2846 | 153 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A |
154 "pmullw %%mm2, %%mm0\n\t" | |
155 "psrlw $8, %%mm0\n\t" | |
156 "pand %%mm5, %%mm1\n\t" //U0V0U0V0 | |
157 "movd %2, %%mm2\n\t" //src 0000DCBA | |
158 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A | |
159 "por %%mm1, %%mm0\n\t" | |
160 "paddb %%mm2, %%mm0\n\t" | |
161 "movq %%mm0, %0\n\t" | |
162 "1:\n\t" | |
163 :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x]) | |
164 : "%eax"); | |
165 } | |
166 #else | |
326 | 167 for(x=0;x<w;x++){ |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
168 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
169 if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
170 if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
171 #else |
3431
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
172 if(srca[x]) { |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
173 dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x]; |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
174 dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128; |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
175 } |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
176 #endif |
326 | 177 } |
2846 | 178 #endif |
179 src+=srcstride; | |
326 | 180 srca+=srcstride; |
181 dstbase+=dststride; | |
182 } | |
28290 | 183 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
184 __asm__ volatile(EMMS:::"memory"); |
2846 | 185 #endif |
326 | 186 return; |
187 } | |
188 | |
12516 | 189 static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
190 int y; | |
191 #if defined(FAST_OSD) | |
192 w=w>>1; | |
193 #endif | |
194 for(y=0;y<h;y++){ | |
195 register int x; | |
196 for(x=0;x<w;x++){ | |
197 #ifdef FAST_OSD | |
198 if(srca[2*x+0]) dstbase[4*x+2]=src[2*x+0]; | |
199 if(srca[2*x+1]) dstbase[4*x+0]=src[2*x+1]; | |
200 #else | |
201 if(srca[x]) { | |
202 dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x]; | |
203 dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128; | |
204 } | |
205 #endif | |
206 } | |
207 src+=srcstride; | |
208 srca+=srcstride; | |
209 dstbase+=dststride; | |
210 } | |
211 } | |
212 | |
3142 | 213 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 214 int y; |
28290 | 215 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
216 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
217 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
218 "pcmpeqb %%mm6, %%mm6\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
219 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
220 #endif |
326 | 221 for(y=0;y<h;y++){ |
222 register unsigned char *dst = dstbase; | |
223 register int x; | |
28290 | 224 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX) |
225 #if HAVE_MMX | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
226 __asm__ volatile( |
2839 | 227 PREFETCHW" %0\n\t" |
228 PREFETCH" %1\n\t" | |
229 PREFETCH" %2\n\t" | |
230 ::"m"(*dst),"m"(*srca),"m"(*src):"memory"); | |
231 for(x=0;x<w;x+=2){ | |
2843 | 232 if(srca[x] || srca[x+1]) |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
233 __asm__ volatile( |
2839 | 234 PREFETCHW" 32%0\n\t" |
235 PREFETCH" 32%1\n\t" | |
236 PREFETCH" 32%2\n\t" | |
237 "movq %0, %%mm0\n\t" // dstbase | |
238 "movq %%mm0, %%mm1\n\t" | |
239 "movq %%mm0, %%mm5\n\t" | |
240 "punpcklbw %%mm7, %%mm0\n\t" | |
241 "punpckhbw %%mm7, %%mm1\n\t" | |
242 "movd %1, %%mm2\n\t" // srca ABCD0000 | |
243 "paddb %%mm6, %%mm2\n\t" | |
244 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD | |
245 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB | |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
246 "psrlq $8, %%mm2\n\t" // srca AAABBBB0 |
2839 | 247 "movq %%mm2, %%mm3\n\t" |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
248 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B |
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
249 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00 |
2839 | 250 "pmullw %%mm2, %%mm0\n\t" |
251 "pmullw %%mm3, %%mm1\n\t" | |
252 "psrlw $8, %%mm0\n\t" | |
253 "psrlw $8, %%mm1\n\t" | |
254 "packuswb %%mm1, %%mm0\n\t" | |
255 "movd %2, %%mm2 \n\t" // src ABCD0000 | |
256 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD | |
257 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB | |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
258 "psrlq $8, %%mm2\n\t" // src AAABBBB0 |
2839 | 259 "paddb %%mm2, %%mm0\n\t" |
260 "pand %4, %%mm5\n\t" | |
261 "pand %3, %%mm0\n\t" | |
262 "por %%mm0, %%mm5\n\t" | |
263 "movq %%mm5, %0\n\t" | |
264 :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)); | |
265 dst += 6; | |
266 } | |
267 #else /* HAVE_MMX */ | |
268 for(x=0;x<w;x++){ | |
269 if(srca[x]){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
270 __asm__ volatile( |
2839 | 271 "movzbl (%0), %%ecx\n\t" |
272 "movzbl 1(%0), %%eax\n\t" | |
273 | |
274 "imull %1, %%ecx\n\t" | |
275 "imull %1, %%eax\n\t" | |
276 | |
5139 | 277 "addl %2, %%ecx\n\t" |
2839 | 278 "addl %2, %%eax\n\t" |
279 | |
280 "movb %%ch, (%0)\n\t" | |
281 "movb %%ah, 1(%0)\n\t" | |
5139 | 282 |
283 "movzbl 2(%0), %%eax\n\t" | |
284 "imull %1, %%eax\n\t" | |
285 "addl %2, %%eax\n\t" | |
286 "movb %%ah, 2(%0)\n\t" | |
2839 | 287 : |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
12516
diff
changeset
|
288 :"D" (dst), |
2839 | 289 "r" ((unsigned)srca[x]), |
290 "r" (((unsigned)src[x])<<8) | |
5139 | 291 :"%eax", "%ecx" |
2839 | 292 ); |
293 } | |
294 dst += 3; | |
295 } | |
5139 | 296 #endif /* !HAVE_MMX */ |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
297 #else /*non x86 arch or x86_64 with MMX disabled */ |
326 | 298 for(x=0;x<w;x++){ |
299 if(srca[x]){ | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
300 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
301 dst[0]=dst[1]=dst[2]=src[x]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
302 #else |
326 | 303 dst[0]=((dst[0]*srca[x])>>8)+src[x]; |
304 dst[1]=((dst[1]*srca[x])>>8)+src[x]; | |
305 dst[2]=((dst[2]*srca[x])>>8)+src[x]; | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
306 #endif |
326 | 307 } |
308 dst+=3; // 24bpp | |
309 } | |
2839 | 310 #endif /* arch_x86 */ |
326 | 311 src+=srcstride; |
312 srca+=srcstride; | |
313 dstbase+=dststride; | |
314 } | |
28290 | 315 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
316 __asm__ volatile(EMMS:::"memory"); |
2839 | 317 #endif |
326 | 318 return; |
319 } | |
320 | |
3142 | 321 static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 322 int y; |
9960 | 323 #ifdef WORDS_BIGENDIAN |
324 dstbase++; | |
325 #endif | |
28290 | 326 #if HAVE_MMX |
28335 | 327 #if HAVE_AMD3DNOW |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
328 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
329 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
330 "pcmpeqb %%mm6, %%mm6\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
331 ::); |
28335 | 332 #else /* HAVE_AMD3DNOW */ |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
333 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
334 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
335 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
336 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
337 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
338 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
339 ::); |
28335 | 340 #endif /* HAVE_AMD3DNOW */ |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
341 #endif /* HAVE_MMX */ |
326 | 342 for(y=0;y<h;y++){ |
343 register int x; | |
28290 | 344 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX) |
345 #if HAVE_MMX | |
28335 | 346 #if HAVE_AMD3DNOW |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
347 __asm__ volatile( |
2835 | 348 PREFETCHW" %0\n\t" |
349 PREFETCH" %1\n\t" | |
350 PREFETCH" %2\n\t" | |
2839 | 351 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); |
2835 | 352 for(x=0;x<w;x+=2){ |
2843 | 353 if(srca[x] || srca[x+1]) |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
354 __asm__ volatile( |
2835 | 355 PREFETCHW" 32%0\n\t" |
356 PREFETCH" 32%1\n\t" | |
357 PREFETCH" 32%2\n\t" | |
358 "movq %0, %%mm0\n\t" // dstbase | |
359 "movq %%mm0, %%mm1\n\t" | |
360 "punpcklbw %%mm7, %%mm0\n\t" | |
361 "punpckhbw %%mm7, %%mm1\n\t" | |
362 "movd %1, %%mm2\n\t" // srca ABCD0000 | |
363 "paddb %%mm6, %%mm2\n\t" | |
364 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD | |
365 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB | |
366 "movq %%mm2, %%mm3\n\t" | |
367 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A | |
368 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B | |
369 "pmullw %%mm2, %%mm0\n\t" | |
370 "pmullw %%mm3, %%mm1\n\t" | |
371 "psrlw $8, %%mm0\n\t" | |
372 "psrlw $8, %%mm1\n\t" | |
373 "packuswb %%mm1, %%mm0\n\t" | |
374 "movd %2, %%mm2 \n\t" // src ABCD0000 | |
375 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD | |
376 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB | |
377 "paddb %%mm2, %%mm0\n\t" | |
378 "movq %%mm0, %0\n\t" | |
379 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])); | |
380 } | |
2846 | 381 #else //this is faster for intels crap |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
382 __asm__ volatile( |
2846 | 383 PREFETCHW" %0\n\t" |
384 PREFETCH" %1\n\t" | |
385 PREFETCH" %2\n\t" | |
386 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); | |
387 for(x=0;x<w;x+=4){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
388 __asm__ volatile( |
2846 | 389 "movl %1, %%eax\n\t" |
390 "orl %%eax, %%eax\n\t" | |
391 " jz 1f\n\t" | |
392 PREFETCHW" 32%0\n\t" | |
393 PREFETCH" 32%1\n\t" | |
394 PREFETCH" 32%2\n\t" | |
395 "movq %0, %%mm0\n\t" // dstbase | |
396 "movq %%mm0, %%mm1\n\t" | |
397 "pand %%mm4, %%mm0\n\t" //0R0B0R0B | |
398 "psrlw $8, %%mm1\n\t" //0?0G0?0G | |
399 "movd %%eax, %%mm2\n\t" //srca 0000DCBA | |
25903
7a1397677cb3
Avoid a MANGLE, there is no register pressure and the generated code
reimar
parents:
21369
diff
changeset
|
400 "paddb %3, %%mm2\n\t" |
2846 | 401 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA |
402 "movq %%mm2, %%mm3\n\t" | |
403 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A | |
404 "pmullw %%mm2, %%mm0\n\t" | |
405 "pmullw %%mm2, %%mm1\n\t" | |
406 "psrlw $8, %%mm0\n\t" | |
407 "pand %%mm5, %%mm1\n\t" | |
408 "por %%mm1, %%mm0\n\t" | |
409 "movd %2, %%mm2 \n\t" //src 0000DCBA | |
410 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA | |
411 "movq %%mm2, %%mm6\n\t" | |
412 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA | |
413 "paddb %%mm2, %%mm0\n\t" | |
414 "movq %%mm0, %0\n\t" | |
415 | |
416 "movq 8%0, %%mm0\n\t" // dstbase | |
417 "movq %%mm0, %%mm1\n\t" | |
418 "pand %%mm4, %%mm0\n\t" //0R0B0R0B | |
419 "psrlw $8, %%mm1\n\t" //0?0G0?0G | |
420 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C | |
421 "pmullw %%mm3, %%mm0\n\t" | |
422 "pmullw %%mm3, %%mm1\n\t" | |
423 "psrlw $8, %%mm0\n\t" | |
424 "pand %%mm5, %%mm1\n\t" | |
425 "por %%mm1, %%mm0\n\t" | |
426 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC | |
427 "paddb %%mm6, %%mm0\n\t" | |
428 "movq %%mm0, 8%0\n\t" | |
429 "1:\n\t" | |
25903
7a1397677cb3
Avoid a MANGLE, there is no register pressure and the generated code
reimar
parents:
21369
diff
changeset
|
430 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF) |
2846 | 431 : "%eax"); |
432 } | |
433 #endif | |
2839 | 434 #else /* HAVE_MMX */ |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
435 for(x=0;x<w;x++){ |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
436 if(srca[x]){ |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
437 __asm__ volatile( |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
438 "movzbl (%0), %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
439 "movzbl 1(%0), %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
440 "movzbl 2(%0), %%edx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
441 |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
442 "imull %1, %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
443 "imull %1, %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
444 "imull %1, %%edx\n\t" |
2578 | 445 |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
446 "addl %2, %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
447 "addl %2, %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
448 "addl %2, %%edx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
449 |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
450 "movb %%ch, (%0)\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
451 "movb %%ah, 1(%0)\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
452 "movb %%dh, 2(%0)\n\t" |
2578 | 453 |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
454 : |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
455 :"r" (&dstbase[4*x]), |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
456 "r" ((unsigned)srca[x]), |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
457 "r" (((unsigned)src[x])<<8) |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
458 :"%eax", "%ecx", "%edx" |
2578 | 459 ); |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
460 } |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
461 } |
2839 | 462 #endif /* HAVE_MMX */ |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
463 #else /*non x86 arch or x86_64 with MMX disabled */ |
326 | 464 for(x=0;x<w;x++){ |
465 if(srca[x]){ | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
466 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
467 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
468 #else |
326 | 469 dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x]; |
470 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x]; | |
471 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x]; | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
472 #endif |
326 | 473 } |
474 } | |
2798
ee2cd36a81a2
Code cleanup - emms is not required when MMX block is commented out.
nick
parents:
2578
diff
changeset
|
475 #endif /* arch_x86 */ |
326 | 476 src+=srcstride; |
477 srca+=srcstride; | |
478 dstbase+=dststride; | |
479 } | |
28290 | 480 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
481 __asm__ volatile(EMMS:::"memory"); |
2578 | 482 #endif |
326 | 483 return; |
484 } |