Mercurial > mplayer.hg
annotate libvo/osd_template.c @ 28511:db19e31a2c7c
Add a calc_src_dst_rects that calculates from window size, panscan etc.
which part of the video source must be scaled onto which part of the window.
Direct3D and (future) VDPAU need this, for XvMC it makes it easier to add
cropping support and Xv is changed to keep the diff to XvMC small.
author | reimar |
---|---|
date | Thu, 12 Feb 2009 17:40:53 +0000 |
parents | 7681eab10aea |
children | 0f1b5b68af32 |
rev | line source |
---|---|
28446
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
1 /* |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
2 * generic alpha renderers for all YUV modes and RGB depths |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
3 * Optimized by Nick and Michael. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
4 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
5 * This file is part of MPlayer. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
6 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
7 * MPlayer is free software; you can redistribute it and/or modify |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
8 * it under the terms of the GNU General Public License as published by |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
9 * the Free Software Foundation; either version 2 of the License, or |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
10 * (at your option) any later version. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
11 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
12 * MPlayer is distributed in the hope that it will be useful, |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
15 * GNU General Public License for more details. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
16 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
17 * You should have received a copy of the GNU General Public License along |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
18 * with MPlayer; if not, write to the Free Software Foundation, Inc., |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
20 */ |
326 | 21 |
/*
 * Per-instantiation instruction mnemonics used inside the inline asm below.
 * Any definition left over from an earlier expansion of this template is
 * dropped first, then the strings are chosen from the HAVE_* configuration.
 */
#undef PREFETCH
#undef PREFETCHW
#undef PAVGB
#undef EMMS

#if HAVE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#define EMMS      "femms"
#else /* !HAVE_AMD3DNOW */
#if HAVE_MMX2
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
/* No prefetch instruction available: emit an assembler comment instead. */
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif
#define EMMS      "emms"
#endif /* HAVE_AMD3DNOW */
46 | |
47 static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ | |
326 | 48 int y; |
28290 | 49 #if defined(FAST_OSD) && !HAVE_MMX |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
50 w=w>>1; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
51 #endif |
28290 | 52 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
53 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
54 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
55 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
56 "movq %%mm5, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
57 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
58 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
59 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
60 #endif |
326 | 61 for(y=0;y<h;y++){ |
62 register int x; | |
28290 | 63 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
64 __asm__ volatile( |
2846 | 65 PREFETCHW" %0\n\t" |
66 PREFETCH" %1\n\t" | |
67 PREFETCH" %2\n\t" | |
68 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); | |
69 for(x=0;x<w;x+=8){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
70 __asm__ volatile( |
2846 | 71 "movl %1, %%eax\n\t" |
72 "orl 4%1, %%eax\n\t" | |
73 " jz 1f\n\t" | |
74 PREFETCHW" 32%0\n\t" | |
75 PREFETCH" 32%1\n\t" | |
76 PREFETCH" 32%2\n\t" | |
77 "movq %0, %%mm0\n\t" // dstbase | |
78 "movq %%mm0, %%mm1\n\t" | |
79 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y | |
80 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y | |
81 "movq %1, %%mm2\n\t" //srca HGFEDCBA | |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
82 "paddb %%mm7, %%mm2\n\t" |
2846 | 83 "movq %%mm2, %%mm3\n\t" |
84 "pand %%mm4, %%mm2\n\t" //0G0E0C0A | |
85 "psrlw $8, %%mm3\n\t" //0H0F0D0B | |
86 "pmullw %%mm2, %%mm0\n\t" | |
87 "pmullw %%mm3, %%mm1\n\t" | |
88 "psrlw $8, %%mm0\n\t" | |
89 "pand %%mm5, %%mm1\n\t" | |
90 "por %%mm1, %%mm0\n\t" | |
91 "paddb %2, %%mm0\n\t" | |
92 "movq %%mm0, %0\n\t" | |
93 "1:\n\t" | |
94 :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x]) | |
95 : "%eax"); | |
96 } | |
97 #else | |
326 | 98 for(x=0;x<w;x++){ |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
99 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
100 if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
101 if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
102 #else |
326 | 103 if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x]; |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
104 #endif |
326 | 105 } |
2846 | 106 #endif |
326 | 107 src+=srcstride; |
108 srca+=srcstride; | |
109 dstbase+=dststride; | |
110 } | |
28290 | 111 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
112 __asm__ volatile(EMMS:::"memory"); |
2846 | 113 #endif |
326 | 114 return; |
115 } | |
116 | |
3142 | 117 static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 118 int y; |
28290 | 119 #if defined(FAST_OSD) && !HAVE_MMX |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
120 w=w>>1; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
121 #endif |
28290 | 122 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
123 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
124 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
125 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
126 "movq %%mm5, %%mm6\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
127 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
128 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
129 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
130 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
131 #endif |
326 | 132 for(y=0;y<h;y++){ |
133 register int x; | |
28290 | 134 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
135 __asm__ volatile( |
2846 | 136 PREFETCHW" %0\n\t" |
137 PREFETCH" %1\n\t" | |
138 PREFETCH" %2\n\t" | |
139 ::"m"(*dstbase),"m"(*srca),"m"(*src)); | |
140 for(x=0;x<w;x+=4){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
141 __asm__ volatile( |
2846 | 142 "movl %1, %%eax\n\t" |
143 "orl %%eax, %%eax\n\t" | |
144 " jz 1f\n\t" | |
145 PREFETCHW" 32%0\n\t" | |
146 PREFETCH" 32%1\n\t" | |
147 PREFETCH" 32%2\n\t" | |
148 "movq %0, %%mm0\n\t" // dstbase | |
149 "movq %%mm0, %%mm1\n\t" | |
150 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y | |
151 "movd %%eax, %%mm2\n\t" //srca 0000DCBA | |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
152 "paddb %%mm6, %%mm2\n\t" |
2846 | 153 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A |
154 "pmullw %%mm2, %%mm0\n\t" | |
155 "psrlw $8, %%mm0\n\t" | |
156 "pand %%mm5, %%mm1\n\t" //U0V0U0V0 | |
157 "movd %2, %%mm2\n\t" //src 0000DCBA | |
158 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A | |
159 "por %%mm1, %%mm0\n\t" | |
160 "paddb %%mm2, %%mm0\n\t" | |
161 "movq %%mm0, %0\n\t" | |
162 "1:\n\t" | |
163 :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x]) | |
164 : "%eax"); | |
165 } | |
166 #else | |
326 | 167 for(x=0;x<w;x++){ |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
168 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
169 if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
170 if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
171 #else |
3431
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
172 if(srca[x]) { |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
173 dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x]; |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
174 dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128; |
63ecec3bdf93
yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents:
3142
diff
changeset
|
175 } |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
176 #endif |
326 | 177 } |
2846 | 178 #endif |
179 src+=srcstride; | |
326 | 180 srca+=srcstride; |
181 dstbase+=dststride; | |
182 } | |
28290 | 183 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
184 __asm__ volatile(EMMS:::"memory"); |
2846 | 185 #endif |
326 | 186 return; |
187 } | |
188 | |
12516 | 189 static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
190 int y; | |
191 #if defined(FAST_OSD) | |
192 w=w>>1; | |
193 #endif | |
194 for(y=0;y<h;y++){ | |
195 register int x; | |
196 for(x=0;x<w;x++){ | |
197 #ifdef FAST_OSD | |
198 if(srca[2*x+0]) dstbase[4*x+2]=src[2*x+0]; | |
199 if(srca[2*x+1]) dstbase[4*x+0]=src[2*x+1]; | |
200 #else | |
201 if(srca[x]) { | |
202 dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x]; | |
203 dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128; | |
204 } | |
205 #endif | |
206 } | |
207 src+=srcstride; | |
208 srca+=srcstride; | |
209 dstbase+=dststride; | |
210 } | |
211 } | |
212 | |
3142 | 213 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 214 int y; |
28290 | 215 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
216 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
217 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
218 "pcmpeqb %%mm6, %%mm6\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
219 ::); |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
220 #endif |
326 | 221 for(y=0;y<h;y++){ |
222 register unsigned char *dst = dstbase; | |
223 register int x; | |
28290 | 224 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX) |
225 #if HAVE_MMX | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
226 __asm__ volatile( |
2839 | 227 PREFETCHW" %0\n\t" |
228 PREFETCH" %1\n\t" | |
229 PREFETCH" %2\n\t" | |
230 ::"m"(*dst),"m"(*srca),"m"(*src):"memory"); | |
231 for(x=0;x<w;x+=2){ | |
2843 | 232 if(srca[x] || srca[x+1]) |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
233 __asm__ volatile( |
2839 | 234 PREFETCHW" 32%0\n\t" |
235 PREFETCH" 32%1\n\t" | |
236 PREFETCH" 32%2\n\t" | |
237 "movq %0, %%mm0\n\t" // dstbase | |
238 "movq %%mm0, %%mm1\n\t" | |
239 "movq %%mm0, %%mm5\n\t" | |
240 "punpcklbw %%mm7, %%mm0\n\t" | |
241 "punpckhbw %%mm7, %%mm1\n\t" | |
242 "movd %1, %%mm2\n\t" // srca ABCD0000 | |
243 "paddb %%mm6, %%mm2\n\t" | |
244 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD | |
245 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB | |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
246 "psrlq $8, %%mm2\n\t" // srca AAABBBB0 |
2839 | 247 "movq %%mm2, %%mm3\n\t" |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
248 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B |
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
249 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00 |
2839 | 250 "pmullw %%mm2, %%mm0\n\t" |
251 "pmullw %%mm3, %%mm1\n\t" | |
252 "psrlw $8, %%mm0\n\t" | |
253 "psrlw $8, %%mm1\n\t" | |
254 "packuswb %%mm1, %%mm0\n\t" | |
255 "movd %2, %%mm2 \n\t" // src ABCD0000 | |
256 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD | |
257 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB | |
16483
b47e38d24ddf
Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents:
13720
diff
changeset
|
258 "psrlq $8, %%mm2\n\t" // src AAABBBB0 |
2839 | 259 "paddb %%mm2, %%mm0\n\t" |
260 "pand %4, %%mm5\n\t" | |
261 "pand %3, %%mm0\n\t" | |
262 "por %%mm0, %%mm5\n\t" | |
263 "movq %%mm5, %0\n\t" | |
264 :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)); | |
265 dst += 6; | |
266 } | |
267 #else /* HAVE_MMX */ | |
268 for(x=0;x<w;x++){ | |
269 if(srca[x]){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
270 __asm__ volatile( |
2839 | 271 "movzbl (%0), %%ecx\n\t" |
272 "movzbl 1(%0), %%eax\n\t" | |
273 | |
274 "imull %1, %%ecx\n\t" | |
275 "imull %1, %%eax\n\t" | |
276 | |
5139 | 277 "addl %2, %%ecx\n\t" |
2839 | 278 "addl %2, %%eax\n\t" |
279 | |
280 "movb %%ch, (%0)\n\t" | |
281 "movb %%ah, 1(%0)\n\t" | |
5139 | 282 |
283 "movzbl 2(%0), %%eax\n\t" | |
284 "imull %1, %%eax\n\t" | |
285 "addl %2, %%eax\n\t" | |
286 "movb %%ah, 2(%0)\n\t" | |
2839 | 287 : |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
12516
diff
changeset
|
288 :"D" (dst), |
2839 | 289 "r" ((unsigned)srca[x]), |
290 "r" (((unsigned)src[x])<<8) | |
5139 | 291 :"%eax", "%ecx" |
2839 | 292 ); |
293 } | |
294 dst += 3; | |
295 } | |
5139 | 296 #endif /* !HAVE_MMX */ |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
297 #else /*non x86 arch or x86_64 with MMX disabled */ |
326 | 298 for(x=0;x<w;x++){ |
299 if(srca[x]){ | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
300 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
301 dst[0]=dst[1]=dst[2]=src[x]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
302 #else |
326 | 303 dst[0]=((dst[0]*srca[x])>>8)+src[x]; |
304 dst[1]=((dst[1]*srca[x])>>8)+src[x]; | |
305 dst[2]=((dst[2]*srca[x])>>8)+src[x]; | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
306 #endif |
326 | 307 } |
308 dst+=3; // 24bpp | |
309 } | |
2839 | 310 #endif /* arch_x86 */ |
326 | 311 src+=srcstride; |
312 srca+=srcstride; | |
313 dstbase+=dststride; | |
314 } | |
28290 | 315 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
316 __asm__ volatile(EMMS:::"memory"); |
2839 | 317 #endif |
326 | 318 return; |
319 } | |
320 | |
3142 | 321 static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ |
326 | 322 int y; |
9960 | 323 #ifdef WORDS_BIGENDIAN |
324 dstbase++; | |
325 #endif | |
28290 | 326 #if HAVE_MMX |
28335 | 327 #if HAVE_AMD3DNOW |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
328 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
329 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
330 "pcmpeqb %%mm6, %%mm6\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
331 ::); |
28335 | 332 #else /* HAVE_AMD3DNOW */ |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
333 __asm__ volatile( |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
334 "pxor %%mm7, %%mm7\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
335 "pcmpeqb %%mm5, %%mm5\n\t" // F..F |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
336 "movq %%mm5, %%mm4\n\t" |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
337 "psllw $8, %%mm5\n\t" //FF00FF00FF00 |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
338 "psrlw $8, %%mm4\n\t" //00FF00FF00FF |
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
339 ::); |
28335 | 340 #endif /* HAVE_AMD3DNOW */ |
18683
d940ecaff286
moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents:
16483
diff
changeset
|
341 #endif /* HAVE_MMX */ |
326 | 342 for(y=0;y<h;y++){ |
343 register int x; | |
28290 | 344 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX) |
345 #if HAVE_MMX | |
28335 | 346 #if HAVE_AMD3DNOW |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
347 __asm__ volatile( |
2835 | 348 PREFETCHW" %0\n\t" |
349 PREFETCH" %1\n\t" | |
350 PREFETCH" %2\n\t" | |
2839 | 351 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); |
2835 | 352 for(x=0;x<w;x+=2){ |
2843 | 353 if(srca[x] || srca[x+1]) |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
354 __asm__ volatile( |
2835 | 355 PREFETCHW" 32%0\n\t" |
356 PREFETCH" 32%1\n\t" | |
357 PREFETCH" 32%2\n\t" | |
358 "movq %0, %%mm0\n\t" // dstbase | |
359 "movq %%mm0, %%mm1\n\t" | |
360 "punpcklbw %%mm7, %%mm0\n\t" | |
361 "punpckhbw %%mm7, %%mm1\n\t" | |
362 "movd %1, %%mm2\n\t" // srca ABCD0000 | |
363 "paddb %%mm6, %%mm2\n\t" | |
364 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD | |
365 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB | |
366 "movq %%mm2, %%mm3\n\t" | |
367 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A | |
368 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B | |
369 "pmullw %%mm2, %%mm0\n\t" | |
370 "pmullw %%mm3, %%mm1\n\t" | |
371 "psrlw $8, %%mm0\n\t" | |
372 "psrlw $8, %%mm1\n\t" | |
373 "packuswb %%mm1, %%mm0\n\t" | |
374 "movd %2, %%mm2 \n\t" // src ABCD0000 | |
375 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD | |
376 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB | |
377 "paddb %%mm2, %%mm0\n\t" | |
378 "movq %%mm0, %0\n\t" | |
379 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])); | |
380 } | |
2846 | 381 #else //this is faster for intels crap |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
382 __asm__ volatile( |
2846 | 383 PREFETCHW" %0\n\t" |
384 PREFETCH" %1\n\t" | |
385 PREFETCH" %2\n\t" | |
386 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); | |
387 for(x=0;x<w;x+=4){ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
388 __asm__ volatile( |
2846 | 389 "movl %1, %%eax\n\t" |
390 "orl %%eax, %%eax\n\t" | |
391 " jz 1f\n\t" | |
392 PREFETCHW" 32%0\n\t" | |
393 PREFETCH" 32%1\n\t" | |
394 PREFETCH" 32%2\n\t" | |
395 "movq %0, %%mm0\n\t" // dstbase | |
396 "movq %%mm0, %%mm1\n\t" | |
397 "pand %%mm4, %%mm0\n\t" //0R0B0R0B | |
398 "psrlw $8, %%mm1\n\t" //0?0G0?0G | |
399 "movd %%eax, %%mm2\n\t" //srca 0000DCBA | |
25903
7a1397677cb3
Avoid a MANGLE, there is no register pressure and the generated code
reimar
parents:
21369
diff
changeset
|
400 "paddb %3, %%mm2\n\t" |
2846 | 401 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA |
402 "movq %%mm2, %%mm3\n\t" | |
403 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A | |
404 "pmullw %%mm2, %%mm0\n\t" | |
405 "pmullw %%mm2, %%mm1\n\t" | |
406 "psrlw $8, %%mm0\n\t" | |
407 "pand %%mm5, %%mm1\n\t" | |
408 "por %%mm1, %%mm0\n\t" | |
409 "movd %2, %%mm2 \n\t" //src 0000DCBA | |
410 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA | |
411 "movq %%mm2, %%mm6\n\t" | |
412 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA | |
413 "paddb %%mm2, %%mm0\n\t" | |
414 "movq %%mm0, %0\n\t" | |
415 | |
416 "movq 8%0, %%mm0\n\t" // dstbase | |
417 "movq %%mm0, %%mm1\n\t" | |
418 "pand %%mm4, %%mm0\n\t" //0R0B0R0B | |
419 "psrlw $8, %%mm1\n\t" //0?0G0?0G | |
420 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C | |
421 "pmullw %%mm3, %%mm0\n\t" | |
422 "pmullw %%mm3, %%mm1\n\t" | |
423 "psrlw $8, %%mm0\n\t" | |
424 "pand %%mm5, %%mm1\n\t" | |
425 "por %%mm1, %%mm0\n\t" | |
426 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC | |
427 "paddb %%mm6, %%mm0\n\t" | |
428 "movq %%mm0, 8%0\n\t" | |
429 "1:\n\t" | |
25903
7a1397677cb3
Avoid a MANGLE, there is no register pressure and the generated code
reimar
parents:
21369
diff
changeset
|
430 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF) |
2846 | 431 : "%eax"); |
432 } | |
433 #endif | |
2839 | 434 #else /* HAVE_MMX */ |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
435 for(x=0;x<w;x++){ |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
436 if(srca[x]){ |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
437 __asm__ volatile( |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
438 "movzbl (%0), %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
439 "movzbl 1(%0), %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
440 "movzbl 2(%0), %%edx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
441 |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
442 "imull %1, %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
443 "imull %1, %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
444 "imull %1, %%edx\n\t" |
2578 | 445 |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
446 "addl %2, %%ecx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
447 "addl %2, %%eax\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
448 "addl %2, %%edx\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
449 |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
450 "movb %%ch, (%0)\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
451 "movb %%ah, 1(%0)\n\t" |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
452 "movb %%dh, 2(%0)\n\t" |
2578 | 453 |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
454 : |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
455 :"r" (&dstbase[4*x]), |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
456 "r" ((unsigned)srca[x]), |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
457 "r" (((unsigned)src[x])<<8) |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
458 :"%eax", "%ecx", "%edx" |
2578 | 459 ); |
2823
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
460 } |
004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents:
2807
diff
changeset
|
461 } |
2839 | 462 #endif /* HAVE_MMX */ |
21369
9d42ff736ea5
Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents:
20577
diff
changeset
|
463 #else /*non x86 arch or x86_64 with MMX disabled */ |
326 | 464 for(x=0;x<w;x++){ |
465 if(srca[x]){ | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
466 #ifdef FAST_OSD |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
467 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x]; |
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
468 #else |
326 | 469 dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x]; |
470 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x]; | |
471 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x]; | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
472 #endif |
326 | 473 } |
474 } | |
2798
ee2cd36a81a2
Code cleanup - emms is not required when MMX block is commented out.
nick
parents:
2578
diff
changeset
|
475 #endif /* arch_x86 */ |
326 | 476 src+=srcstride; |
477 srca+=srcstride; | |
478 dstbase+=dststride; | |
479 } | |
28290 | 480 #if HAVE_MMX |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25973
diff
changeset
|
481 __asm__ volatile(EMMS:::"memory"); |
2578 | 482 #endif |
326 | 483 return; |
484 } |