Mercurial > mplayer.hg
annotate libvo/osd_template.c @ 28132:c28c36d595d5
Fix imaadpcm extradata with lavc encoder.
The formula to calculate frame size was wrong, duplicated code from the encoder
and did not take endianness into account when writing the value into extradata.
Patch by Edouard Gomez [ed gomez (at) free fr].
author | reimar |
---|---|
date | Sat, 20 Dec 2008 10:18:15 +0000 |
parents | 08d18fe9da52 |
children | 25337a2147e7 |
rev | line source |
---|---|
326 | 1 // Generic alpha renderers for all YUV modes and RGB depths. |
2846 | 2 // Optimized by Nick and Michael |
3142 | 3 // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL |
326 | 4 |
3142 | 5 #undef PREFETCH |
6 #undef EMMS | |
7 #undef PREFETCHW | |
8 #undef PAVGB | |
947
76fd9463b9d3
FAST_OSD option to disable font outline antialiasing
arpi_esp
parents:
622
diff
changeset
|
9 |
3142 | 10 #ifdef HAVE_3DNOW |
11 #define PREFETCH "prefetch" | |
12 #define PREFETCHW "prefetchw" | |
13 #define PAVGB "pavgusb" | |
14 #elif defined ( HAVE_MMX2 ) | |
15 #define PREFETCH "prefetchnta" | |
16 #define PREFETCHW "prefetcht0" | |
17 #define PAVGB "pavgb" | |
18 #else | |
25973
ef4297ed0d12
libvo: change asm syntax to use ASMALIGN and " # nop"
uau
parents:
25903
diff
changeset
|
19 #define PREFETCH " # nop" |
ef4297ed0d12
libvo: change asm syntax to use ASMALIGN and " # nop"
uau
parents:
25903
diff
changeset
|
20 #define PREFETCHW " # nop" |
2846 | 21 #endif |
622 | 22 |
3142 | 23 #ifdef HAVE_3DNOW |
24 /* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */ | |
25 #define EMMS "femms" | |
26 #else | |
27 #define EMMS "emms" | |
28 #endif | |
29 | |
/*
 * Alpha-blend an 8-bit OSD bitmap onto an 8-bit-per-pixel plane
 * (used for the luma plane of planar YUV, going by the function name).
 *
 *   w, h       size of the OSD bitmap in pixels
 *   src        OSD pixel values, one byte per pixel
 *   srca       per-pixel alpha, one byte per pixel; 0 means "leave dst alone"
 *   srcstride  bytes between rows, shared by src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between destination rows
 *
 * Portable C path:  dst = ((dst * srca) >> 8) + src   for every srca != 0.
 * FAST_OSD C path:  dst = src for every srca != 0 (no blending); w is halved
 *                   and two pixels are handled per iteration.
 * MMX path:         processes 8 pixels per iteration and skips a group only
 *                   when all 8 alpha bytes are zero. Inside a processed group
 *                   every pixel is blended with (srca + 0xFF) & 0xFF as the
 *                   multiplier, so results can differ slightly from the C path.
 *                   It reads/writes whole 8-byte groups, so rows are presumably
 *                   expected to be padded to a multiple of 8 -- TODO confirm
 *                   against callers.
 */
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !defined(HAVE_MMX)
    w=w>>1;
#endif
#ifdef HAVE_MMX
    /* Loop-invariant constants: mm7 = all ones, mm5/mm4 = high/low byte masks. */
    __asm__ volatile(
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "movq %%mm5, %%mm7\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#ifdef HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=8){
            __asm__ volatile(
                /* Skip the group when all 8 alpha bytes are zero. */
                "movl %1, %%eax\n\t"
                "orl 4%1, %%eax\n\t"
                " jz 1f\n\t"
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
                "movq %1, %%mm2\n\t" //srca HGFEDCBA
                "paddb %%mm7, %%mm2\n\t"
                "movq %%mm2, %%mm3\n\t"
                "pand %%mm4, %%mm2\n\t" //0G0E0C0A
                "psrlw $8, %%mm3\n\t" //0H0F0D0B
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "paddb %2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
                /* "memory": the asm stores 8 bytes at %0, which is only
                 * declared as an input operand. */
                : "%eax", "memory");
        }
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
#else
            if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    /* Leave MMX state so that later FPU code works. */
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}
99 | |
/*
 * Alpha-blend an 8-bit OSD bitmap onto a packed 2-bytes-per-pixel surface
 * where the even byte of each pixel is luma and the odd byte is chroma
 * (YUY2 layout, going by the function name).
 *
 *   w, h       size of the OSD bitmap in pixels
 *   src        OSD pixel values, one byte per pixel
 *   srca       per-pixel alpha, one byte per pixel; 0 means "leave dst alone"
 *   srcstride  bytes between rows, shared by src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between destination rows
 *
 * Portable C path:  luma   = ((luma * srca) >> 8) + src
 *                   chroma = (((chroma - 128) * srca) >> 8) + 128
 * FAST_OSD C path:  luma = src for every srca != 0; chroma untouched; w is
 *                   halved and two pixels are handled per iteration.
 * MMX path:         4 pixels (8 destination bytes) per iteration, skipped
 *                   when all 4 alpha bytes are zero. Only luma is blended
 *                   (multiplier (srca + 0xFF) & 0xFF); chroma bytes are
 *                   written back unchanged, unlike the portable C path.
 */
static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !defined(HAVE_MMX)
    w=w>>1;
#endif
#ifdef HAVE_MMX
    /* Loop-invariant constants: mm7 = 0, mm6 = all ones,
     * mm5/mm4 = high/low byte masks. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm6\n\t"
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#ifdef HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src));
        for(x=0;x<w;x+=4){
            __asm__ volatile(
                /* Skip the group when all 4 alpha bytes are zero. */
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %%mm6, %%mm2\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
                "pmullw %%mm2, %%mm0\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t" //U0V0U0V0
                "movd %2, %%mm2\n\t" //src 0000DCBA
                "punpcklbw %%mm7, %%mm2\n\t" //src 0D0C0B0A
                "por %%mm1, %%mm0\n\t"
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
                /* "memory": the asm stores 8 bytes at %0, which is only
                 * declared as an input operand. */
                : "%eax", "memory");
        }
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
#else
            if(srca[x]) {
                dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
                /* Fade chroma towards the neutral value 128. */
                dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
            }
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    /* Leave MMX state so that later FPU code works. */
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}
171 | |
/*
 * Alpha-blend an 8-bit OSD bitmap onto a packed 2-bytes-per-pixel surface
 * where the odd byte of each pixel is luma and the even byte is chroma
 * (UYVY layout, going by the function name). C only, no MMX variant.
 *
 *   w, h       size of the OSD bitmap in pixels
 *   src        OSD pixel values, one byte per pixel
 *   srca       per-pixel alpha, one byte per pixel; 0 means "leave dst alone"
 *   srcstride  bytes between rows, shared by src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between destination rows
 *
 * Normal path:   luma   = ((luma * srca) >> 8) + src
 *                chroma = (((chroma - 128) * srca) >> 8) + 128
 * FAST_OSD path: luma = src for every srca != 0; chroma untouched; w is
 *                halved and two pixels are handled per iteration.
 */
static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD)
    w=w>>1;
#endif
    for(y=0;y<h;y++){
        register int x;
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            /* Pixel 2*x has its luma at byte 2*(2*x)+1 = 4*x+1 and pixel
             * 2*x+1 at byte 4*x+3, matching the 2*x+1 luma offset used by
             * the normal path below. Bytes 4*x+0 and 4*x+2 are chroma and
             * must not receive OSD luma values. */
            if(srca[2*x+0]) dstbase[4*x+1]=src[2*x+0];
            if(srca[2*x+1]) dstbase[4*x+3]=src[2*x+1];
#else
            if(srca[x]) {
                dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x];
                /* Fade chroma towards the neutral value 128. */
                dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128;
            }
#endif
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}
195 | |
/*
 * Alpha-blend an 8-bit grey OSD bitmap onto a packed 3-bytes-per-pixel
 * surface; the same src value is added to all three colour bytes.
 *
 *   w, h       size of the OSD bitmap in pixels
 *   src        OSD pixel values, one byte per pixel
 *   srca       per-pixel alpha, one byte per pixel; 0 means "leave dst alone"
 *   srcstride  bytes between rows, shared by src and srca
 *   dstbase    top-left destination byte
 *   dststride  bytes between destination rows
 *
 * Portable C path:  c = ((c * srca) >> 8) + src for each of the 3 bytes.
 * FAST_OSD C path:  c = src for each of the 3 bytes (no blending).
 * x86 scalar asm:   same arithmetic as the portable path, via %ah/%ch
 *                   (hence not compiled on x86_64).
 * MMX path:         2 pixels per iteration, skipped when both alpha bytes
 *                   are zero; multiplier is (srca + 0xFF) & 0xFF. It loads
 *                   and stores 8 bytes per pair and reads srca[x+1] even for
 *                   the last pixel of an odd-width row, so buffers presumably
 *                   need slack at the end -- TODO confirm against callers.
 *                   mask24hl/mask24lh are defined outside this block; they
 *                   select which of the 8 bytes come from the blend result
 *                   and which keep their original value.
 */
static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#ifdef HAVE_MMX
    /* Loop-invariant constants: mm7 = 0, mm6 = all ones. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#endif
    for(y=0;y<h;y++){
        register unsigned char *dst = dstbase;
        register int x;
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=2){
            if(srca[x] || srca[x+1]){
                __asm__ volatile(
                    PREFETCHW" 32%0\n\t"
                    PREFETCH" 32%1\n\t"
                    PREFETCH" 32%2\n\t"
                    "movq %0, %%mm0\n\t" // dstbase
                    "movq %%mm0, %%mm1\n\t"
                    "movq %%mm0, %%mm5\n\t"
                    "punpcklbw %%mm7, %%mm0\n\t"
                    "punpckhbw %%mm7, %%mm1\n\t"
                    "movd %1, %%mm2\n\t" // srca ABCD0000
                    "paddb %%mm6, %%mm2\n\t"
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                    "psrlq $8, %%mm2\n\t" // srca AAABBBB0
                    "movq %%mm2, %%mm3\n\t"
                    "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
                    "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
                    "pmullw %%mm2, %%mm0\n\t"
                    "pmullw %%mm3, %%mm1\n\t"
                    "psrlw $8, %%mm0\n\t"
                    "psrlw $8, %%mm1\n\t"
                    "packuswb %%mm1, %%mm0\n\t"
                    "movd %2, %%mm2 \n\t" // src ABCD0000
                    "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                    "psrlq $8, %%mm2\n\t" // src AAABBBB0
                    "paddb %%mm2, %%mm0\n\t"
                    "pand %4, %%mm5\n\t"
                    "pand %3, %%mm0\n\t"
                    "por %%mm0, %%mm5\n\t"
                    "movq %%mm5, %0\n\t"
                    :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)
                    /* "memory": the asm stores 8 bytes at %0, which is only
                     * declared as an input operand. */
                    : "memory");
            }
            dst += 6;
        }
#else /* HAVE_MMX */
        for(x=0;x<w;x++){
            if(srca[x]){
                __asm__ volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"

                    "movzbl 2(%0), %%eax\n\t"
                    "imull %1, %%eax\n\t"
                    "addl %2, %%eax\n\t"
                    "movb %%ah, 2(%0)\n\t"
                    :
                    :"D" (dst),
                     "r" ((unsigned)srca[x]),
                     "r" (((unsigned)src[x])<<8)
                    /* "memory": three bytes are stored through the pointer
                     * in %0, which the compiler cannot see otherwise. */
                    :"%eax", "%ecx", "memory"
                );
            }
            dst += 3;
        }
#endif /* !HAVE_MMX */
#else /*non x86 arch or x86_64 with MMX disabled */
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                dst[0]=dst[1]=dst[2]=src[x];
#else
                dst[0]=((dst[0]*srca[x])>>8)+src[x];
                dst[1]=((dst[1]*srca[x])>>8)+src[x];
                dst[2]=((dst[2]*srca[x])>>8)+src[x];
#endif
            }
            dst+=3; // 24bpp
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    /* Leave MMX state so that later FPU code works. */
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}
303 | |
/*
 * Alpha-blend an 8-bit grey OSD bitmap onto a packed 4-bytes-per-pixel
 * surface; the same src value is added to the colour bytes of each pixel.
 *
 *   w, h       size of the OSD bitmap in pixels
 *   src        OSD pixel values, one byte per pixel
 *   srca       per-pixel alpha, one byte per pixel; 0 means "leave dst alone"
 *   srcstride  bytes between rows, shared by src and srca
 *   dstbase    top-left destination byte (advanced by one byte first when
 *              WORDS_BIGENDIAN is defined)
 *   dststride  bytes between destination rows
 *
 * Portable C path:  c = ((c * srca) >> 8) + src for bytes 0..2 of the
 *                   pixel; byte 3 is left untouched.
 * FAST_OSD C path:  bytes 0..2 = src (no blending).
 * x86 scalar asm:   same arithmetic as the portable path, via %ah/%ch/%dh
 *                   (hence not compiled on x86_64).
 * MMX paths:        3DNow! build handles 2 pixels per iteration, the other
 *                   MMX build 4 pixels; groups whose alpha bytes are all
 *                   zero are skipped. Both blend all 4 bytes of a pixel,
 *                   including byte 3. The 3DNow! variant uses
 *                   (srca + 0xFF) & 0xFF as multiplier; the other variant
 *                   adds the external constant bFF (presumably all 0xFF
 *                   bytes -- defined outside this block, TODO confirm).
 */
static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#ifdef WORDS_BIGENDIAN
    dstbase++;
#endif
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
    /* Loop-invariant constants: mm7 = 0, mm6 = all ones. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#else /* HAVE_3DNOW */
    /* Loop-invariant constants: mm7 = 0, mm5/mm4 = high/low byte masks.
     * mm6 is used as scratch inside the loop in this variant. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif /* HAVE_3DNOW */
#endif /* HAVE_MMX */
    for(y=0;y<h;y++){
        register int x;
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=2){
            if(srca[x] || srca[x+1]){
                __asm__ volatile(
                    PREFETCHW" 32%0\n\t"
                    PREFETCH" 32%1\n\t"
                    PREFETCH" 32%2\n\t"
                    "movq %0, %%mm0\n\t" // dstbase
                    "movq %%mm0, %%mm1\n\t"
                    "punpcklbw %%mm7, %%mm0\n\t"
                    "punpckhbw %%mm7, %%mm1\n\t"
                    "movd %1, %%mm2\n\t" // srca ABCD0000
                    "paddb %%mm6, %%mm2\n\t"
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                    "movq %%mm2, %%mm3\n\t"
                    "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
                    "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
                    "pmullw %%mm2, %%mm0\n\t"
                    "pmullw %%mm3, %%mm1\n\t"
                    "psrlw $8, %%mm0\n\t"
                    "psrlw $8, %%mm1\n\t"
                    "packuswb %%mm1, %%mm0\n\t"
                    "movd %2, %%mm2 \n\t" // src ABCD0000
                    "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                    "paddb %%mm2, %%mm0\n\t"
                    "movq %%mm0, %0\n\t"
                    :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
                    /* "memory": the asm stores 8 bytes at %0, which is only
                     * declared as an input operand. */
                    : "memory");
            }
        }
#else /* plain MMX variant; the original author notes it is faster on Intel CPUs */
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=4){
            __asm__ volatile(
                /* Skip the group when all 4 alpha bytes are zero. */
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %3, %%mm2\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
                "movq %%mm2, %%mm3\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm2, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "movd %2, %%mm2 \n\t" //src 0000DCBA
                "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
                "movq %%mm2, %%mm6\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"

                /* Second pair of pixels (C and D), 8 bytes further on. */
                "movq 8%0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
                "pmullw %%mm3, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
                "paddb %%mm6, %%mm0\n\t"
                "movq %%mm0, 8%0\n\t"
                "1:\n\t"
                :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF)
                /* "memory": the asm stores 16 bytes starting at %0, which is
                 * only declared as an input operand. */
                : "%eax", "memory");
        }
#endif
#else /* HAVE_MMX */
        for(x=0;x<w;x++){
            if(srca[x]){
                __asm__ volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"
                    "movzbl 2(%0), %%edx\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"
                    "imull %1, %%edx\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"
                    "addl %2, %%edx\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"
                    "movb %%dh, 2(%0)\n\t"

                    :
                    :"r" (&dstbase[4*x]),
                     "r" ((unsigned)srca[x]),
                     "r" (((unsigned)src[x])<<8)
                    /* "memory": three bytes are stored through the pointer
                     * in %0, which the compiler cannot see otherwise. */
                    :"%eax", "%ecx", "%edx", "memory"
                );
            }
        }
#endif /* HAVE_MMX */
#else /*non x86 arch or x86_64 with MMX disabled */
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
#else
                dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
                dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
                dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
#endif
            }
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    /* Leave MMX state so that later FPU code works. */
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}