annotate libvo/osd_template.c @ 30846:72ae3dd3aa6e

Replace separate Win32, XAnim and Real codec directories by one global dir. Nowadays MPlayer does not use the codecs from other installed programs. A runtime setting will soon take over the rare case that binary codecs should be searched for in non-standard directories.
author diego
date Sun, 14 Mar 2010 16:13:32 +0000
parents f01023c524c3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
28446
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
1 /*
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
2 * generic alpha renderers for all YUV modes and RGB depths
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
3 * Optimized by Nick and Michael.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
4 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
5 * This file is part of MPlayer.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
6 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
7 * MPlayer is free software; you can redistribute it and/or modify
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
8 * it under the terms of the GNU General Public License as published by
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
9 * the Free Software Foundation; either version 2 of the License, or
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
10 * (at your option) any later version.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
11 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
12 * MPlayer is distributed in the hope that it will be useful,
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
15 * GNU General Public License for more details.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
16 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
17 * You should have received a copy of the GNU General Public License along
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
18 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
20 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
21
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
22 #undef PREFETCH
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
23 #undef EMMS
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
24 #undef PREFETCHW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
25 #undef PAVGB
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
26
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
27 #if HAVE_AMD3DNOW
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
28 #define PREFETCH "prefetch"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
29 #define PREFETCHW "prefetchw"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
30 #define PAVGB "pavgusb"
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
31 #elif HAVE_MMX2
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
32 #define PREFETCH "prefetchnta"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
33 #define PREFETCHW "prefetcht0"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
34 #define PAVGB "pavgb"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
35 #else
25973
ef4297ed0d12 libvo: change asm syntax to use ASMALIGN and " # nop"
uau
parents: 25903
diff changeset
36 #define PREFETCH " # nop"
ef4297ed0d12 libvo: change asm syntax to use ASMALIGN and " # nop"
uau
parents: 25903
diff changeset
37 #define PREFETCHW " # nop"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
38 #endif
622
6737025afed0 to be sure in that header is okey
arpi_esp
parents: 326
diff changeset
39
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
40 #if HAVE_AMD3DNOW
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
41 /* On K6, femms is faster than emms. On K7, femms is directly mapped to emms. */
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
42 #define EMMS "femms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
43 #else
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
44 #define EMMS "emms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
45 #endif
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
46
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
/**
 * Blend an 8-bit OSD bitmap onto an 8-bit-per-pixel plane (one byte per
 * pixel in both source and destination).
 *
 * C path (no MMX): for every pixel whose srca value is non-zero,
 *     dst = ((dst * srca) >> 8) + src
 * and pixels whose srca value is 0 are left untouched.  With FAST_OSD the
 * width is halved and pixels are handled in pairs, copying src over dst
 * wherever srca is non-zero (no blending).
 *
 * MMX path: works on groups of 8 pixels.  A group is skipped only when all
 * 8 srca bytes are zero; otherwise every pixel of the group is computed as
 *     dst = ((dst * ((srca - 1) & 0xFF)) >> 8) + src
 * Because it advances 8 pixels at a time, it reads and writes up to the
 * next multiple of 8 at or above w in every row.
 *
 * @param w          width of the area in pixels
 * @param h          height of the area in rows
 * @param src        OSD bitmap values, one byte per pixel
 * @param srca       OSD alpha values, one byte per pixel (0 = leave dst alone)
 * @param srcstride  byte distance between rows of both src and srca
 * @param dstbase    top-left byte of the destination area
 * @param dststride  byte distance between destination rows
 */
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !HAVE_MMX
    w=w>>1;
#endif
#if HAVE_MMX
    /* Loop-invariant constants: mm4 = 00FF word mask, mm5 = FF00 word mask,
     * mm7 = all ones (added to srca bytes, i.e. srca - 1 modulo 256). */
    __asm__ volatile(
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "movq %%mm5, %%mm7\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#if HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        for(x=0;x<w;x+=8){
            __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl 4%1, %%eax\n\t"
                " jz 1f\n\t"            // all 8 alpha bytes are zero: skip
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
                "movq %1, %%mm2\n\t" //srca HGFEDCBA
                "paddb %%mm7, %%mm2\n\t"
                "movq %%mm2, %%mm3\n\t"
                "pand %%mm4, %%mm2\n\t" //0G0E0C0A
                "psrlw $8, %%mm3\n\t" //0H0F0D0B
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "paddb %2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
                /* The asm stores 8 bytes through %0, which is only declared
                 * as a one-byte input; the "memory" clobber tells the
                 * compiler that memory is modified here. */
                : "%eax", "memory");
        }
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
#else
            if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#if HAVE_MMX
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
116
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
/**
 * Blend an 8-bit OSD bitmap onto a packed destination with two bytes per
 * pixel, where the OSD value goes into the even byte of each pixel
 * (dstbase[2*x]) and the odd byte (dstbase[2*x+1]) is the companion byte.
 *
 * C path (no MMX), for every pixel whose srca value is non-zero:
 *     dst[2*x]   = ((dst[2*x] * srca) >> 8) + src
 *     dst[2*x+1] = (((dst[2*x+1] - 128) * srca) >> 8) + 128
 * Pixels whose srca value is 0 are left untouched.  With FAST_OSD the width
 * is halved and pixels are handled in pairs: src is copied into the even
 * bytes (offsets 4*x and 4*x+2) wherever srca is non-zero, and the odd
 * bytes are not modified.
 *
 * MMX path: works on groups of 4 pixels (8 destination bytes).  A group is
 * skipped only when all 4 srca bytes are zero; otherwise each even byte is
 * computed as ((dst * ((srca - 1) & 0xFF)) >> 8) + src, while the odd
 * bytes are written back unchanged (unlike the C path).  Because it
 * advances 4 pixels at a time, it processes up to the next multiple of 4
 * at or above w in every row.
 *
 * @param w          width of the area in pixels
 * @param h          height of the area in rows
 * @param src        OSD bitmap values, one byte per pixel
 * @param srca       OSD alpha values, one byte per pixel (0 = leave dst alone)
 * @param srcstride  byte distance between rows of both src and srca
 * @param dstbase    top-left byte of the destination area
 * @param dststride  byte distance between destination rows
 */
static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !HAVE_MMX
    w=w>>1;
#endif
#if HAVE_MMX
    /* Loop-invariant constants: mm7 = 0, mm6 = all ones (srca - 1 modulo
     * 256 when added bytewise), mm4 = 00FF word mask, mm5 = FF00 word mask. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm6\n\t"
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#if HAVE_MMX
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src));
        for(x=0;x<w;x+=4){
            __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"            // all 4 alpha bytes are zero: skip
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %%mm6, %%mm2\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
                "pmullw %%mm2, %%mm0\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t" //U0V0U0V0
                "movd %2, %%mm2\n\t" //src 0000DCBA
                "punpcklbw %%mm7, %%mm2\n\t" //src 0D0C0B0A
                "por %%mm1, %%mm0\n\t"
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
                /* The asm stores 8 bytes through %0, which is only declared
                 * as a one-byte input; the "memory" clobber tells the
                 * compiler that memory is modified here. */
                : "%eax", "memory");
        }
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
#else
            if(srca[x]) {
                dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
                dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
            }
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#if HAVE_MMX
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
188
12516
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
/**
 * Blend an 8-bit OSD bitmap onto a packed destination with two bytes per
 * pixel, where the OSD value goes into the odd byte of each pixel
 * (dstbase[2*x+1]) and the even byte (dstbase[2*x]) is the companion byte.
 *
 * Default path, for every pixel whose srca value is non-zero:
 *     dst[2*x+1] = ((dst[2*x+1] * srca) >> 8) + src
 *     dst[2*x]   = (((dst[2*x] - 128) * srca) >> 8) + 128
 * Pixels whose srca value is 0 are left untouched.
 *
 * FAST_OSD path: the width is halved and pixels are handled in pairs; src
 * is copied into the odd bytes (offsets 4*x+1 and 4*x+3) wherever srca is
 * non-zero, and the even bytes are not modified.  The previous code wrote
 * to offsets 4*x+2 and 4*x+0 instead, which are the even (companion) bytes
 * according to the default path of this very function.
 *
 * @param w          width of the area in pixels
 * @param h          height of the area in rows
 * @param src        OSD bitmap values, one byte per pixel
 * @param srca       OSD alpha values, one byte per pixel (0 = leave dst alone)
 * @param srcstride  byte distance between rows of both src and srca
 * @param dstbase    top-left byte of the destination area
 * @param dststride  byte distance between destination rows
 */
static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD)
    w=w>>1;
#endif
    for(y=0;y<h;y++){
        register int x;
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            /* Pixel 2*x lives at byte 4*x+1, pixel 2*x+1 at byte 4*x+3. */
            if(srca[2*x+0]) dstbase[4*x+1]=src[2*x+0];
            if(srca[2*x+1]) dstbase[4*x+3]=src[2*x+1];
#else
            if(srca[x]) {
                dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x];
                dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128;
            }
#endif
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
212
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
213 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
214 int y;
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
215 #if HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
216 __asm__ volatile(
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
217 "pxor %%mm7, %%mm7\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
218 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 28446
diff changeset
219 ::);
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
220 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
221 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
222 register unsigned char *dst = dstbase;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
223 register int x;
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
224 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
225 #if HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
226 __asm__ volatile(
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
227 PREFETCHW" %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
228 PREFETCH" %1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
229 PREFETCH" %2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
230 ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
231 for(x=0;x<w;x+=2){
2843
5be2017077fb Use new logic suggested by Michael Niedermayer
nick
parents: 2839
diff changeset
232 if(srca[x] || srca[x+1])
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
233 __asm__ volatile(
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
234 PREFETCHW" 32%0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
235 PREFETCH" 32%1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
236 PREFETCH" 32%2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
237 "movq %0, %%mm0\n\t" // dstbase
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
238 "movq %%mm0, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
239 "movq %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
240 "punpcklbw %%mm7, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
241 "punpckhbw %%mm7, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
242 "movd %1, %%mm2\n\t" // srca ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
243 "paddb %%mm6, %%mm2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
244 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
245 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
16483
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
246 "psrlq $8, %%mm2\n\t" // srca AAABBBB0
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
247 "movq %%mm2, %%mm3\n\t"
16483
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
248 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
249 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
250 "pmullw %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
251 "pmullw %%mm3, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
252 "psrlw $8, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
253 "psrlw $8, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
254 "packuswb %%mm1, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
255 "movd %2, %%mm2 \n\t" // src ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
256 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
257 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
16483
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
258 "psrlq $8, %%mm2\n\t" // src AAABBBB0
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
259 "paddb %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
260 "pand %4, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
261 "pand %3, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
262 "por %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
263 "movq %%mm5, %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
264 :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh));
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
265 dst += 6;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
266 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
267 #else /* HAVE_MMX */
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
268 for(x=0;x<w;x++){
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
269 if(srca[x]){
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
270 __asm__ volatile(
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
271 "movzbl (%0), %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
272 "movzbl 1(%0), %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
273
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
274 "imull %1, %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
275 "imull %1, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
276
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
277 "addl %2, %%ecx\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
278 "addl %2, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
279
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
280 "movb %%ch, (%0)\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
281 "movb %%ah, 1(%0)\n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 28446
diff changeset
282
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
283 "movzbl 2(%0), %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
284 "imull %1, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
285 "addl %2, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
286 "movb %%ah, 2(%0)\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
287 :
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 12516
diff changeset
288 :"D" (dst),
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
289 "r" ((unsigned)srca[x]),
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
290 "r" (((unsigned)src[x])<<8)
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
291 :"%eax", "%ecx"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
292 );
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
293 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
294 dst += 3;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
295 }
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
296 #endif /* !HAVE_MMX */
21369
9d42ff736ea5 Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents: 20577
diff changeset
297 #else /*non x86 arch or x86_64 with MMX disabled */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
298 for(x=0;x<w;x++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
299 if(srca[x]){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
300 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
301 dst[0]=dst[1]=dst[2]=src[x];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
302 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
303 dst[0]=((dst[0]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
304 dst[1]=((dst[1]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
305 dst[2]=((dst[2]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
306 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
307 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
308 dst+=3; // 24bpp
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
309 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
310 #endif /* arch_x86 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
311 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
312 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
313 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
314 }
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
315 #if HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
316 __asm__ volatile(EMMS:::"memory");
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
317 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
318 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
319 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
320
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
321 static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
322 int y;
29401
f01023c524c3 Replace WORDS_BIGENDIAN by HAVE_BIGENDIAN in all internal code.
diego
parents: 29263
diff changeset
323 #if HAVE_BIGENDIAN
9960
9d7477d0d64d big endian yellow fix?
michael
parents: 5139
diff changeset
324 dstbase++;
9d7477d0d64d big endian yellow fix?
michael
parents: 5139
diff changeset
325 #endif
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
326 #if HAVE_MMX
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
327 #if HAVE_AMD3DNOW
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
328 __asm__ volatile(
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
329 "pxor %%mm7, %%mm7\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
330 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
331 ::);
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
332 #else /* HAVE_AMD3DNOW */
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
333 __asm__ volatile(
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
334 "pxor %%mm7, %%mm7\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
335 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
336 "movq %%mm5, %%mm4\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
337 "psllw $8, %%mm5\n\t" //FF00FF00FF00
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
338 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
339 ::);
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
340 #endif /* HAVE_AMD3DNOW */
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
341 #endif /* HAVE_MMX */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
342 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
343 register int x;
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
344 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
345 #if HAVE_MMX
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
346 #if HAVE_AMD3DNOW
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
347 __asm__ volatile(
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
348 PREFETCHW" %0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
349 PREFETCH" %1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
350 PREFETCH" %2\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
351 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
352 for(x=0;x<w;x+=2){
2843
5be2017077fb Use new logic suggested by Michael Niedermayer
nick
parents: 2839
diff changeset
353 if(srca[x] || srca[x+1])
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
354 __asm__ volatile(
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
355 PREFETCHW" 32%0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
356 PREFETCH" 32%1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
357 PREFETCH" 32%2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
358 "movq %0, %%mm0\n\t" // dstbase
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
359 "movq %%mm0, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
360 "punpcklbw %%mm7, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
361 "punpckhbw %%mm7, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
362 "movd %1, %%mm2\n\t" // srca ABCD0000
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
363 "paddb %%mm6, %%mm2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
364 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
365 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
366 "movq %%mm2, %%mm3\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
367 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
368 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
369 "pmullw %%mm2, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
370 "pmullw %%mm3, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
371 "psrlw $8, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
372 "psrlw $8, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
373 "packuswb %%mm1, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
374 "movd %2, %%mm2 \n\t" // src ABCD0000
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
375 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
376 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
377 "paddb %%mm2, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
378 "movq %%mm0, %0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
379 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
380 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
381 #else //this is faster for intels crap
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
382 __asm__ volatile(
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
383 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
384 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
385 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
386 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
387 for(x=0;x<w;x+=4){
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
388 __asm__ volatile(
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
389 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
390 "orl %%eax, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
391 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
392 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
393 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
394 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
395 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
396 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
397 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
398 "psrlw $8, %%mm1\n\t" //0?0G0?0G
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
399 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
25903
7a1397677cb3 Avoid a MANGLE, there is no register pressure and the generated code
reimar
parents: 21369
diff changeset
400 "paddb %3, %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
401 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
402 "movq %%mm2, %%mm3\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
403 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
404 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
405 "pmullw %%mm2, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
406 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
407 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
408 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
409 "movd %2, %%mm2 \n\t" //src 0000DCBA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
410 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
411 "movq %%mm2, %%mm6\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
412 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
413 "paddb %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
414 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
415
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
416 "movq 8%0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
417 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
418 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
419 "psrlw $8, %%mm1\n\t" //0?0G0?0G
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
420 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
421 "pmullw %%mm3, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
422 "pmullw %%mm3, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
423 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
424 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
425 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
426 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
427 "paddb %%mm6, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
428 "movq %%mm0, 8%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
429 "1:\n\t"
25903
7a1397677cb3 Avoid a MANGLE, there is no register pressure and the generated code
reimar
parents: 21369
diff changeset
430 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF)
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
431 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
432 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
433 #endif
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
434 #else /* HAVE_MMX */
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
435 for(x=0;x<w;x++){
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
436 if(srca[x]){
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
437 __asm__ volatile(
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
438 "movzbl (%0), %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
439 "movzbl 1(%0), %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
440 "movzbl 2(%0), %%edx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
441
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
442 "imull %1, %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
443 "imull %1, %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
444 "imull %1, %%edx\n\t"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
445
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
446 "addl %2, %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
447 "addl %2, %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
448 "addl %2, %%edx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
449
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
450 "movb %%ch, (%0)\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
451 "movb %%ah, 1(%0)\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
452 "movb %%dh, 2(%0)\n\t"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
453
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
454 :
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
455 :"r" (&dstbase[4*x]),
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
456 "r" ((unsigned)srca[x]),
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
457 "r" (((unsigned)src[x])<<8)
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
458 :"%eax", "%ecx", "%edx"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
459 );
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
460 }
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
461 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
462 #endif /* HAVE_MMX */
21369
9d42ff736ea5 Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents: 20577
diff changeset
463 #else /*non x86 arch or x86_64 with MMX disabled */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
464 for(x=0;x<w;x++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
465 if(srca[x]){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
466 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
467 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
468 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
469 dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
470 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
471 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
472 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
473 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
474 }
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
475 #endif /* arch_x86 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
476 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
477 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
478 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
479 }
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
480 #if HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
481 __asm__ volatile(EMMS:::"memory");
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
482 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
483 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
484 }