annotate libvo/osd_template.c @ 5206:2ca5a9bfaa98

allow sh_audio struct to be initialized by demuxer, add parsing of mp4 esds header to mov demuxer, init faad from info from mov header
author atmos4
date Tue, 19 Mar 2002 21:48:55 +0000
parents 473058a6211e
children 9d7477d0d64d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
1 // Generic alpha renderers for all YUV modes and RGB depths.
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
2 // Optimized by Nick and Michael
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
3 // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
4
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
5 #undef PREFETCH
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
6 #undef EMMS
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
7 #undef PREFETCHW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
8 #undef PAVGB
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
9
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
10 #ifdef HAVE_3DNOW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
11 #define PREFETCH "prefetch"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
12 #define PREFETCHW "prefetchw"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
13 #define PAVGB "pavgusb"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
14 #elif defined ( HAVE_MMX2 )
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
15 #define PREFETCH "prefetchnta"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
16 #define PREFETCHW "prefetcht0"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
17 #define PAVGB "pavgb"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
18 #else
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
19 #define PREFETCH "/nop"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
20 #define PREFETCHW "/nop"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
21 #endif
622
6737025afed0 to be sure in that header is okey
arpi_esp
parents: 326
diff changeset
22
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
23 #ifdef HAVE_3DNOW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
24 /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
25 #define EMMS "femms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
26 #else
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
27 #define EMMS "emms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
28 #endif
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
29
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
30 static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
31 int y;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
32 #if defined(FAST_OSD) && !defined(HAVE_MMX)
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
33 w=w>>1;
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
34 #endif
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
35 PROFILE_START();
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
36 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
37 register int x;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
38 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
39 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
40 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
41 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
42 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
43 // "pxor %%mm7, %%mm7\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
44 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
45 "movq %%mm5, %%mm4\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
46 "psllw $8, %%mm5\n\t" //FF00FF00FF00
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
47 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
48 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
49 for(x=0;x<w;x+=8){
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
50 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
51 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
52 "orl 4%1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
53 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
54 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
55 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
56 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
57 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
58 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
59 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
60 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
61 "movq %1, %%mm2\n\t" //srca HGFEDCBA
4245
27cb0e43de32 mangling in libvo
atmos4
parents: 3431
diff changeset
62 "paddb "MANGLE(bFF)", %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
63 "movq %%mm2, %%mm3\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
64 "pand %%mm4, %%mm2\n\t" //0G0E0C0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
65 "psrlw $8, %%mm3\n\t" //0H0F0D0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
66 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
67 "pmullw %%mm3, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
68 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
69 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
70 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
71 "paddb %2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
72 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
73 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
74 :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
75 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
76 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
77 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
78 for(x=0;x<w;x++){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
79 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
80 if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
81 if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
82 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
83 if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
84 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
85 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
86 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
87 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
88 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
89 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
90 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
91 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
92 asm volatile(EMMS:::"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
93 #endif
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
94 PROFILE_END("vo_draw_alpha_yv12");
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
95 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
96 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
97
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
98 static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
99 int y;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
100 #if defined(FAST_OSD) && !defined(HAVE_MMX)
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
101 w=w>>1;
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
102 #endif
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
103 PROFILE_START();
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
104 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
105 register int x;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
106 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
107 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
108 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
109 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
110 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
111 "pxor %%mm7, %%mm7\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
112 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
113 "movq %%mm5, %%mm4\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
114 "psllw $8, %%mm5\n\t" //FF00FF00FF00
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
115 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
116 ::"m"(*dstbase),"m"(*srca),"m"(*src));
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
117 for(x=0;x<w;x+=4){
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
118 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
119 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
120 "orl %%eax, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
121 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
122 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
123 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
124 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
125 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
126 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
127 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
128 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
4245
27cb0e43de32 mangling in libvo
atmos4
parents: 3431
diff changeset
129 "paddb "MANGLE(bFF)", %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
130 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
131 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
132 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
133 "pand %%mm5, %%mm1\n\t" //U0V0U0V0
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
134 "movd %2, %%mm2\n\t" //src 0000DCBA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
135 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
136 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
137 "paddb %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
138 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
139 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
140 :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
141 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
142 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
143 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
144 for(x=0;x<w;x++){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
145 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
146 if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
147 if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
148 #else
3431
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
149 if(srca[x]) {
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
150 dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
151 dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
152 }
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
153 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
154 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
155 #endif
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
156 src+=srcstride;
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
157 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
158 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
159 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
160 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
161 asm volatile(EMMS:::"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
162 #endif
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
163 PROFILE_END("vo_draw_alpha_yuy2");
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
164 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
165 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
166
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
167 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
168 int y;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
169 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
170 register unsigned char *dst = dstbase;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
171 register int x;
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
172 #ifdef ARCH_X86
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
173 #ifdef HAVE_MMX
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
174 asm volatile(
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
175 PREFETCHW" %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
176 PREFETCH" %1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
177 PREFETCH" %2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
178 "pxor %%mm7, %%mm7\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
179 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
180 ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
181 for(x=0;x<w;x+=2){
2843
5be2017077fb Use new logic suggested by Michael Niedermayer
nick
parents: 2839
diff changeset
182 if(srca[x] || srca[x+1])
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
183 asm volatile(
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
184 PREFETCHW" 32%0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
185 PREFETCH" 32%1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
186 PREFETCH" 32%2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
187 "movq %0, %%mm0\n\t" // dstbase
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
188 "movq %%mm0, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
189 "movq %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
190 "punpcklbw %%mm7, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
191 "punpckhbw %%mm7, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
192 "movd %1, %%mm2\n\t" // srca ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
193 "paddb %%mm6, %%mm2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
194 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
195 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
196 "movq %%mm2, %%mm3\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
197 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
198 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
199 "pmullw %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
200 "pmullw %%mm3, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
201 "psrlw $8, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
202 "psrlw $8, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
203 "packuswb %%mm1, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
204 "movd %2, %%mm2 \n\t" // src ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
205 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
206 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
207 "paddb %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
208 "pand %4, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
209 "pand %3, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
210 "por %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
211 "movq %%mm5, %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
212 :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh));
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
213 dst += 6;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
214 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
215 #else /* HAVE_MMX */
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
216 for(x=0;x<w;x++){
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
217 if(srca[x]){
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
218 asm volatile(
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
219 "movzbl (%0), %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
220 "movzbl 1(%0), %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
221
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
222 "imull %1, %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
223 "imull %1, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
224
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
225 "addl %2, %%ecx\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
226 "addl %2, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
227
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
228 "movb %%ch, (%0)\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
229 "movb %%ah, 1(%0)\n\t"
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
230
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
231 "movzbl 2(%0), %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
232 "imull %1, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
233 "addl %2, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
234 "movb %%ah, 2(%0)\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
235 :
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
236 :"r" (dst),
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
237 "r" ((unsigned)srca[x]),
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
238 "r" (((unsigned)src[x])<<8)
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
239 :"%eax", "%ecx"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
240 );
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
241 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
242 dst += 3;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
243 }
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
244 #endif /* !HAVE_MMX */
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
245 #else /*non x86 arch*/
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
246 for(x=0;x<w;x++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
247 if(srca[x]){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
248 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
249 dst[0]=dst[1]=dst[2]=src[x];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
250 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
251 dst[0]=((dst[0]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
252 dst[1]=((dst[1]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
253 dst[2]=((dst[2]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
254 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
255 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
256 dst+=3; // 24bpp
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
257 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
258 #endif /* arch_x86 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
259 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
260 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
261 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
262 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
263 #ifdef HAVE_MMX
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
264 asm volatile(EMMS:::"memory");
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
265 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
266 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
267 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
268
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
269 static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
270 int y;
2833
1b6c207c0410 Enable MMX stuff
nick
parents: 2823
diff changeset
271 PROFILE_START();
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
272 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
273 register int x;
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
274 #ifdef ARCH_X86
2833
1b6c207c0410 Enable MMX stuff
nick
parents: 2823
diff changeset
275 #ifdef HAVE_MMX
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
276 #ifdef HAVE_3DNOW
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
277 asm volatile(
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
278 PREFETCHW" %0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
279 PREFETCH" %1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
280 PREFETCH" %2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
281 "pxor %%mm7, %%mm7\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
282 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
283 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
284 for(x=0;x<w;x+=2){
2843
5be2017077fb Use new logic suggested by Michael Niedermayer
nick
parents: 2839
diff changeset
285 if(srca[x] || srca[x+1])
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
286 asm volatile(
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
287 PREFETCHW" 32%0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
288 PREFETCH" 32%1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
289 PREFETCH" 32%2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
290 "movq %0, %%mm0\n\t" // dstbase
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
291 "movq %%mm0, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
292 "punpcklbw %%mm7, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
293 "punpckhbw %%mm7, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
294 "movd %1, %%mm2\n\t" // srca ABCD0000
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
295 "paddb %%mm6, %%mm2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
296 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
297 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
298 "movq %%mm2, %%mm3\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
299 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
300 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
301 "pmullw %%mm2, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
302 "pmullw %%mm3, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
303 "psrlw $8, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
304 "psrlw $8, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
305 "packuswb %%mm1, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
306 "movd %2, %%mm2 \n\t" // src ABCD0000
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
307 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
308 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
309 "paddb %%mm2, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
310 "movq %%mm0, %0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
311 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
312 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
313 #else //this is faster for intels crap
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
314 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
315 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
316 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
317 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
318 "pxor %%mm7, %%mm7\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
319 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
320 "movq %%mm5, %%mm4\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
321 "psllw $8, %%mm5\n\t" //FF00FF00FF00
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
322 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
323 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
324 for(x=0;x<w;x+=4){
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
325 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
326 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
327 "orl %%eax, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
328 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
329 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
330 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
331 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
332 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
333 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
334 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
335 "psrlw $8, %%mm1\n\t" //0?0G0?0G
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
336 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
4245
27cb0e43de32 mangling in libvo
atmos4
parents: 3431
diff changeset
337 "paddb "MANGLE(bFF)", %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
338 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
339 "movq %%mm2, %%mm3\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
340 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
341 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
342 "pmullw %%mm2, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
343 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
344 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
345 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
346 "movd %2, %%mm2 \n\t" //src 0000DCBA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
347 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
348 "movq %%mm2, %%mm6\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
349 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
350 "paddb %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
351 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
352
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
353 "movq 8%0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
354 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
355 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
356 "psrlw $8, %%mm1\n\t" //0?0G0?0G
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
357 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
358 "pmullw %%mm3, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
359 "pmullw %%mm3, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
360 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
361 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
362 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
363 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
364 "paddb %%mm6, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
365 "movq %%mm0, 8%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
366 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
367 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
368 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
369 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
370 #endif
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
371 #else /* HAVE_MMX */
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
372 for(x=0;x<w;x++){
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
373 if(srca[x]){
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
374 asm volatile(
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
375 "movzbl (%0), %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
376 "movzbl 1(%0), %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
377 "movzbl 2(%0), %%edx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
378
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
379 "imull %1, %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
380 "imull %1, %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
381 "imull %1, %%edx\n\t"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
382
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
383 "addl %2, %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
384 "addl %2, %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
385 "addl %2, %%edx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
386
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
387 "movb %%ch, (%0)\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
388 "movb %%ah, 1(%0)\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
389 "movb %%dh, 2(%0)\n\t"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
390
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
391 :
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
392 :"r" (&dstbase[4*x]),
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
393 "r" ((unsigned)srca[x]),
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
394 "r" (((unsigned)src[x])<<8)
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
395 :"%eax", "%ecx", "%edx"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
396 );
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
397 }
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
398 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
399 #endif /* HAVE_MMX */
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
400 #else /*non x86 arch*/
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
401 for(x=0;x<w;x++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
402 if(srca[x]){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
403 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
404 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
405 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
406 dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
407 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
408 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
409 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
410 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
411 }
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
412 #endif /* arch_x86 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
413 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
414 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
415 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
416 }
2833
1b6c207c0410 Enable MMX stuff
nick
parents: 2823
diff changeset
417 #ifdef HAVE_MMX
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
418 asm volatile(EMMS:::"memory");
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
419 #endif
2833
1b6c207c0410 Enable MMX stuff
nick
parents: 2823
diff changeset
420 PROFILE_END("vo_draw_alpha_rgb32");
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
421 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
422 }