annotate libvo/osd_template.c @ 11623:ecaf7047b6e8

Patch from the author, Zoltan Hidvegi: The filmdint filter does not handle NTSC "telecined" 15fps movies where there is a frame break in the middle of every second NTSC frame, it outputs only 15 frames for every 30 input frames, ignoring the io option. You can notice this during encoding such a sequence you will have lots of diplicate frames / skip frames messages. The patch below fixes this.
author rfelker
date Thu, 11 Dec 2003 04:47:42 +0000
parents 14c8c762c2b7
children 6f7b5123ac56
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
1 // Generic alpha renderers for all YUV modes and RGB depths.
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
2 // Optimized by Nick and Michael
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
3 // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
4
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
5 #undef PREFETCH
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
6 #undef EMMS
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
7 #undef PREFETCHW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
8 #undef PAVGB
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
9
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
10 #ifdef HAVE_3DNOW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
11 #define PREFETCH "prefetch"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
12 #define PREFETCHW "prefetchw"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
13 #define PAVGB "pavgusb"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
14 #elif defined ( HAVE_MMX2 )
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
15 #define PREFETCH "prefetchnta"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
16 #define PREFETCHW "prefetcht0"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
17 #define PAVGB "pavgb"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
18 #else
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
19 #define PREFETCH "/nop"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
20 #define PREFETCHW "/nop"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
21 #endif
622
6737025afed0 to be sure in that header is okey
arpi_esp
parents: 326
diff changeset
22
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
23 #ifdef HAVE_3DNOW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
24 /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
25 #define EMMS "femms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
26 #else
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
27 #define EMMS "emms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
28 #endif
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
29
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
30 static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
31 int y;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
32 #if defined(FAST_OSD) && !defined(HAVE_MMX)
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
33 w=w>>1;
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
34 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
35 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
36 register int x;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
37 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
38 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
39 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
40 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
41 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
42 // "pxor %%mm7, %%mm7\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
43 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
44 "movq %%mm5, %%mm4\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
45 "psllw $8, %%mm5\n\t" //FF00FF00FF00
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
46 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
47 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
48 for(x=0;x<w;x+=8){
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
49 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
50 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
51 "orl 4%1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
52 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
53 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
54 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
55 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
56 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
57 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
58 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
59 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
60 "movq %1, %%mm2\n\t" //srca HGFEDCBA
4245
27cb0e43de32 mangling in libvo
atmos4
parents: 3431
diff changeset
61 "paddb "MANGLE(bFF)", %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
62 "movq %%mm2, %%mm3\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
63 "pand %%mm4, %%mm2\n\t" //0G0E0C0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
64 "psrlw $8, %%mm3\n\t" //0H0F0D0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
65 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
66 "pmullw %%mm3, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
67 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
68 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
69 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
70 "paddb %2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
71 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
72 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
73 :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
74 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
75 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
76 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
77 for(x=0;x<w;x++){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
78 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
79 if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
80 if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
81 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
82 if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
83 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
84 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
85 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
86 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
87 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
88 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
89 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
90 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
91 asm volatile(EMMS:::"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
92 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
93 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
94 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
95
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
96 static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
97 int y;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
98 #if defined(FAST_OSD) && !defined(HAVE_MMX)
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
99 w=w>>1;
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
100 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
101 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
102 register int x;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
103 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
104 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
105 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
106 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
107 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
108 "pxor %%mm7, %%mm7\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
109 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
110 "movq %%mm5, %%mm4\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
111 "psllw $8, %%mm5\n\t" //FF00FF00FF00
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
112 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
113 ::"m"(*dstbase),"m"(*srca),"m"(*src));
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
114 for(x=0;x<w;x+=4){
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
115 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
116 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
117 "orl %%eax, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
118 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
119 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
120 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
121 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
122 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
123 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
124 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
125 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
4245
27cb0e43de32 mangling in libvo
atmos4
parents: 3431
diff changeset
126 "paddb "MANGLE(bFF)", %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
127 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
128 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
129 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
130 "pand %%mm5, %%mm1\n\t" //U0V0U0V0
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
131 "movd %2, %%mm2\n\t" //src 0000DCBA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
132 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
133 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
134 "paddb %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
135 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
136 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
137 :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
138 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
139 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
140 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
141 for(x=0;x<w;x++){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
142 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
143 if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
144 if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
145 #else
3431
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
146 if(srca[x]) {
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
147 dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
148 dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
149 }
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
150 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
151 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
152 #endif
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
153 src+=srcstride;
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
154 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
155 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
156 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
157 #ifdef HAVE_MMX
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
158 asm volatile(EMMS:::"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
159 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
160 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
161 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
162
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
163 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
164 int y;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
165 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
166 register unsigned char *dst = dstbase;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
167 register int x;
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
168 #ifdef ARCH_X86
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
169 #ifdef HAVE_MMX
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
170 asm volatile(
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
171 PREFETCHW" %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
172 PREFETCH" %1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
173 PREFETCH" %2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
174 "pxor %%mm7, %%mm7\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
175 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
176 ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
177 for(x=0;x<w;x+=2){
2843
5be2017077fb Use new logic suggested by Michael Niedermayer
nick
parents: 2839
diff changeset
178 if(srca[x] || srca[x+1])
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
179 asm volatile(
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
180 PREFETCHW" 32%0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
181 PREFETCH" 32%1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
182 PREFETCH" 32%2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
183 "movq %0, %%mm0\n\t" // dstbase
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
184 "movq %%mm0, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
185 "movq %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
186 "punpcklbw %%mm7, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
187 "punpckhbw %%mm7, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
188 "movd %1, %%mm2\n\t" // srca ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
189 "paddb %%mm6, %%mm2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
190 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
191 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
192 "movq %%mm2, %%mm3\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
193 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
194 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
195 "pmullw %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
196 "pmullw %%mm3, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
197 "psrlw $8, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
198 "psrlw $8, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
199 "packuswb %%mm1, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
200 "movd %2, %%mm2 \n\t" // src ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
201 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
202 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
203 "paddb %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
204 "pand %4, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
205 "pand %3, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
206 "por %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
207 "movq %%mm5, %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
208 :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh));
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
209 dst += 6;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
210 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
211 #else /* HAVE_MMX */
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
212 for(x=0;x<w;x++){
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
213 if(srca[x]){
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
214 asm volatile(
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
215 "movzbl (%0), %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
216 "movzbl 1(%0), %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
217
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
218 "imull %1, %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
219 "imull %1, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
220
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
221 "addl %2, %%ecx\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
222 "addl %2, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
223
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
224 "movb %%ch, (%0)\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
225 "movb %%ah, 1(%0)\n\t"
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
226
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
227 "movzbl 2(%0), %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
228 "imull %1, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
229 "addl %2, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
230 "movb %%ah, 2(%0)\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
231 :
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
232 :"r" (dst),
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
233 "r" ((unsigned)srca[x]),
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
234 "r" (((unsigned)src[x])<<8)
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
235 :"%eax", "%ecx"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
236 );
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
237 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
238 dst += 3;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
239 }
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
240 #endif /* !HAVE_MMX */
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
241 #else /*non x86 arch*/
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
242 for(x=0;x<w;x++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
243 if(srca[x]){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
244 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
245 dst[0]=dst[1]=dst[2]=src[x];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
246 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
247 dst[0]=((dst[0]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
248 dst[1]=((dst[1]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
249 dst[2]=((dst[2]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
250 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
251 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
252 dst+=3; // 24bpp
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
253 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
254 #endif /* arch_x86 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
255 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
256 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
257 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
258 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
259 #ifdef HAVE_MMX
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
260 asm volatile(EMMS:::"memory");
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
261 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
262 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
263 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
264
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
265 static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
266 int y;
9960
9d7477d0d64d big endian yellow fix?
michael
parents: 5139
diff changeset
267 #ifdef WORDS_BIGENDIAN
9d7477d0d64d big endian yellow fix?
michael
parents: 5139
diff changeset
268 dstbase++;
9d7477d0d64d big endian yellow fix?
michael
parents: 5139
diff changeset
269 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
270 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
271 register int x;
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
272 #ifdef ARCH_X86
2833
1b6c207c0410 Enable MMX stuff
nick
parents: 2823
diff changeset
273 #ifdef HAVE_MMX
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
274 #ifdef HAVE_3DNOW
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
275 asm volatile(
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
276 PREFETCHW" %0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
277 PREFETCH" %1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
278 PREFETCH" %2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
279 "pxor %%mm7, %%mm7\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
280 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
281 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
282 for(x=0;x<w;x+=2){
2843
5be2017077fb Use new logic suggested by Michael Niedermayer
nick
parents: 2839
diff changeset
283 if(srca[x] || srca[x+1])
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
284 asm volatile(
2835
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
285 PREFETCHW" 32%0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
286 PREFETCH" 32%1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
287 PREFETCH" 32%2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
288 "movq %0, %%mm0\n\t" // dstbase
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
289 "movq %%mm0, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
290 "punpcklbw %%mm7, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
291 "punpckhbw %%mm7, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
292 "movd %1, %%mm2\n\t" // srca ABCD0000
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
293 "paddb %%mm6, %%mm2\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
294 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
295 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
296 "movq %%mm2, %%mm3\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
297 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
298 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
299 "pmullw %%mm2, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
300 "pmullw %%mm3, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
301 "psrlw $8, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
302 "psrlw $8, %%mm1\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
303 "packuswb %%mm1, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
304 "movd %2, %%mm2 \n\t" // src ABCD0000
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
305 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
306 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
307 "paddb %%mm2, %%mm0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
308 "movq %%mm0, %0\n\t"
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
309 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
86fdf7897315 Minor speedup for K6-2, K7, P3
nick
parents: 2833
diff changeset
310 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
311 #else //this is faster for intels crap
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
312 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
313 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
314 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
315 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
316 "pxor %%mm7, %%mm7\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
317 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
318 "movq %%mm5, %%mm4\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
319 "psllw $8, %%mm5\n\t" //FF00FF00FF00
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
320 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
321 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
322 for(x=0;x<w;x+=4){
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
323 asm volatile(
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
324 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
325 "orl %%eax, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
326 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
327 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
328 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
329 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
330 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
331 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
332 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
333 "psrlw $8, %%mm1\n\t" //0?0G0?0G
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
334 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
4245
27cb0e43de32 mangling in libvo
atmos4
parents: 3431
diff changeset
335 "paddb "MANGLE(bFF)", %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
336 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
337 "movq %%mm2, %%mm3\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
338 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
339 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
340 "pmullw %%mm2, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
341 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
342 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
343 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
344 "movd %2, %%mm2 \n\t" //src 0000DCBA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
345 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
346 "movq %%mm2, %%mm6\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
347 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
348 "paddb %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
349 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
350
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
351 "movq 8%0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
352 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
353 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
354 "psrlw $8, %%mm1\n\t" //0?0G0?0G
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
355 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
356 "pmullw %%mm3, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
357 "pmullw %%mm3, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
358 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
359 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
360 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
361 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
362 "paddb %%mm6, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
363 "movq %%mm0, 8%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
364 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
365 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
366 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
367 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
368 #endif
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
369 #else /* HAVE_MMX */
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
370 for(x=0;x<w;x++){
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
371 if(srca[x]){
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
372 asm volatile(
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
373 "movzbl (%0), %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
374 "movzbl 1(%0), %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
375 "movzbl 2(%0), %%edx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
376
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
377 "imull %1, %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
378 "imull %1, %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
379 "imull %1, %%edx\n\t"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
380
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
381 "addl %2, %%ecx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
382 "addl %2, %%eax\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
383 "addl %2, %%edx\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
384
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
385 "movb %%ch, (%0)\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
386 "movb %%ah, 1(%0)\n\t"
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
387 "movb %%dh, 2(%0)\n\t"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
388
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
389 :
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
390 :"r" (&dstbase[4*x]),
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
391 "r" ((unsigned)srca[x]),
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
392 "r" (((unsigned)src[x])<<8)
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
393 :"%eax", "%ecx", "%edx"
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
394 );
2823
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
395 }
004ee19ebfcf Extract parallelism from OSD stuff + MMX2 optimization.
nick
parents: 2807
diff changeset
396 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
397 #endif /* HAVE_MMX */
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
398 #else /*non x86 arch*/
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
399 for(x=0;x<w;x++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
400 if(srca[x]){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
401 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
402 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
403 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
404 dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
405 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
406 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
407 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
408 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
409 }
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
410 #endif /* arch_x86 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
411 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
412 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
413 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
414 }
2833
1b6c207c0410 Enable MMX stuff
nick
parents: 2823
diff changeset
415 #ifdef HAVE_MMX
2798
ee2cd36a81a2 Code cleanup - emms is not required when MMX block is commented out.
nick
parents: 2578
diff changeset
416 asm volatile(EMMS:::"memory");
2578
d363fde389b5 slow mmx & not so slow asm versions (outcommented)
michael
parents: 947
diff changeset
417 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
418 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
419 }