annotate libvo/osd_template.c @ 28132:c28c36d595d5

Fix imaadpcm extradata with lavc encoder. The formula to calculate frame size was wrong, duplicated code from the encoder and did not take endianness into account when writing the value into extradata. Patch by Edouard Gomez [ed gomez (at) free fr].
author reimar
date Sat, 20 Dec 2008 10:18:15 +0000
parents 08d18fe9da52
children 25337a2147e7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
1 // Generic alpha renderers for all YUV modes and RGB depths.
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
2 // Optimized by Nick and Michael
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
3 // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
4
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
5 #undef PREFETCH
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
6 #undef EMMS
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
7 #undef PREFETCHW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
8 #undef PAVGB
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
9
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
10 #ifdef HAVE_3DNOW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
11 #define PREFETCH "prefetch"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
12 #define PREFETCHW "prefetchw"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
13 #define PAVGB "pavgusb"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
14 #elif defined ( HAVE_MMX2 )
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
15 #define PREFETCH "prefetchnta"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
16 #define PREFETCHW "prefetcht0"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
17 #define PAVGB "pavgb"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
18 #else
25973
ef4297ed0d12 libvo: change asm syntax to use ASMALIGN and " # nop"
uau
parents: 25903
diff changeset
19 #define PREFETCH " # nop"
ef4297ed0d12 libvo: change asm syntax to use ASMALIGN and " # nop"
uau
parents: 25903
diff changeset
20 #define PREFETCHW " # nop"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
21 #endif
622
6737025afed0 to be sure in that header is okey
arpi_esp
parents: 326
diff changeset
22
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
23 #ifdef HAVE_3DNOW
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
24 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
25 #define EMMS "femms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
26 #else
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
27 #define EMMS "emms"
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
28 #endif
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
29
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
30 static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
31 int y;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
32 #if defined(FAST_OSD) && !defined(HAVE_MMX)
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
33 w=w>>1;
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
34 #endif
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
35 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
36 __asm__ volatile(
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
37 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
38 "movq %%mm5, %%mm4\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
39 "movq %%mm5, %%mm7\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
40 "psllw $8, %%mm5\n\t" //FF00FF00FF00
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
41 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
42 ::);
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
43 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
44 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
45 register int x;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
46 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
47 __asm__ volatile(
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
48 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
49 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
50 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
51 ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
52 for(x=0;x<w;x+=8){
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
53 __asm__ volatile(
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
54 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
55 "orl 4%1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
56 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
57 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
58 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
59 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
60 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
61 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
62 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
63 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
64 "movq %1, %%mm2\n\t" //srca HGFEDCBA
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
65 "paddb %%mm7, %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
66 "movq %%mm2, %%mm3\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
67 "pand %%mm4, %%mm2\n\t" //0G0E0C0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
68 "psrlw $8, %%mm3\n\t" //0H0F0D0B
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
69 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
70 "pmullw %%mm3, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
71 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
72 "pand %%mm5, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
73 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
74 "paddb %2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
75 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
76 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
77 :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
78 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
79 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
80 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
81 for(x=0;x<w;x++){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
82 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
83 if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
84 if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
85 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
86 if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
87 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
88 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
89 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
90 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
91 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
92 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
93 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
94 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
95 __asm__ volatile(EMMS:::"memory");
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
96 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
97 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
98 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
99
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
100 static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
101 int y;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
102 #if defined(FAST_OSD) && !defined(HAVE_MMX)
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
103 w=w>>1;
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
104 #endif
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
105 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
106 __asm__ volatile(
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
107 "pxor %%mm7, %%mm7\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
108 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
109 "movq %%mm5, %%mm6\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
110 "movq %%mm5, %%mm4\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
111 "psllw $8, %%mm5\n\t" //FF00FF00FF00
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
112 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
113 ::);
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
114 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
115 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
116 register int x;
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
117 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
118 __asm__ volatile(
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
119 PREFETCHW" %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
120 PREFETCH" %1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
121 PREFETCH" %2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
122 ::"m"(*dstbase),"m"(*srca),"m"(*src));
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
123 for(x=0;x<w;x+=4){
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
124 __asm__ volatile(
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
125 "movl %1, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
126 "orl %%eax, %%eax\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
127 " jz 1f\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
128 PREFETCHW" 32%0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
129 PREFETCH" 32%1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
130 PREFETCH" 32%2\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
131 "movq %0, %%mm0\n\t" // dstbase
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
132 "movq %%mm0, %%mm1\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
133 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
134 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
135 "paddb %%mm6, %%mm2\n\t"
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
136 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
137 "pmullw %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
138 "psrlw $8, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
139 "pand %%mm5, %%mm1\n\t" //U0V0U0V0
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
140 "movd %2, %%mm2\n\t" //src 0000DCBA
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
141 "punpcklbw %%mm7, %%mm2\n\t" //src 0D0C0B0A
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
142 "por %%mm1, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
143 "paddb %%mm2, %%mm0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
144 "movq %%mm0, %0\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
145 "1:\n\t"
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
146 :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
147 : "%eax");
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
148 }
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
149 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
150 for(x=0;x<w;x++){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
151 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
152 if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
153 if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
154 #else
3431
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
155 if(srca[x]) {
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
156 dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
157 dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
63ecec3bdf93 yuy2 in C color bugfix patch from Artur Zaprzala <artur.zaprzala@talex.com.pl>
michael
parents: 3142
diff changeset
158 }
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
159 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
160 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
161 #endif
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
162 src+=srcstride;
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
163 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
164 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
165 }
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
166 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
167 __asm__ volatile(EMMS:::"memory");
2846
ab51228bf3cf p2/p3 bgr32 version (20%faster)
michael
parents: 2843
diff changeset
168 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
169 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
170 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
171
12516
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
172 static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
173 int y;
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
174 #if defined(FAST_OSD)
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
175 w=w>>1;
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
176 #endif
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
177 for(y=0;y<h;y++){
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
178 register int x;
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
179 for(x=0;x<w;x++){
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
180 #ifdef FAST_OSD
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
181 if(srca[2*x+0]) dstbase[4*x+2]=src[2*x+0];
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
182 if(srca[2*x+1]) dstbase[4*x+0]=src[2*x+1];
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
183 #else
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
184 if(srca[x]) {
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
185 dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x];
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
186 dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128;
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
187 }
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
188 #endif
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
189 }
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
190 src+=srcstride;
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
191 srca+=srcstride;
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
192 dstbase+=dststride;
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
193 }
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
194 }
6f7b5123ac56 draw alpha for uyvy
nplourde
parents: 10516
diff changeset
195
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
196 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
197 int y;
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
198 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
199 __asm__ volatile(
18683
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
200 "pxor %%mm7, %%mm7\n\t"
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
201 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
202 ::);
d940ecaff286 moves invariant code (chiefly MMX register initialization) out of loops as well as eliminating some memory accesses within the inner loop.
gpoirier
parents: 16483
diff changeset
203 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
204 for(y=0;y<h;y++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
205 register unsigned char *dst = dstbase;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
206 register int x;
21369
9d42ff736ea5 Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents: 20577
diff changeset
207 #if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
208 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
209 __asm__ volatile(
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
210 PREFETCHW" %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
211 PREFETCH" %1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
212 PREFETCH" %2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
213 ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
214 for(x=0;x<w;x+=2){
2843
5be2017077fb Use new logic suggested by Michael Niedermayer
nick
parents: 2839
diff changeset
215 if(srca[x] || srca[x+1])
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
216 __asm__ volatile(
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
217 PREFETCHW" 32%0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
218 PREFETCH" 32%1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
219 PREFETCH" 32%2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
220 "movq %0, %%mm0\n\t" // dstbase
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
221 "movq %%mm0, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
222 "movq %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
223 "punpcklbw %%mm7, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
224 "punpckhbw %%mm7, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
225 "movd %1, %%mm2\n\t" // srca ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
226 "paddb %%mm6, %%mm2\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
227 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
228 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
16483
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
229 "psrlq $8, %%mm2\n\t" // srca AAABBBB0
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
230 "movq %%mm2, %%mm3\n\t"
16483
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
231 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
232 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
233 "pmullw %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
234 "pmullw %%mm3, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
235 "psrlw $8, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
236 "psrlw $8, %%mm1\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
237 "packuswb %%mm1, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
238 "movd %2, %%mm2 \n\t" // src ABCD0000
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
239 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
240 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
16483
b47e38d24ddf Fix MMX accelerated RGB24 OSD, fixes "ugly OSD with -vo gl2".
reimar
parents: 13720
diff changeset
241 "psrlq $8, %%mm2\n\t" // src AAABBBB0
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
242 "paddb %%mm2, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
243 "pand %4, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
244 "pand %3, %%mm0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
245 "por %%mm0, %%mm5\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
246 "movq %%mm5, %0\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
247 :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh));
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
248 dst += 6;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
249 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
250 #else /* HAVE_MMX */
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
251 for(x=0;x<w;x++){
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
252 if(srca[x]){
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
253 __asm__ volatile(
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
254 "movzbl (%0), %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
255 "movzbl 1(%0), %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
256
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
257 "imull %1, %%ecx\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
258 "imull %1, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
259
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
260 "addl %2, %%ecx\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
261 "addl %2, %%eax\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
262
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
263 "movb %%ch, (%0)\n\t"
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
264 "movb %%ah, 1(%0)\n\t"
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
265
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
266 "movzbl 2(%0), %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
267 "imull %1, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
268 "addl %2, %%eax\n\t"
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
269 "movb %%ah, 2(%0)\n\t"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
270 :
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 12516
diff changeset
271 :"D" (dst),
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
272 "r" ((unsigned)srca[x]),
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
273 "r" (((unsigned)src[x])<<8)
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
274 :"%eax", "%ecx"
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
275 );
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
276 }
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
277 dst += 3;
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
278 }
5139
473058a6211e workaround lack of -fomit-frame-pointer
michael
parents: 4245
diff changeset
279 #endif /* !HAVE_MMX */
21369
9d42ff736ea5 Avoid compiling code using %ah etc. on AMD64, since that will not work
reimar
parents: 20577
diff changeset
280 #else /*non x86 arch or x86_64 with MMX disabled */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
281 for(x=0;x<w;x++){
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
282 if(srca[x]){
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
283 #ifdef FAST_OSD
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
284 dst[0]=dst[1]=dst[2]=src[x];
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
285 #else
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
286 dst[0]=((dst[0]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
287 dst[1]=((dst[1]*srca[x])>>8)+src[x];
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
288 dst[2]=((dst[2]*srca[x])>>8)+src[x];
947
76fd9463b9d3 FAST_OSD option to disable font outline antialiasing
arpi_esp
parents: 622
diff changeset
289 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
290 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
291 dst+=3; // 24bpp
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
292 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
293 #endif /* arch_x86 */
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
294 src+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
295 srca+=srcstride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
296 dstbase+=dststride;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
297 }
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
298 #ifdef HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25973
diff changeset
299 __asm__ volatile(EMMS:::"memory");
2839
03ccbb72e2e9 Cloning 32 stuff to 24
nick
parents: 2835
diff changeset
300 #endif
326
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
301 return;
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
302 }
f6b5c2dbc88e OSD alpha renderers moved to osd.c
arpi_esp
parents:
diff changeset
303
3142
0f6cce3a8059 runtime cpu detection
michael
parents: 2850
diff changeset
/**
 * Blend an 8-bit OSD bitmap onto a 32 bits-per-pixel surface.
 *
 * Portable C path: for every pixel whose srca value is non-zero, the first
 * three bytes of the destination pixel become ((dst * srca) >> 8) + src
 * (or simply src when FAST_OSD is defined); the fourth byte is untouched.
 * The scalar x86 asm path performs the same computation in registers.
 * The MMX/3DNow paths process whole quadwords, so all four bytes of each
 * pixel are blended, and they add an all-ones byte pattern to srca before
 * multiplying (i.e. they use srca - 1 modulo 256 as the factor).
 *
 * @param w          width in pixels
 * @param h          number of rows
 * @param src        bitmap values, one byte per pixel
 * @param srca       alpha values, one byte per pixel; 0 leaves the pixel alone
 *                   in the C and scalar paths
 * @param srcstride  bytes between rows, applied to both src and srca
 * @param dstbase    first destination pixel (4 bytes per pixel)
 * @param dststride  bytes between destination rows
 *
 * NOTE(review): the 3DNow loop advances 2 pixels and the MMX loop 4 pixels
 * per iteration, so when w is not a multiple of that step they read srca/src
 * and write dst past pixel w-1. Presumably callers guarantee enough padding
 * in the strides -- confirm before using with tightly packed buffers.
 */
static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#ifdef WORDS_BIGENDIAN
    /* Skip the first byte of every pixel so the three blended bytes are
     * the last three of each 32-bit word. */
    dstbase++;
#endif
#ifdef HAVE_MMX
    /* Loop-invariant MMX constants, set up once for all rows. */
#ifdef HAVE_3DNOW
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#else /* HAVE_3DNOW */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif /* HAVE_3DNOW */
#endif /* HAVE_MMX */
    for(y=0;y<h;y++){
        register int x;
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
        /* Prefetch the start of the row for all three buffers. */
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        /* Two pixels (one quadword of dst) per iteration. */
        for(x=0;x<w;x+=2){
            if(srca[x] || srca[x+1]){
                __asm__ volatile(
                    PREFETCHW" 32%0\n\t"
                    PREFETCH" 32%1\n\t"
                    PREFETCH" 32%2\n\t"
                    "movq %0, %%mm0\n\t" // dstbase
                    "movq %%mm0, %%mm1\n\t"
                    "punpcklbw %%mm7, %%mm0\n\t"
                    "punpckhbw %%mm7, %%mm1\n\t"
                    "movd %1, %%mm2\n\t" // srca ABCD0000
                    "paddb %%mm6, %%mm2\n\t"
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                    "movq %%mm2, %%mm3\n\t"
                    "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
                    "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
                    "pmullw %%mm2, %%mm0\n\t"
                    "pmullw %%mm3, %%mm1\n\t"
                    "psrlw $8, %%mm0\n\t"
                    "psrlw $8, %%mm1\n\t"
                    "packuswb %%mm1, %%mm0\n\t"
                    "movd %2, %%mm2 \n\t" // src ABCD0000
                    "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                    "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                    "paddb %%mm2, %%mm0\n\t"
                    "movq %%mm0, %0\n\t"
                    /* dst is only listed as an input but is stored to, so
                     * the write must be declared via the memory clobber. */
                    :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
                    : "memory");
            }
        }
#else /* plain MMX variant; originally noted as the faster one on Intel CPUs */
        /* Prefetch the start of the row for all three buffers. */
        __asm__ volatile(
            PREFETCHW" %0\n\t"
            PREFETCH" %1\n\t"
            PREFETCH" %2\n\t"
            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
        /* Four pixels (two quadwords of dst) per iteration; the asm itself
         * skips the group when all four srca bytes are zero. */
        for(x=0;x<w;x+=4){
            __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t"
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %3, %%mm2\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
                "movq %%mm2, %%mm3\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm2, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "movd %2, %%mm2 \n\t" //src 0000DCBA
                "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
                "movq %%mm2, %%mm6\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"

                "movq 8%0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
                "pmullw %%mm3, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
                "paddb %%mm6, %%mm0\n\t"
                "movq %%mm0, 8%0\n\t"
                "1:\n\t"
                /* %3 (bFF) is defined outside this function; presumably a
                 * quadword of 0xFF bytes -- confirm at its definition. */
                :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF)
                /* "memory": both quadwords at %0 and 8%0 are written although
                 * dst is only declared as an input operand. */
                : "%eax", "memory");
        }
#endif
#else /* HAVE_MMX */
        /* Scalar x86 path: one pixel at a time, three channels in parallel.
         * %2 holds src<<8, so after the add the result byte sits in
         * bits 8..15 and is stored from %ch/%ah/%dh. */
        for(x=0;x<w;x++){
            if(srca[x]){
                __asm__ volatile(
                    "movzbl (%0), %%ecx\n\t"
                    "movzbl 1(%0), %%eax\n\t"
                    "movzbl 2(%0), %%edx\n\t"

                    "imull %1, %%ecx\n\t"
                    "imull %1, %%eax\n\t"
                    "imull %1, %%edx\n\t"

                    "addl %2, %%ecx\n\t"
                    "addl %2, %%eax\n\t"
                    "addl %2, %%edx\n\t"

                    "movb %%ch, (%0)\n\t"
                    "movb %%ah, 1(%0)\n\t"
                    "movb %%dh, 2(%0)\n\t"

                    :
                    :"r" (&dstbase[4*x]),
                     "r" ((unsigned)srca[x]),
                     "r" (((unsigned)src[x])<<8)
                    /* "memory": the pixel is read and written through the
                     * pointer in %0, which the operand list does not reveal. */
                    :"%eax", "%ecx", "%edx", "memory"
                );
            }
        }
#endif /* HAVE_MMX */
#else /*non x86 arch or x86_64 with MMX disabled */
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                /* No blending: overwrite the three bytes with the bitmap value. */
                dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
#else
                dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
                dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
                dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
#endif
            }
        }
#endif /* arch_x86 */
        /* Advance all three buffers to the next row. */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#ifdef HAVE_MMX
    /* Leave MMX state so later floating-point code works. */
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}