comparison libvo/osd.c @ 2839:03ccbb72e2e9

Cloning 32 stuff to 24
author nick
date Sun, 11 Nov 2001 16:09:19 +0000
parents 86fdf7897315
children 5be2017077fb
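This changeset clones the x86/MMX fast paths of vo_draw_alpha_rgb32() into vo_draw_alpha_rgb24(): the 24bpp function's old plain C loop becomes its non-x86 fallback, and two 8-byte masks are added so the 24bpp MMX loop can store whole quadwords without corrupting the pixel that follows. Every path implements the same per-channel blend; a minimal scalar sketch, lifted from the portable fallback in this diff (the helper name blend_px24 is illustrative, not from the file):

    /* Scalar reference for one 24bpp pixel; srca == 0 leaves it untouched. */
    static inline void blend_px24(unsigned char *dst,
                                  unsigned char src, unsigned char srca)
    {
        if (srca) {
            dst[0] = ((dst[0]*srca)>>8) + src;
            dst[1] = ((dst[1]*srca)>>8) + src;
            dst[2] = ((dst[2]*srca)>>8) + src;
        }
    }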
--- libvo/osd.c@2838:3e045cebc446
+++ libvo/osd.c@2839:03ccbb72e2e9
@@ -52,48 +52,129 @@
         dstbase+=dststride;
     }
     return;
 }
 
+#ifdef HAVE_MMX
+static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL;
+static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
+#endif
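The two masks just added exist because a pair of 24bpp pixels fills only six of the eight bytes that the movq store in the new loop writes back: on little-endian x86, mask24hl selects the six blended low bytes, while mask24lh keeps the top two bytes, which already belong to the next pixel and must survive from the original destination. A scalar sketch of the pand/pand/por merge at the end of the new MMX loop:

    #include <stdint.h>

    /* orig = untouched destination quadword (mm5); blended = blend result (mm0) */
    static inline uint64_t merge24(uint64_t orig, uint64_t blended)
    {
        return (orig    & 0xFFFF000000000000ULL)   /* mask24lh: keep bytes 6..7 */
             | (blended & 0x0000FFFFFFFFFFFFULL);  /* mask24hl: take bytes 0..5 */
    }

vo_draw_alpha_rgb32() needs no such merge: two 32bpp pixels fill a quadword exactly, so it can movq the blended result straight back.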
 void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
     for(y=0;y<h;y++){
         register unsigned char *dst = dstbase;
-        register int x;
-        for(x=0;x<w;x++){
-            if(srca[x]){
-#ifdef FAST_OSD
-                dst[0]=dst[1]=dst[2]=src[x];
-#else
-                dst[0]=((dst[0]*srca[x])>>8)+src[x];
-                dst[1]=((dst[1]*srca[x])>>8)+src[x];
-                dst[2]=((dst[2]*srca[x])>>8)+src[x];
-#endif
-            }
-            dst+=3; // 24bpp
-        }
-        src+=srcstride;
-        srca+=srcstride;
-        dstbase+=dststride;
-    }
-    return;
-}
-
-void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
-    int y;
-    PROFILE_START();
-    for(y=0;y<h;y++){
         register int x;
 #ifdef ARCH_X86
 #ifdef HAVE_MMX
         asm volatile(
             PREFETCHW" %0\n\t"
             PREFETCH" %1\n\t"
             PREFETCH" %2\n\t"
             "pxor %%mm7, %%mm7\n\t"
             "pcmpeqb %%mm6, %%mm6\n\t" // F..F
-            ::"m"(dstbase),"m"(srca),"m"(src):"memory");
+            ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
+        for(x=0;x<w;x+=2){
+            asm volatile(
+                PREFETCHW" 32%0\n\t"
+                PREFETCH" 32%1\n\t"
+                PREFETCH" 32%2\n\t"
+                "movq %0, %%mm0\n\t" // dstbase
+                "movq %%mm0, %%mm1\n\t"
+                "movq %%mm0, %%mm5\n\t"
+                "punpcklbw %%mm7, %%mm0\n\t"
+                "punpckhbw %%mm7, %%mm1\n\t"
+                "movd %1, %%mm2\n\t" // srca ABCD0000
+                "paddb %%mm6, %%mm2\n\t"
+                "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
+                "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
+                "movq %%mm2, %%mm3\n\t"
+                "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
+                "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
+                "pmullw %%mm2, %%mm0\n\t"
+                "pmullw %%mm3, %%mm1\n\t"
+                "psrlw $8, %%mm0\n\t"
+                "psrlw $8, %%mm1\n\t"
+                "packuswb %%mm1, %%mm0\n\t"
+                "movd %2, %%mm2 \n\t" // src ABCD0000
+                "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
+                "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
+                "paddb %%mm2, %%mm0\n\t"
+                "pand %4, %%mm5\n\t"
+                "pand %3, %%mm0\n\t"
+                "por %%mm0, %%mm5\n\t"
+                "movq %%mm5, %0\n\t"
+                :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh));
+            dst += 6;
+        }
+#else /* HAVE_MMX */
+        for(x=0;x<w;x++){
+            if(srca[x]){
+                asm volatile(
+                    "movzbl (%0), %%ecx\n\t"
+                    "movzbl 1(%0), %%eax\n\t"
+                    "movzbl 2(%0), %%edx\n\t"
+
+                    "imull %1, %%ecx\n\t"
+                    "imull %1, %%eax\n\t"
+                    "imull %1, %%edx\n\t"
+
+                    "addl %2, %%ecx\n\t"
+                    "addl %2, %%eax\n\t"
+                    "addl %2, %%edx\n\t"
+
+                    "movb %%ch, (%0)\n\t"
+                    "movb %%ah, 1(%0)\n\t"
+                    "movb %%dh, 2(%0)\n\t"
+
+                    :
+                    :"r" (dst),
+                     "r" ((unsigned)srca[x]),
+                     "r" (((unsigned)src[x])<<8)
+                    :"%eax", "%ecx", "%edx"
+                    );
+            }
+            dst += 3;
+        }
+#endif /* HAVE_MMX */
+#else /*non x86 arch*/
+        for(x=0;x<w;x++){
+            if(srca[x]){
+#ifdef FAST_OSD
+                dst[0]=dst[1]=dst[2]=src[x];
+#else
+                dst[0]=((dst[0]*srca[x])>>8)+src[x];
+                dst[1]=((dst[1]*srca[x])>>8)+src[x];
+                dst[2]=((dst[2]*srca[x])>>8)+src[x];
+#endif
+            }
+            dst+=3; // 24bpp
+        }
+#endif /* arch_x86 */
+        src+=srcstride;
+        srca+=srcstride;
+        dstbase+=dststride;
+    }
+#ifdef HAVE_MMX
+    asm volatile(EMMS:::"memory");
+#endif
+    return;
+}
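In the non-MMX x86 branch of the cloned function, the >>8 of the blend comes for free: the source byte is passed in pre-shifted as ((unsigned)src[x])<<8, and after the imull/addl sequence the bytes stored via movb %%ch, %%ah and %%dh are bits 8..15 of each 32-bit result. One channel in scalar form (variable names illustrative):

    unsigned t = dst_byte*alpha + (((unsigned)src_byte)<<8);
    dst_byte = (unsigned char)(t >> 8);   /* what movb %%ch, (%0) stores */

Because the shifted source contributes nothing below bit 8, t>>8 equals ((dst_byte*alpha)>>8) + src_byte exactly.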
+
+void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
+    int y;
+    PROFILE_START();
+    for(y=0;y<h;y++){
+        register int x;
+#ifdef ARCH_X86
+#ifdef HAVE_MMX
+        asm volatile(
+            PREFETCHW" %0\n\t"
+            PREFETCH" %1\n\t"
+            PREFETCH" %2\n\t"
+            "pxor %%mm7, %%mm7\n\t"
+            "pcmpeqb %%mm6, %%mm6\n\t" // F..F
+            ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
         for(x=0;x<w;x+=2){
             asm volatile(
                 PREFETCHW" 32%0\n\t"
                 PREFETCH" 32%1\n\t"
                 PREFETCH" 32%2\n\t"
118 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB 199 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
119 "paddb %%mm2, %%mm0\n\t" 200 "paddb %%mm2, %%mm0\n\t"
120 "movq %%mm0, %0\n\t" 201 "movq %%mm0, %0\n\t"
121 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])); 202 :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
122 } 203 }
123 #else /* 0 HAVE_MMX2*/ 204 #else /* HAVE_MMX */
124 for(x=0;x<w;x++){ 205 for(x=0;x<w;x++){
125 if(srca[x]){ 206 if(srca[x]){
126 asm volatile( 207 asm volatile(
127 "movzbl (%0), %%ecx\n\t" 208 "movzbl (%0), %%ecx\n\t"
128 "movzbl 1(%0), %%eax\n\t" 209 "movzbl 1(%0), %%eax\n\t"
146 "r" (((unsigned)src[x])<<8) 227 "r" (((unsigned)src[x])<<8)
147 :"%eax", "%ecx", "%edx" 228 :"%eax", "%ecx", "%edx"
148 ); 229 );
149 } 230 }
150 } 231 }
151 #endif /* 0 HAVE_MMX*/ 232 #endif /* HAVE_MMX */
152 #else /*non x86 arch*/ 233 #else /*non x86 arch*/
153 for(x=0;x<w;x++){ 234 for(x=0;x<w;x++){
154 if(srca[x]){ 235 if(srca[x]){
155 #ifdef FAST_OSD 236 #ifdef FAST_OSD
156 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x]; 237 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
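For context, a hypothetical call site (not part of this diff): blending a w x h OSD bitmap src with per-pixel alpha srca onto a 24bpp frame at pixel position (x0, y0), assuming a packed bitmap (srcstride == w) and a frame pitch of stride bytes:

    void draw_osd_24(unsigned char *frame, int stride, int x0, int y0,
                     int w, int h, unsigned char *src, unsigned char *srca)
    {
        vo_draw_alpha_rgb24(w, h, src, srca, w,
                            frame + y0*stride + 3*x0, stride);
    }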