comparison libvo/osd.c @ 2798:ee2cd36a81a2

Code cleanup - emms is not required when the MMX block is commented out. Special note for Michael Niedermayer: Are you still here? If you don't like for(cond;cond;cond) C constructions and prefer asm ones ("jb 1b"), then use the .align 16 assembler pseudo-instruction; otherwise the loops will most likely not be aligned on the correct boundary. (16 is for the K7; for the Pentium it should be 8.) Your parts have a lot of such shortcomings.
author nick
date Sat, 10 Nov 2001 18:40:49 +0000
parents d363fde389b5
children 7847d6b7ad3d
comparison
equal deleted inserted replaced
2797:4c9966c2b2ed 2798:ee2cd36a81a2
4 //#define FAST_OSD 4 //#define FAST_OSD
5 //#define FAST_OSD_TABLE 5 //#define FAST_OSD_TABLE
6 6
7 #include "config.h" 7 #include "config.h"
8 #include "osd.h" 8 #include "osd.h"
9 #include "../mmx_defs.h"
9 10
10 void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ 11 void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
11 int y; 12 int y;
12 #ifdef FAST_OSD 13 #ifdef FAST_OSD
13 w=w>>1; 14 w=w>>1;
77 78
78 void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ 79 void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
79 int y; 80 int y;
80 for(y=0;y<h;y++){ 81 for(y=0;y<h;y++){
81 register int x; 82 register int x;
82 // printf("%d, %d, %d\n", (int)src&31, (int)srca%31, (int)dstbase&31); 83 #ifdef ARCH_X86
83 #ifdef HAVE_MMXFIXME 84 #if 0 /*def HAVE_MMX2*/
84 /* asm( 85 asm volatile(
85 "pxor %%mm7, %%mm7 \n\t" 86 "pxor %%mm7, %%mm7 \n\t"
86 "xorl %%eax, %%eax \n\t" 87 "xorl %%eax, %%eax \n\t"
87 "pcmpeqb %%mm6, %%mm6 \n\t" // F..F 88 "pcmpeqb %%mm6, %%mm6 \n\t" // F..F
89 ".align 16\n\t"
88 "1: \n\t" 90 "1: \n\t"
89 "movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase 91 "movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase
90 "movq %%mm0, %%mm1 \n\t" 92 "movq %%mm0, %%mm1 \n\t"
91 "punpcklbw %%mm7, %%mm0 \n\t" 93 "punpcklbw %%mm7, %%mm0 \n\t"
92 "punpckhbw %%mm7, %%mm1 \n\t" 94 "punpckhbw %%mm7, %%mm1 \n\t"
104 "packuswb %%mm1, %%mm0 \n\t" 106 "packuswb %%mm1, %%mm0 \n\t"
105 "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000 107 "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000
106 "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD 108 "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD
107 "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB 109 "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB
108 "paddb %%mm2, %%mm0 \n\t" 110 "paddb %%mm2, %%mm0 \n\t"
109 "movq %%mm0, (%0, %%eax, 4) \n\t" 111 MOVNTQ" %%mm0, (%0, %%eax, 4) \n\t"
110 "addl $2, %%eax \n\t" 112 "addl $2, %%eax \n\t"
111 "cmpl %3, %%eax \n\t" 113 "cmpl %3, %%eax \n\t"
112 " jb 1b \n\t" 114 " jb 1b \n\t"
113 115
114 :: "r" (dstbase), "r" (srca), "r" (src), "r" (w) 116 :: "r" (dstbase), "r" (srca), "r" (src), "r" (w)
115 : "%eax" 117 : "%eax"
116 );*/ 118 );
117 asm( 119 #else /* 0 HAVE_MMX2*/
120 asm volatile(
118 "xorl %%eax, %%eax \n\t" 121 "xorl %%eax, %%eax \n\t"
119 "xorl %%ebx, %%ebx \n\t" 122 "xorl %%ebx, %%ebx \n\t"
120 "xorl %%edx, %%edx \n\t" 123 "xorl %%edx, %%edx \n\t"
124 ".align 16\n\t"
121 "1: \n\t" 125 "1: \n\t"
122 "movb (%1, %%eax), %%bl \n\t" 126 "movb (%1, %%eax), %%bl \n\t"
123 "cmpb $0, %%bl \n\t" 127 "cmpb $0, %%bl \n\t"
124 " jz 2f \n\t" 128 " jz 2f \n\t"
125 "movzxb (%2, %%eax), %%edx \n\t" 129 "movzxb (%2, %%eax), %%edx \n\t"
146 " jb 1b \n\t" 150 " jb 1b \n\t"
147 151
148 :: "r" (dstbase), "r" (srca), "r" (src), "m" (w) 152 :: "r" (dstbase), "r" (srca), "r" (src), "m" (w)
149 : "%eax", "%ebx", "%ecx", "%edx" 153 : "%eax", "%ebx", "%ecx", "%edx"
150 ); 154 );
151 #else //HAVE_MMX 155 #endif /* 0 HAVE_MMX*/
156 #else /*non x86 arch*/
152 for(x=0;x<w;x++){ 157 for(x=0;x<w;x++){
153 if(srca[x]){ 158 if(srca[x]){
154 #ifdef FAST_OSD 159 #ifdef FAST_OSD
155 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x]; 160 dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
156 #else 161 #else
158 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x]; 163 dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
159 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x]; 164 dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
160 #endif 165 #endif
161 } 166 }
162 } 167 }
163 #endif // !HAVE_MMX 168 #endif /* arch_x86 */
164 src+=srcstride; 169 src+=srcstride;
165 srca+=srcstride; 170 srca+=srcstride;
166 dstbase+=dststride; 171 dstbase+=dststride;
167 } 172 }
168 #ifdef HAVE_3DNOW 173 #if 0 /*def HAVE_MMX2*/
169 asm("femms\n\t"); 174 asm volatile(SFENCE:::"memory");
170 #elif defined (HAVE_MMX) 175 asm volatile(EMMS:::"memory");
171 asm("emms\n\t"); 176 #endif
172 #endif
173
174 return; 177 return;
175 } 178 }
176 179
177 #ifdef FAST_OSD_TABLE 180 #ifdef FAST_OSD_TABLE
178 static unsigned short fast_osd_15bpp_table[256]; 181 static unsigned short fast_osd_15bpp_table[256];