comparison libvo/osd.c @ 2833:1b6c207c0410

Enable MMX stuff. I don't know why Michael Niedermayer disabled it, but my benchmarks of 25 frames for TIMER+SUBTITLE (measured by RDTSC) are: Non-MMX stuff: total=306142159; MMX stuff: total=159534150.
author nick
date Sun, 11 Nov 2001 14:42:10 +0000
parents 004ee19ebfcf
children 86fdf7897315
comparison
equal deleted inserted replaced
2832:99d169b25fbf 2833:1b6c207c0410
5 //#define FAST_OSD_TABLE 5 //#define FAST_OSD_TABLE
6 6
7 #include "config.h" 7 #include "config.h"
8 #include "osd.h" 8 #include "osd.h"
9 #include "../mmx_defs.h" 9 #include "../mmx_defs.h"
10 //#define ENABLE_PROFILE
11 #include "../my_profile.h"
10 12
11 void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ 13 void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
12 int y; 14 int y;
13 #ifdef FAST_OSD 15 #ifdef FAST_OSD
14 w=w>>1; 16 w=w>>1;
74 dstbase+=dststride; 76 dstbase+=dststride;
75 } 77 }
76 return; 78 return;
77 } 79 }
78 80
79 #ifdef PROFILE_ME
80 static inline unsigned long long int read_tsc( void )
81 {
82 unsigned long long int retval;
83 __asm __volatile ("rdtsc":"=A"(retval)::"memory");
84 return retval;
85 }
86 #endif
87
88 void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ 81 void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
89 int y; 82 int y;
90 #ifdef PROFILE_ME 83 PROFILE_START();
91 unsigned long long v1,v2;
92 v1 = read_tsc();
93 #endif
94 for(y=0;y<h;y++){ 84 for(y=0;y<h;y++){
95 register int x; 85 register int x;
96 #ifdef ARCH_X86 86 #ifdef ARCH_X86
97 #ifdef HAVE_MMX2 87 #ifdef HAVE_MMX
98 asm volatile( 88 asm volatile(
99 "pxor %%mm7, %%mm7 \n\t" 89 "pxor %%mm7, %%mm7 \n\t"
100 "xorl %%eax, %%eax \n\t" 90 "xorl %%eax, %%eax \n\t"
101 "pcmpeqb %%mm6, %%mm6 \n\t" // F..F 91 "pcmpeqb %%mm6, %%mm6 \n\t" // F..F
102 ".balign 16\n\t" 92 ".balign 16\n\t"
119 "packuswb %%mm1, %%mm0 \n\t" 109 "packuswb %%mm1, %%mm0 \n\t"
120 "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000 110 "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000
121 "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD 111 "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD
122 "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB 112 "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB
123 "paddb %%mm2, %%mm0 \n\t" 113 "paddb %%mm2, %%mm0 \n\t"
124 MOVNTQ" %%mm0, (%0, %%eax, 4) \n\t" 114 "movq %%mm0, (%0, %%eax, 4) \n\t"
125 "addl $2, %%eax \n\t" 115 "addl $2, %%eax \n\t"
126 "cmpl %3, %%eax \n\t" 116 "cmpl %3, %%eax \n\t"
127 " jb 1b \n\t" 117 " jb 1b \n\t"
128 118
129 :: "r" (dstbase), "r" (srca), "r" (src), "r" (w) 119 :: "r" (dstbase), "r" (srca), "r" (src), "r" (w)
173 #endif /* arch_x86 */ 163 #endif /* arch_x86 */
174 src+=srcstride; 164 src+=srcstride;
175 srca+=srcstride; 165 srca+=srcstride;
176 dstbase+=dststride; 166 dstbase+=dststride;
177 } 167 }
178 #ifdef HAVE_MMX2 168 #ifdef HAVE_MMX
179 asm volatile(SFENCE:::"memory");
180 asm volatile(EMMS:::"memory"); 169 asm volatile(EMMS:::"memory");
181 #endif 170 #endif
182 #ifdef PROFILE_ME 171 PROFILE_END("vo_draw_alpha_rgb32");
183 v2 = read_tsc();
184 printf("rd_tsc: %llu\n\t",v2-v1);
185 #endif
186 return; 172 return;
187 } 173 }
188 174
189 #ifdef FAST_OSD_TABLE 175 #ifdef FAST_OSD_TABLE
190 static unsigned short fast_osd_15bpp_table[256]; 176 static unsigned short fast_osd_15bpp_table[256];