Mercurial > mplayer.hg
changeset 2823:004ee19ebfcf
Extract parallelism from OSD stuff + MMX2 optimization.
I've found that mplayer's own timing measurement is not precise :(
Here are my test results using RDTSC:
Old stuff:
rd_tsc: 774377
rd_tsc: 765985
rd_tsc: 265309
New CPU optimized stuff:
rd_tsc: 661154
rd_tsc: 641317
rd_tsc: 222448
New MMX2 optimized stuff:
rd_tsc: 269544
rd_tsc: 329189
rd_tsc: 173110
author | nick |
---|---|
date | Sun, 11 Nov 2001 11:18:50 +0000 |
parents | 7679d983c52f |
children | 2f7f02fa1199 |
files | libvo/osd.c libvo/osd_template.c |
diffstat | 2 files changed, 86 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/libvo/osd.c Sun Nov 11 04:31:59 2001 +0000 +++ b/libvo/osd.c Sun Nov 11 11:18:50 2001 +0000 @@ -76,12 +76,25 @@ return; } +#ifdef PROFILE_ME +static inline unsigned long long int read_tsc( void ) +{ + unsigned long long int retval; + __asm __volatile ("rdtsc":"=A"(retval)::"memory"); + return retval; +} +#endif + void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; +#ifdef PROFILE_ME +unsigned long long v1,v2; +v1 = read_tsc(); +#endif for(y=0;y<h;y++){ register int x; #ifdef ARCH_X86 -#if 0 /*def HAVE_MMX2*/ +#ifdef HAVE_MMX2 asm volatile( "pxor %%mm7, %%mm7 \n\t" "xorl %%eax, %%eax \n\t" @@ -117,41 +130,33 @@ : "%eax" ); #else /* 0 HAVE_MMX2*/ - asm volatile( - "xorl %%eax, %%eax \n\t" - "xorl %%ebx, %%ebx \n\t" - "xorl %%edx, %%edx \n\t" - ".balign 16\n\t" - "1: \n\t" - "movb (%1, %%eax), %%bl \n\t" - "cmpb $0, %%bl \n\t" - " jz 2f \n\t" - "movzbl (%2, %%eax), %%edx \n\t" - "shll $8, %%edx \n\t" - "decb %%bl \n\t" - "movzbl (%0, %%eax, 4), %%ecx \n\t" - "imull %%ebx, %%ecx \n\t" - "addl %%edx, %%ecx \n\t" - "movb %%ch, (%0, %%eax, 4) \n\t" + for(x=0;x<w;x++){ + if(srca[x]){ + asm volatile( + "movzbl (%0), %%ecx\n\t" + "movzbl 1(%0), %%eax\n\t" + "movzbl 2(%0), %%edx\n\t" + + "imull %1, %%ecx\n\t" + "imull %1, %%eax\n\t" + "imull %1, %%edx\n\t" - "movzbl 1(%0, %%eax, 4), %%ecx \n\t" - "imull %%ebx, %%ecx \n\t" - "addl %%edx, %%ecx \n\t" - "movb %%ch, 1(%0, %%eax, 4) \n\t" + "addl %2, %%ecx\n\t" + "addl %2, %%eax\n\t" + "addl %2, %%edx\n\t" + + "movb %%ch, (%0)\n\t" + "movb %%ah, 1(%0)\n\t" + "movb %%dh, 2(%0)\n\t" - "movzbl 2(%0, %%eax, 4), %%ecx \n\t" - "imull %%ebx, %%ecx \n\t" - "addl %%edx, %%ecx \n\t" - "movb %%ch, 2(%0, %%eax, 4) \n\t" - - "2: \n\t" - "addl $1, %%eax \n\t" - "cmpl %3, %%eax \n\t" - " jb 1b \n\t" - - :: "r" (dstbase), "r" (srca), "r" (src), "m" (w) - : "%eax", "%ebx", "%ecx", "%edx" + : + :"r" (&dstbase[4*x]), + "r" ((unsigned)srca[x]), + "r" 
(((unsigned)src[x])<<8) + :"%eax", "%ecx", "%edx" ); + } + } #endif /* 0 HAVE_MMX*/ #else /*non x86 arch*/ for(x=0;x<w;x++){ @@ -170,10 +175,14 @@ srca+=srcstride; dstbase+=dststride; } -#if 0 /*def HAVE_MMX2*/ +#ifdef HAVE_MMX2 asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory"); #endif +#ifdef PROFILE_ME +v2 = read_tsc(); +printf("rd_tsc: %llu\n\t",v2-v1); +#endif return; }
--- a/libvo/osd_template.c Sun Nov 11 04:31:59 2001 +0000 +++ b/libvo/osd_template.c Sun Nov 11 11:18:50 2001 +0000 @@ -76,12 +76,25 @@ return; } +#ifdef PROFILE_ME +static inline unsigned long long int read_tsc( void ) +{ + unsigned long long int retval; + __asm __volatile ("rdtsc":"=A"(retval)::"memory"); + return retval; +} +#endif + void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; +#ifdef PROFILE_ME +unsigned long long v1,v2; +v1 = read_tsc(); +#endif for(y=0;y<h;y++){ register int x; #ifdef ARCH_X86 -#if 0 /*def HAVE_MMX2*/ +#ifdef HAVE_MMX2 asm volatile( "pxor %%mm7, %%mm7 \n\t" "xorl %%eax, %%eax \n\t" @@ -117,41 +130,33 @@ : "%eax" ); #else /* 0 HAVE_MMX2*/ - asm volatile( - "xorl %%eax, %%eax \n\t" - "xorl %%ebx, %%ebx \n\t" - "xorl %%edx, %%edx \n\t" - ".balign 16\n\t" - "1: \n\t" - "movb (%1, %%eax), %%bl \n\t" - "cmpb $0, %%bl \n\t" - " jz 2f \n\t" - "movzbl (%2, %%eax), %%edx \n\t" - "shll $8, %%edx \n\t" - "decb %%bl \n\t" - "movzbl (%0, %%eax, 4), %%ecx \n\t" - "imull %%ebx, %%ecx \n\t" - "addl %%edx, %%ecx \n\t" - "movb %%ch, (%0, %%eax, 4) \n\t" + for(x=0;x<w;x++){ + if(srca[x]){ + asm volatile( + "movzbl (%0), %%ecx\n\t" + "movzbl 1(%0), %%eax\n\t" + "movzbl 2(%0), %%edx\n\t" + + "imull %1, %%ecx\n\t" + "imull %1, %%eax\n\t" + "imull %1, %%edx\n\t" - "movzbl 1(%0, %%eax, 4), %%ecx \n\t" - "imull %%ebx, %%ecx \n\t" - "addl %%edx, %%ecx \n\t" - "movb %%ch, 1(%0, %%eax, 4) \n\t" + "addl %2, %%ecx\n\t" + "addl %2, %%eax\n\t" + "addl %2, %%edx\n\t" + + "movb %%ch, (%0)\n\t" + "movb %%ah, 1(%0)\n\t" + "movb %%dh, 2(%0)\n\t" - "movzbl 2(%0, %%eax, 4), %%ecx \n\t" - "imull %%ebx, %%ecx \n\t" - "addl %%edx, %%ecx \n\t" - "movb %%ch, 2(%0, %%eax, 4) \n\t" - - "2: \n\t" - "addl $1, %%eax \n\t" - "cmpl %3, %%eax \n\t" - " jb 1b \n\t" - - :: "r" (dstbase), "r" (srca), "r" (src), "m" (w) - : "%eax", "%ebx", "%ecx", "%edx" + : + :"r" (&dstbase[4*x]), + "r" 
((unsigned)srca[x]), + "r" (((unsigned)src[x])<<8) + :"%eax", "%ecx", "%edx" ); + } + } #endif /* 0 HAVE_MMX*/ #else /*non x86 arch*/ for(x=0;x<w;x++){ @@ -170,10 +175,14 @@ srca+=srcstride; dstbase+=dststride; } -#if 0 /*def HAVE_MMX2*/ +#ifdef HAVE_MMX2 asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory"); #endif +#ifdef PROFILE_ME +v2 = read_tsc(); +printf("rd_tsc: %llu\n\t",v2-v1); +#endif return; }