Mercurial > mplayer.hg
changeset 18683:d940ecaff286
Moves invariant code (chiefly MMX register initialization) out of loops and eliminates some memory accesses within the inner loop.
Patch by Zuxy Meng < zuxy POIS meng AH gmail POIS com >
Original thread:
Date: Mon, 12 Jun 2006 00:31:53 -0700 (PDT)
Subject: [MPlayer-dev-eng] [PATCH] Loop invariant motion in libvo/osd_template.c
author | gpoirier |
---|---|
date | Mon, 12 Jun 2006 11:14:10 +0000 |
parents | bee3186a06f7 |
children | c9de3673e299 |
files | libvo/osd_template.c |
diffstat | 1 files changed, 43 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/libvo/osd_template.c Sun Jun 11 21:29:00 2006 +0000 +++ b/libvo/osd_template.c Mon Jun 12 11:14:10 2006 +0000 @@ -32,6 +32,15 @@ #if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; #endif +#ifdef HAVE_MMX + asm volatile( + "pcmpeqb %%mm5, %%mm5\n\t" // F..F + "movq %%mm5, %%mm4\n\t" + "movq %%mm5, %%mm7\n\t" + "psllw $8, %%mm5\n\t" //FF00FF00FF00 + "psrlw $8, %%mm4\n\t" //00FF00FF00FF + ::); +#endif for(y=0;y<h;y++){ register int x; #ifdef HAVE_MMX @@ -39,11 +48,6 @@ PREFETCHW" %0\n\t" PREFETCH" %1\n\t" PREFETCH" %2\n\t" -// "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm5, %%mm5\n\t" // F..F - "movq %%mm5, %%mm4\n\t" - "psllw $8, %%mm5\n\t" //FF00FF00FF00 - "psrlw $8, %%mm4\n\t" //00FF00FF00FF ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); for(x=0;x<w;x+=8){ asm volatile( @@ -58,7 +62,7 @@ "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y "movq %1, %%mm2\n\t" //srca HGFEDCBA - "paddb "MANGLE(bFF)", %%mm2\n\t" + "paddb %%mm7, %%mm2\n\t" "movq %%mm2, %%mm3\n\t" "pand %%mm4, %%mm2\n\t" //0G0E0C0A "psrlw $8, %%mm3\n\t" //0H0F0D0B @@ -98,6 +102,16 @@ #if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; #endif +#ifdef HAVE_MMX + asm volatile( + "pxor %%mm7, %%mm7\n\t" + "pcmpeqb %%mm5, %%mm5\n\t" // F..F + "movq %%mm5, %%mm6\n\t" + "movq %%mm5, %%mm4\n\t" + "psllw $8, %%mm5\n\t" //FF00FF00FF00 + "psrlw $8, %%mm4\n\t" //00FF00FF00FF + ::); +#endif for(y=0;y<h;y++){ register int x; #ifdef HAVE_MMX @@ -105,11 +119,6 @@ PREFETCHW" %0\n\t" PREFETCH" %1\n\t" PREFETCH" %2\n\t" - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm5, %%mm5\n\t" // F..F - "movq %%mm5, %%mm4\n\t" - "psllw $8, %%mm5\n\t" //FF00FF00FF00 - "psrlw $8, %%mm4\n\t" //00FF00FF00FF ::"m"(*dstbase),"m"(*srca),"m"(*src)); for(x=0;x<w;x+=4){ asm volatile( @@ -123,7 +132,7 @@ "movq %%mm0, %%mm1\n\t" "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y "movd %%eax, %%mm2\n\t" //srca 0000DCBA - "paddb "MANGLE(bFF)", %%mm2\n\t" + "paddb %%mm6, %%mm2\n\t" "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A "pmullw %%mm2, %%mm0\n\t" 
"psrlw $8, %%mm0\n\t" @@ -186,6 +195,12 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; +#ifdef HAVE_MMX + asm volatile( + "pxor %%mm7, %%mm7\n\t" + "pcmpeqb %%mm6, %%mm6\n\t" // F..F + ::); +#endif for(y=0;y<h;y++){ register unsigned char *dst = dstbase; register int x; @@ -195,8 +210,6 @@ PREFETCHW" %0\n\t" PREFETCH" %1\n\t" PREFETCH" %2\n\t" - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm6, %%mm6\n\t" // F..F ::"m"(*dst),"m"(*srca),"m"(*src):"memory"); for(x=0;x<w;x+=2){ if(srca[x] || srca[x+1]) @@ -293,6 +306,22 @@ #ifdef WORDS_BIGENDIAN dstbase++; #endif +#ifdef HAVE_MMX +#ifdef HAVE_3DNOW + asm volatile( + "pxor %%mm7, %%mm7\n\t" + "pcmpeqb %%mm6, %%mm6\n\t" // F..F + ::); +#else /* HAVE_3DNOW */ + asm volatile( + "pxor %%mm7, %%mm7\n\t" + "pcmpeqb %%mm5, %%mm5\n\t" // F..F + "movq %%mm5, %%mm4\n\t" + "psllw $8, %%mm5\n\t" //FF00FF00FF00 + "psrlw $8, %%mm4\n\t" //00FF00FF00FF + ::); +#endif /* HAVE_3DNOW */ +#endif /* HAVE_MMX */ for(y=0;y<h;y++){ register int x; #if defined(ARCH_X86) || defined(ARCH_X86_64) @@ -302,8 +331,6 @@ PREFETCHW" %0\n\t" PREFETCH" %1\n\t" PREFETCH" %2\n\t" - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm6, %%mm6\n\t" // F..F ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); for(x=0;x<w;x+=2){ if(srca[x] || srca[x+1]) @@ -339,11 +366,6 @@ PREFETCHW" %0\n\t" PREFETCH" %1\n\t" PREFETCH" %2\n\t" - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm5, %%mm5\n\t" // F..F - "movq %%mm5, %%mm4\n\t" - "psllw $8, %%mm5\n\t" //FF00FF00FF00 - "psrlw $8, %%mm4\n\t" //00FF00FF00FF ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); for(x=0;x<w;x+=4){ asm volatile(