Mercurial > mplayer.hg
changeset 2578:d363fde389b5
slow MMX & not-so-slow asm versions (commented out)
| author | michael |
|---|---|
| date | Tue, 30 Oct 2001 22:35:02 +0000 |
| parents | 1a118523b1e6 |
| children | d10f16ef155c |
| files | libvo/osd.c libvo/osd_template.c |
| diffstat | 2 files changed, 154 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/libvo/osd.c Tue Oct 30 22:27:37 2001 +0000 +++ b/libvo/osd.c Tue Oct 30 22:35:02 2001 +0000 @@ -79,6 +79,76 @@ int y; for(y=0;y<h;y++){ register int x; +// printf("%d, %d, %d\n", (int)src&31, (int)srca%31, (int)dstbase&31); +#ifdef HAVE_MMXFIXME +/* asm( + "pxor %%mm7, %%mm7 \n\t" + "xorl %%eax, %%eax \n\t" + "pcmpeqb %%mm6, %%mm6 \n\t" // F..F + "1: \n\t" + "movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase + "movq %%mm0, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "movd (%1, %%eax), %%mm2 \n\t" // srca ABCD0000 + "paddb %%mm6, %%mm2 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" // srca AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // srca AAAABBBB + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // srca 0A0A0A0A + "punpckhbw %%mm7, %%mm3 \n\t" // srca 0B0B0B0B + "pmullw %%mm2, %%mm0 \n\t" + "pmullw %%mm3, %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000 + "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB + "paddb %%mm2, %%mm0 \n\t" + "movq %%mm0, (%0, %%eax, 4) \n\t" + "addl $2, %%eax \n\t" + "cmpl %3, %%eax \n\t" + " jb 1b \n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "r" (w) + : "%eax" + );*/ + asm( + "xorl %%eax, %%eax \n\t" + "xorl %%ebx, %%ebx \n\t" + "xorl %%edx, %%edx \n\t" + "1: \n\t" + "movb (%1, %%eax), %%bl \n\t" + "cmpb $0, %%bl \n\t" + " jz 2f \n\t" + "movzxb (%2, %%eax), %%edx \n\t" + "shll $8, %%edx \n\t" + "decb %%bl \n\t" + "movzxb (%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, (%0, %%eax, 4) \n\t" + + "movzxb 1(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 1(%0, %%eax, 4) \n\t" + + "movzxb 2(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 2(%0, %%eax, 4) \n\t" + + "2: \n\t" + "addl $1, %%eax \n\t" + "cmpl %3, %%eax \n\t" + " jb 1b 
\n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "m" (w) + : "%eax", "%ebx", "%ecx", "%edx" + ); +#else //HAVE_MMX for(x=0;x<w;x++){ if(srca[x]){ #ifdef FAST_OSD @@ -90,10 +160,17 @@ #endif } } +#endif // !HAVE_MMX src+=srcstride; srca+=srcstride; dstbase+=dststride; } +#ifdef HAVE_3DNOW + asm("femms\n\t"); +#elif defined (HAVE_MMX) + asm("emms\n\t"); +#endif + return; }
--- a/libvo/osd_template.c Tue Oct 30 22:27:37 2001 +0000 +++ b/libvo/osd_template.c Tue Oct 30 22:35:02 2001 +0000 @@ -79,6 +79,76 @@ int y; for(y=0;y<h;y++){ register int x; +// printf("%d, %d, %d\n", (int)src&31, (int)srca%31, (int)dstbase&31); +#ifdef HAVE_MMXFIXME +/* asm( + "pxor %%mm7, %%mm7 \n\t" + "xorl %%eax, %%eax \n\t" + "pcmpeqb %%mm6, %%mm6 \n\t" // F..F + "1: \n\t" + "movq (%0, %%eax, 4), %%mm0 \n\t" // dstbase + "movq %%mm0, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "movd (%1, %%eax), %%mm2 \n\t" // srca ABCD0000 + "paddb %%mm6, %%mm2 \n\t" + "punpcklbw %%mm2, %%mm2 \n\t" // srca AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // srca AAAABBBB + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // srca 0A0A0A0A + "punpckhbw %%mm7, %%mm3 \n\t" // srca 0B0B0B0B + "pmullw %%mm2, %%mm0 \n\t" + "pmullw %%mm3, %%mm1 \n\t" + "psrlw $8, %%mm0 \n\t" + "psrlw $8, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movd (%2, %%eax), %%mm2 \n\t" // src ABCD0000 + "punpcklbw %%mm2, %%mm2 \n\t" // src AABBCCDD + "punpcklbw %%mm2, %%mm2 \n\t" // src AAAABBBB + "paddb %%mm2, %%mm0 \n\t" + "movq %%mm0, (%0, %%eax, 4) \n\t" + "addl $2, %%eax \n\t" + "cmpl %3, %%eax \n\t" + " jb 1b \n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "r" (w) + : "%eax" + );*/ + asm( + "xorl %%eax, %%eax \n\t" + "xorl %%ebx, %%ebx \n\t" + "xorl %%edx, %%edx \n\t" + "1: \n\t" + "movb (%1, %%eax), %%bl \n\t" + "cmpb $0, %%bl \n\t" + " jz 2f \n\t" + "movzxb (%2, %%eax), %%edx \n\t" + "shll $8, %%edx \n\t" + "decb %%bl \n\t" + "movzxb (%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, (%0, %%eax, 4) \n\t" + + "movzxb 1(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 1(%0, %%eax, 4) \n\t" + + "movzxb 2(%0, %%eax, 4), %%ecx \n\t" + "imull %%ebx, %%ecx \n\t" + "addl %%edx, %%ecx \n\t" + "movb %%ch, 2(%0, %%eax, 4) \n\t" + + "2: \n\t" + "addl $1, %%eax \n\t" + "cmpl %3, 
%%eax \n\t" + " jb 1b \n\t" + + :: "r" (dstbase), "r" (srca), "r" (src), "m" (w) + : "%eax", "%ebx", "%ecx", "%edx" + ); +#else //HAVE_MMX for(x=0;x<w;x++){ if(srca[x]){ #ifdef FAST_OSD @@ -90,10 +160,17 @@ #endif } } +#endif // !HAVE_MMX src+=srcstride; srca+=srcstride; dstbase+=dststride; } +#ifdef HAVE_3DNOW + asm("femms\n\t"); +#elif defined (HAVE_MMX) + asm("emms\n\t"); +#endif + return; }