Mercurial > mplayer.hg
changeset 2511:6db23dd30242
mmx, mmx2, 3dnow optimized 24to32
author | nick |
---|---|
date | Sun, 28 Oct 2001 12:02:16 +0000 |
parents | 42e1ae2c8f5f |
children | 8899131fae4d |
files | postproc/rgb2rgb.c postproc/rgb2rgb_template.c |
diffstat | 2 files changed, 72 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/postproc/rgb2rgb.c Sun Oct 28 11:48:50 2001 +0000
+++ b/postproc/rgb2rgb.c Sun Oct 28 12:02:16 2001 +0000
@@ -3,6 +3,26 @@
 #include "rgb2rgb.h"
 #include "mmx.h"
 
+#ifdef HAVE_3DNOW
+#define PREFETCH "prefetch"
+#define PREFETCHW "prefetchw"
+#elif HAVE_MMX2
+#define PREFETCH "prefetchnta"
+#define PREFETCHW "prefetcht0"
+#endif
+
+#ifdef HAVE_3DNOW
+#define EMMS "femms"
+#else
+#define EMMS "emms"
+#endif
+
+#ifdef HAVE_MMX2
+#define MOVNTQ "movntq"
+#else
+#define MOVNTQ "movq"
+#endif
+
 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
 {
   uint8_t *dest = dst;
@@ -14,10 +34,23 @@
 #endif
   end = s + src_size;
 #ifdef HAVE_MMX
+#ifdef PREFETCH
+  __asm __volatile(
+    PREFETCH" %0\n\t"
+    PREFETCH" 64%0\n\t"
+    PREFETCHW" %1\n\t"
+    PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory");
+#endif
   mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
   __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
   while(s < mm_end)
   {
+#ifdef PREFETCH
+    __asm __volatile(
+      PREFETCH" 128%0\n\t"
+      PREFETCHW" 128%1"
+      ::"m"(*s),"m"(*dest):"memory");
+#endif
     __asm __volatile(
       "movd %1, %%mm0\n\t"
       "movd 3%1, %%mm1\n\t"
@@ -27,14 +60,15 @@
       "punpckldq %%mm3, %%mm2\n\t"
       "pand %%mm7, %%mm0\n\t"
       "pand %%mm7, %%mm2\n\t"
-      "movq %%mm0, %0\n\t"
-      "movq %%mm2, 8%0"
+      MOVNTQ" %%mm0, %0\n\t"
+      MOVNTQ" %%mm2, 8%0"
       :"=m"(*dest)
       :"m"(*s)
       :"memory");
     dest += 16;
     s += 12;
   }
+  __asm __volatile(EMMS:::"memory");
 #endif
   while(s < end)
   {
--- a/postproc/rgb2rgb_template.c Sun Oct 28 11:48:50 2001 +0000
+++ b/postproc/rgb2rgb_template.c Sun Oct 28 12:02:16 2001 +0000
@@ -3,6 +3,26 @@
 #include "rgb2rgb.h"
 #include "mmx.h"
 
+#ifdef HAVE_3DNOW
+#define PREFETCH "prefetch"
+#define PREFETCHW "prefetchw"
+#elif HAVE_MMX2
+#define PREFETCH "prefetchnta"
+#define PREFETCHW "prefetcht0"
+#endif
+
+#ifdef HAVE_3DNOW
+#define EMMS "femms"
+#else
+#define EMMS "emms"
+#endif
+
+#ifdef HAVE_MMX2
+#define MOVNTQ "movntq"
+#else
+#define MOVNTQ "movq"
+#endif
+
 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
 {
   uint8_t *dest = dst;
@@ -14,10 +34,23 @@
 #endif
   end = s + src_size;
 #ifdef HAVE_MMX
+#ifdef PREFETCH
+  __asm __volatile(
+    PREFETCH" %0\n\t"
+    PREFETCH" 64%0\n\t"
+    PREFETCHW" %1\n\t"
+    PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory");
+#endif
   mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
   __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
   while(s < mm_end)
   {
+#ifdef PREFETCH
+    __asm __volatile(
+      PREFETCH" 128%0\n\t"
+      PREFETCHW" 128%1"
+      ::"m"(*s),"m"(*dest):"memory");
+#endif
     __asm __volatile(
       "movd %1, %%mm0\n\t"
       "movd 3%1, %%mm1\n\t"
@@ -27,14 +60,15 @@
       "punpckldq %%mm3, %%mm2\n\t"
       "pand %%mm7, %%mm0\n\t"
       "pand %%mm7, %%mm2\n\t"
-      "movq %%mm0, %0\n\t"
-      "movq %%mm2, 8%0"
+      MOVNTQ" %%mm0, %0\n\t"
+      MOVNTQ" %%mm2, 8%0"
       :"=m"(*dest)
       :"m"(*s)
       :"memory");
     dest += 16;
     s += 12;
   }
+  __asm __volatile(EMMS:::"memory");
 #endif
   while(s < end)
   {