# HG changeset patch # User michael # Date 1006809135 0 # Node ID 0f6cce3a8059553084f67f7b47e39b0e56ec6bd1 # Parent 21b6aab15ec9c591b2914c9c60be7230f35c1849 runtime cpu detection diff -r 21b6aab15ec9 -r 0f6cce3a8059 libvo/osd.c --- a/libvo/osd.c Mon Nov 26 18:53:32 2001 +0000 +++ b/libvo/osd.c Mon Nov 26 21:12:15 2001 +0000 @@ -1,414 +1,140 @@ // Generic alpha renderers for all YUV modes and RGB depths. // These are "reference implementations", should be optimized later (MMX, etc) -// Optimized by Nick and Michael +// Templating Code from Michael Niedermayer (michaelni@gmx.at) is under GPL //#define FAST_OSD //#define FAST_OSD_TABLE #include "config.h" #include "osd.h" -#include "../mmx_defs.h" //#define ENABLE_PROFILE #include "../my_profile.h" #include +#include "../cpudetect.h" -#ifdef HAVE_MMX -static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; +extern int verbose; // defined in mplayer.c + +#ifdef ARCH_X86 +#define CAN_COMPILE_X86_ASM #endif -void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -#if defined(FAST_OSD) && !defined(HAVE_MMX) - w=w>>1; -#endif -PROFILE_START(); - for(y=0;y>8)+src[x]; -#endif - } -#endif - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#ifdef HAVE_MMX - asm volatile(EMMS:::"memory"); -#endif -PROFILE_END("vo_draw_alpha_yv12"); - return; -} - -void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -#if defined(FAST_OSD) && !defined(HAVE_MMX) - w=w>>1; -#endif -PROFILE_START(); - for(y=0;y>8)+src[x]; -#endif - } -#endif - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#ifdef HAVE_MMX - asm volatile(EMMS:::"memory"); -#endif -PROFILE_END("vo_draw_alpha_yuy2"); - return; -} - -#ifdef HAVE_MMX +#ifdef CAN_COMPILE_X86_ASM +static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL; static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL; #endif + +//Note: we have C, X86-nommx, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one +//Plain C versions +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_3DNOW +#undef ARCH_X86 +#define RENAME(a) a ## _C +#include "osd_template.c" + +#ifdef CAN_COMPILE_X86_ASM + +//X86 noMMX versions +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_3DNOW +#define ARCH_X86 +#define RENAME(a) a ## _X86 +#include "osd_template.c" + +//MMX versions +#undef RENAME +#define HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_3DNOW +#define ARCH_X86 +#define RENAME(a) a ## _MMX +#include "osd_template.c" + +//MMX2 versions +#undef RENAME +#define HAVE_MMX +#define HAVE_MMX2 +#undef HAVE_3DNOW +#define ARCH_X86 +#define RENAME(a) a ## _MMX2 +#include "osd_template.c" + +//3DNOW versions +#undef RENAME +#define HAVE_MMX +#undef HAVE_MMX2 +#define HAVE_3DNOW +#define ARCH_X86 +#define RENAME(a) a ## _3DNow +#include "osd_template.c" + +#endif //CAN_COMPILE_X86_ASM + +void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.has3DNow) + vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.hasMMX) + vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride); + else + vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +} + +void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.has3DNow) + vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.hasMMX) + vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride); + else + vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +} + void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; - for(y=0;y>8)+src[x]; - dst[1]=((dst[1]*srca[x])>>8)+src[x]; - dst[2]=((dst[2]*srca[x])>>8)+src[x]; + vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride); #endif - } - dst+=3; // 24bpp - } -#endif /* arch_x86 */ - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#ifdef HAVE_MMX - asm volatile(EMMS:::"memory"); -#endif - return; } void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -PROFILE_START(); - for(y=0;y>8)+src[x]; - dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x]; - dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x]; -#endif - } - } -#endif /* arch_x86 */ - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#ifdef HAVE_MMX - asm volatile(EMMS:::"memory"); -#endif -PROFILE_END("vo_draw_alpha_rgb32"); - return; } #ifdef FAST_OSD_TABLE @@ -424,6 +150,23 @@ fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3); } #endif +//FIXME the optimized stuff is a lie for 15/16bpp as they arent optimized yet + if(verbose) + { +#ifdef CAN_COMPILE_X86_ASM + // ordered per speed fasterst first + if(gCpuCaps.hasMMX2) + printf("Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n"); + else if(gCpuCaps.has3DNow) + printf("Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n"); + else if(gCpuCaps.hasMMX) + printf("Using MMX Optimized OnScreenDisplay\n"); + else + printf("Using X86 Optimized OnScreenDisplay\n"); +#else + printf("Using Unoptimized OnScreenDisplay\n"); +#endif + } } void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ diff -r 21b6aab15ec9 -r 0f6cce3a8059 libvo/osd_template.c --- a/libvo/osd_template.c Mon Nov 26 18:53:32 2001 +0000 +++ b/libvo/osd_template.c Mon Nov 26 21:12:15 2001 +0000 @@ -1,22 +1,33 @@ // Generic alpha renderers for all YUV modes and RGB depths. -// These are "reference implementations", should be optimized later (MMX, etc) // Optimized by Nick and Michael +// Code from Michael Niedermayer (michaelni@gmx.at) is under GPL -//#define FAST_OSD -//#define FAST_OSD_TABLE +#undef PREFETCH +#undef EMMS +#undef PREFETCHW +#undef PAVGB -#include "config.h" -#include "osd.h" -#include "../mmx_defs.h" -//#define ENABLE_PROFILE -#include "../my_profile.h" -#include - -#ifdef HAVE_MMX -static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; +#ifdef HAVE_3DNOW +#define PREFETCH "prefetch" +#define PREFETCHW "prefetchw" +#define PAVGB "pavgusb" +#elif defined ( HAVE_MMX2 ) +#define PREFETCH "prefetchnta" +#define PREFETCHW "prefetcht0" +#define PAVGB "pavgb" +#else +#define PREFETCH "/nop" +#define PREFETCHW "/nop" #endif -void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#ifdef HAVE_3DNOW +/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ +#define EMMS "femms" +#else +#define EMMS "emms" +#endif + +static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; #if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; @@ -84,7 +95,7 @@ return; } -void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; #if defined(FAST_OSD) && !defined(HAVE_MMX) w=w>>1; @@ -150,11 +161,7 @@ return; } -#ifdef HAVE_MMX -static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL; -static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL; -#endif -void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ int y; for(y=0;y>3)<<10)|((i>>3)<<5)|(i>>3); - fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3); - } -#endif -} - -void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; - for(y=0;y>3; - dst[x]=(a<<10)|(a<<5)|a; -#endif -#else - unsigned char r=dst[x]&0x1F; - unsigned char g=(dst[x]>>5)&0x1F; - unsigned char b=(dst[x]>>10)&0x1F; - r=(((r*srca[x])>>5)+src[x])>>3; - g=(((g*srca[x])>>5)+src[x])>>3; - b=(((b*srca[x])>>5)+src[x])>>3; - dst[x]=(b<<10)|(g<<5)|r; -#endif - } - } - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } - return; -} - -void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; - for(y=0;y>3)<<11)|((src[x]>>2)<<5)|(src[x]>>3); -#endif -#else - unsigned char r=dst[x]&0x1F; - unsigned char g=(dst[x]>>5)&0x3F; - unsigned char b=(dst[x]>>11)&0x1F; - r=(((r*srca[x])>>5)+src[x])>>3; - g=(((g*srca[x])>>6)+src[x])>>2; - b=(((b*srca[x])>>5)+src[x])>>3; - dst[x]=(b<<11)|(g<<5)|r; -#endif - } - } - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } - return; -} -