Mercurial > mplayer.hg
changeset 32469:3fef2e17a03f
Move osd.[ch] and osd_template.c from libvo to sub.
author | cigaes |
---|---|
date | Wed, 27 Oct 2010 17:53:24 +0000 |
parents | 0c7c4ed0b7eb |
children | 139876e79725 |
files | Makefile libmenu/menu.c libmpcodecs/vf_expand.c libvo/osd.c libvo/osd.h libvo/osd_template.c libvo/vesa_lvo.c libvo/video_out_internal.h libvo/vosub_vidix.c sub/osd.c sub/osd.h sub/osd_template.c |
diffstat | 12 files changed, 955 insertions(+), 955 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Wed Oct 27 17:52:45 2010 +0000 +++ b/Makefile Wed Oct 27 17:53:24 2010 +0000 @@ -504,7 +504,6 @@ libmpdemux/video.c \ libmpdemux/yuv4mpeg.c \ libmpdemux/yuv4mpeg_ratio.c \ - libvo/osd.c \ osdep/$(GETCH) \ osdep/$(TIMER) \ stream/open.c \ @@ -517,6 +516,7 @@ stream/url.c \ sub/eosd.c \ sub/find_sub.c \ + sub/osd.c \ sub/spudec.c \ sub/sub.c \ sub/sub_cc.c \
--- a/libmenu/menu.c Wed Oct 27 17:52:45 2010 +0000 +++ b/libmenu/menu.c Wed Oct 27 17:53:24 2010 +0000 @@ -26,7 +26,7 @@ #include <fcntl.h> #include <unistd.h> -#include "libvo/osd.h" +#include "sub/osd.h" #include "sub/font_load.h" #include "sub/sub.h" #include "osdep/keycodes.h"
--- a/libmpcodecs/vf_expand.c Wed Oct 27 17:52:45 2010 +0000 +++ b/libmpcodecs/vf_expand.c Wed Oct 27 17:53:24 2010 +0000 @@ -36,7 +36,7 @@ #ifdef OSD_SUPPORT #include "sub/sub.h" -#include "libvo/osd.h" +#include "sub/osd.h" #endif #include "m_option.h"
--- a/libvo/osd.c Wed Oct 27 17:52:45 2010 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,428 +0,0 @@ -/* - * generic alpha renderers for all YUV modes and RGB depths - * These are "reference implementations", should be optimized later (MMX, etc). - * templating code by Michael Niedermayer (michaelni@gmx.at) - * - * This file is part of MPlayer. - * - * MPlayer is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * MPlayer is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with MPlayer; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -//#define FAST_OSD -//#define FAST_OSD_TABLE - -#include "config.h" -#include "osd.h" -#include "mp_msg.h" -#include <inttypes.h> -#include "cpudetect.h" - -#if ARCH_X86 -static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; -static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL; -static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL; -#endif - -//Note: we have C, X86-nommx, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one -//Plain C versions -#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_C -#endif - -#if ARCH_X86 - -#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_MMX -#endif - -#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_MMX2 -#endif - -#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_3DNOW -#endif - -#endif /* ARCH_X86 */ - -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX 0 -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 0 - -#if ! 
ARCH_X86 - -#ifdef COMPILE_C -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX 0 -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 0 -#define RENAME(a) a ## _C -#include "osd_template.c" -#endif - -#else - -//X86 noMMX versions -#ifdef COMPILE_C -#undef RENAME -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX 0 -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 0 -#define RENAME(a) a ## _X86 -#include "osd_template.c" -#endif - -//MMX versions -#ifdef COMPILE_MMX -#undef RENAME -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX 1 -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 0 -#define RENAME(a) a ## _MMX -#include "osd_template.c" -#endif - -//MMX2 versions -#ifdef COMPILE_MMX2 -#undef RENAME -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX 1 -#define HAVE_MMX2 1 -#define HAVE_AMD3DNOW 0 -#define RENAME(a) a ## _MMX2 -#include "osd_template.c" -#endif - -//3DNOW versions -#ifdef COMPILE_3DNOW -#undef RENAME -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX 1 -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 1 -#define RENAME(a) a ## _3DNow -#include "osd_template.c" -#endif - -#endif /* ARCH_X86 */ - -void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ -#if CONFIG_RUNTIME_CPUDETECT -#if ARCH_X86 - // ordered by speed / fastest first - if(gCpuCaps.hasMMX2) - vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.has3DNow) - vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.hasMMX) - vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride); - else - vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#else //CONFIG_RUNTIME_CPUDETECT -#if HAVE_MMX2 - vo_draw_alpha_yv12_MMX2(w, h, src, 
srca, srcstride, dstbase, dststride); -#elif HAVE_AMD3DNOW - vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_MMX - vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif ARCH_X86 - vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#endif //!CONFIG_RUNTIME_CPUDETECT -} - -void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ -#if CONFIG_RUNTIME_CPUDETECT -#if ARCH_X86 - // ordered by speed / fastest first - if(gCpuCaps.hasMMX2) - vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.has3DNow) - vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.hasMMX) - vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride); - else - vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#else //CONFIG_RUNTIME_CPUDETECT -#if HAVE_MMX2 - vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_AMD3DNOW - vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_MMX - vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif ARCH_X86 - vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#endif //!CONFIG_RUNTIME_CPUDETECT -} - -void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ -#if CONFIG_RUNTIME_CPUDETECT -#if ARCH_X86 - // ordered by speed / fastest first - if(gCpuCaps.hasMMX2) - vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride); - else 
if(gCpuCaps.has3DNow) - vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.hasMMX) - vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride); - else - vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#else //CONFIG_RUNTIME_CPUDETECT -#if HAVE_MMX2 - vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_AMD3DNOW - vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_MMX - vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif ARCH_X86 - vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#endif //!CONFIG_RUNTIME_CPUDETECT -} - -void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ -#if CONFIG_RUNTIME_CPUDETECT -#if ARCH_X86 - // ordered by speed / fastest first - if(gCpuCaps.hasMMX2) - vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.has3DNow) - vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.hasMMX) - vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride); - else - vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#else //CONFIG_RUNTIME_CPUDETECT -#if HAVE_MMX2 - vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_AMD3DNOW - vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_MMX - vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif ARCH_X86 - vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride); 
-#else - vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#endif //!CONFIG_RUNTIME_CPUDETECT -} - -void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ -#if CONFIG_RUNTIME_CPUDETECT -#if ARCH_X86 - // ordered by speed / fastest first - if(gCpuCaps.hasMMX2) - vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.has3DNow) - vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride); - else if(gCpuCaps.hasMMX) - vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride); - else - vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#else //CONFIG_RUNTIME_CPUDETECT -#if HAVE_MMX2 - vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_AMD3DNOW - vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride); -#elif HAVE_MMX - vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride); -#elif ARCH_X86 - vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride); -#else - vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride); -#endif -#endif //!CONFIG_RUNTIME_CPUDETECT -} - -#ifdef FAST_OSD_TABLE -static unsigned short fast_osd_12bpp_table[256]; -static unsigned short fast_osd_15bpp_table[256]; -static unsigned short fast_osd_16bpp_table[256]; -#endif - -void vo_draw_alpha_init(void){ -#ifdef FAST_OSD_TABLE - int i; - for(i=0;i<256;i++){ - fast_osd_12bpp_table[i]=((i>>4)<< 8)|((i>>4)<<4)|(i>>4); - fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3); - fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3); - } -#endif -//FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet - if( mp_msg_test(MSGT_OSD,MSGL_V) ) - { -#if CONFIG_RUNTIME_CPUDETECT -#if ARCH_X86 - // ordered per 
speed fasterst first - if(gCpuCaps.hasMMX2) - mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n"); - else if(gCpuCaps.has3DNow) - mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n"); - else if(gCpuCaps.hasMMX) - mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n"); - else - mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n"); -#else - mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n"); -#endif -#else //CONFIG_RUNTIME_CPUDETECT -#if HAVE_MMX2 - mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n"); -#elif HAVE_AMD3DNOW - mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n"); -#elif HAVE_MMX - mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n"); -#elif ARCH_X86 - mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n"); -#else - mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n"); -#endif -#endif //!CONFIG_RUNTIME_CPUDETECT - } -} - -void vo_draw_alpha_rgb12(int w, int h, unsigned char* src, unsigned char *srca, - int srcstride, unsigned char* dstbase, int dststride) { - int y; - for (y = 0; y < h; y++) { - register unsigned short *dst = (unsigned short*) dstbase; - register int x; - for (x = 0; x < w; x++) { - if(srca[x]){ -#ifdef FAST_OSD -#ifdef FAST_OSD_TABLE - dst[x] = fast_osd_12bpp_table[src[x]]; -#else - register unsigned int a = src[x] >> 4; - dst[x] = (a << 8) | (a << 4) | a; -#endif -#else - unsigned char r = dst[x] & 0x0F; - unsigned char g = (dst[x] >> 4) & 0x0F; - unsigned char b = (dst[x] >> 8) & 0x0F; - r = (((r*srca[x]) >> 4) + src[x]) >> 4; - g = (((g*srca[x]) >> 4) + src[x]) >> 4; - b = (((b*srca[x]) >> 4) + src[x]) >> 4; - dst[x] = (b << 8) | (g << 4) | r; -#endif - } - } - src += srcstride; - srca += srcstride; - dstbase += dststride; - } - return; -} - -void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char 
*srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; - for(y=0;y<h;y++){ - register unsigned short *dst = (unsigned short*) dstbase; - register int x; - for(x=0;x<w;x++){ - if(srca[x]){ -#ifdef FAST_OSD -#ifdef FAST_OSD_TABLE - dst[x]=fast_osd_15bpp_table[src[x]]; -#else - register unsigned int a=src[x]>>3; - dst[x]=(a<<10)|(a<<5)|a; -#endif -#else - unsigned char r=dst[x]&0x1F; - unsigned char g=(dst[x]>>5)&0x1F; - unsigned char b=(dst[x]>>10)&0x1F; - r=(((r*srca[x])>>5)+src[x])>>3; - g=(((g*srca[x])>>5)+src[x])>>3; - b=(((b*srca[x])>>5)+src[x])>>3; - dst[x]=(b<<10)|(g<<5)|r; -#endif - } - } - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } - return; -} - -void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; - for(y=0;y<h;y++){ - register unsigned short *dst = (unsigned short*) dstbase; - register int x; - for(x=0;x<w;x++){ - if(srca[x]){ -#ifdef FAST_OSD -#ifdef FAST_OSD_TABLE - dst[x]=fast_osd_16bpp_table[src[x]]; -#else - dst[x]=((src[x]>>3)<<11)|((src[x]>>2)<<5)|(src[x]>>3); -#endif -#else - unsigned char r=dst[x]&0x1F; - unsigned char g=(dst[x]>>5)&0x3F; - unsigned char b=(dst[x]>>11)&0x1F; - r=(((r*srca[x])>>5)+src[x])>>3; - g=(((g*srca[x])>>6)+src[x])>>2; - b=(((b*srca[x])>>5)+src[x])>>3; - dst[x]=(b<<11)|(g<<5)|r; -#endif - } - } - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } - return; -}
--- a/libvo/osd.h Wed Oct 27 17:52:45 2010 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -/* - * generic alpha renderers for all YUV modes and RGB depths - * These are "reference implementations", should be optimized later (MMX, etc). - * - * This file is part of MPlayer. - * - * MPlayer is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * MPlayer is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with MPlayer; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- */ - -#ifndef MPLAYER_OSD_H -#define MPLAYER_OSD_H - -void vo_draw_alpha_init(void); // build tables - -void vo_draw_alpha_yv12(int w, int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase, int dststride); -void vo_draw_alpha_yuy2(int w, int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase, int dststride); -void vo_draw_alpha_uyvy(int w, int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase, int dststride); -void vo_draw_alpha_rgb24(int w, int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase, int dststride); -void vo_draw_alpha_rgb32(int w, int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase, int dststride); -void vo_draw_alpha_rgb12(int w, int h, unsigned char* src, unsigned char *srca, - int srcstride, unsigned char* dstbase, int dststride); -void vo_draw_alpha_rgb15(int w, int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase, int dststride); -void vo_draw_alpha_rgb16(int w, int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase, int dststride); - -#endif /* MPLAYER_OSD_H */
--- a/libvo/osd_template.c Wed Oct 27 17:52:45 2010 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,484 +0,0 @@ -/* - * generic alpha renderers for all YUV modes and RGB depths - * Optimized by Nick and Michael. - * - * This file is part of MPlayer. - * - * MPlayer is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * MPlayer is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with MPlayer; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef PREFETCH -#undef EMMS -#undef PREFETCHW -#undef PAVGB - -#if HAVE_AMD3DNOW -#define PREFETCH "prefetch" -#define PREFETCHW "prefetchw" -#define PAVGB "pavgusb" -#elif HAVE_MMX2 -#define PREFETCH "prefetchnta" -#define PREFETCHW "prefetcht0" -#define PAVGB "pavgb" -#else -#define PREFETCH " # nop" -#define PREFETCHW " # nop" -#endif - -#if HAVE_AMD3DNOW -/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. 
*/ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -#if defined(FAST_OSD) && !HAVE_MMX - w=w>>1; -#endif -#if HAVE_MMX - __asm__ volatile( - "pcmpeqb %%mm5, %%mm5\n\t" // F..F - "movq %%mm5, %%mm4\n\t" - "movq %%mm5, %%mm7\n\t" - "psllw $8, %%mm5\n\t" //FF00FF00FF00 - "psrlw $8, %%mm4\n\t" //00FF00FF00FF - ::); -#endif - for(y=0;y<h;y++){ - register int x; -#if HAVE_MMX - __asm__ volatile( - PREFETCHW" %0\n\t" - PREFETCH" %1\n\t" - PREFETCH" %2\n\t" - ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); - for(x=0;x<w;x+=8){ - __asm__ volatile( - "movl %1, %%eax\n\t" - "orl 4%1, %%eax\n\t" - " jz 1f\n\t" - PREFETCHW" 32%0\n\t" - PREFETCH" 32%1\n\t" - PREFETCH" 32%2\n\t" - "movq %0, %%mm0\n\t" // dstbase - "movq %%mm0, %%mm1\n\t" - "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y - "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y - "movq %1, %%mm2\n\t" //srca HGFEDCBA - "paddb %%mm7, %%mm2\n\t" - "movq %%mm2, %%mm3\n\t" - "pand %%mm4, %%mm2\n\t" //0G0E0C0A - "psrlw $8, %%mm3\n\t" //0H0F0D0B - "pmullw %%mm2, %%mm0\n\t" - "pmullw %%mm3, %%mm1\n\t" - "psrlw $8, %%mm0\n\t" - "pand %%mm5, %%mm1\n\t" - "por %%mm1, %%mm0\n\t" - "paddb %2, %%mm0\n\t" - "movq %%mm0, %0\n\t" - "1:\n\t" - :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x]) - : "%eax"); - } -#else - for(x=0;x<w;x++){ -#ifdef FAST_OSD - if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0]; - if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1]; -#else - if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x]; -#endif - } -#endif - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#if HAVE_MMX - __asm__ volatile(EMMS:::"memory"); -#endif - return; -} - -static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -#if defined(FAST_OSD) && !HAVE_MMX - w=w>>1; -#endif -#if HAVE_MMX - 
__asm__ volatile( - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm5, %%mm5\n\t" // F..F - "movq %%mm5, %%mm6\n\t" - "movq %%mm5, %%mm4\n\t" - "psllw $8, %%mm5\n\t" //FF00FF00FF00 - "psrlw $8, %%mm4\n\t" //00FF00FF00FF - ::); -#endif - for(y=0;y<h;y++){ - register int x; -#if HAVE_MMX - __asm__ volatile( - PREFETCHW" %0\n\t" - PREFETCH" %1\n\t" - PREFETCH" %2\n\t" - ::"m"(*dstbase),"m"(*srca),"m"(*src)); - for(x=0;x<w;x+=4){ - __asm__ volatile( - "movl %1, %%eax\n\t" - "orl %%eax, %%eax\n\t" - " jz 1f\n\t" - PREFETCHW" 32%0\n\t" - PREFETCH" 32%1\n\t" - PREFETCH" 32%2\n\t" - "movq %0, %%mm0\n\t" // dstbase - "movq %%mm0, %%mm1\n\t" - "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y - "movd %%eax, %%mm2\n\t" //srca 0000DCBA - "paddb %%mm6, %%mm2\n\t" - "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A - "pmullw %%mm2, %%mm0\n\t" - "psrlw $8, %%mm0\n\t" - "pand %%mm5, %%mm1\n\t" //U0V0U0V0 - "movd %2, %%mm2\n\t" //src 0000DCBA - "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A - "por %%mm1, %%mm0\n\t" - "paddb %%mm2, %%mm0\n\t" - "movq %%mm0, %0\n\t" - "1:\n\t" - :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x]) - : "%eax"); - } -#else - for(x=0;x<w;x++){ -#ifdef FAST_OSD - if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0]; - if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1]; -#else - if(srca[x]) { - dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x]; - dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128; - } -#endif - } -#endif - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#if HAVE_MMX - __asm__ volatile(EMMS:::"memory"); -#endif - return; -} - -static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -#if defined(FAST_OSD) - w=w>>1; -#endif - for(y=0;y<h;y++){ - register int x; - for(x=0;x<w;x++){ -#ifdef FAST_OSD - if(srca[2*x+0]) dstbase[4*x+2]=src[2*x+0]; - if(srca[2*x+1]) dstbase[4*x+0]=src[2*x+1]; -#else - if(srca[x]) { - dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x]; - 
dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128; - } -#endif - } - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -} - -static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -#if HAVE_MMX - __asm__ volatile( - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm6, %%mm6\n\t" // F..F - ::); -#endif - for(y=0;y<h;y++){ - register unsigned char *dst = dstbase; - register int x; -#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX) -#if HAVE_MMX - __asm__ volatile( - PREFETCHW" %0\n\t" - PREFETCH" %1\n\t" - PREFETCH" %2\n\t" - ::"m"(*dst),"m"(*srca),"m"(*src):"memory"); - for(x=0;x<w;x+=2){ - if(srca[x] || srca[x+1]) - __asm__ volatile( - PREFETCHW" 32%0\n\t" - PREFETCH" 32%1\n\t" - PREFETCH" 32%2\n\t" - "movq %0, %%mm0\n\t" // dstbase - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm5\n\t" - "punpcklbw %%mm7, %%mm0\n\t" - "punpckhbw %%mm7, %%mm1\n\t" - "movd %1, %%mm2\n\t" // srca ABCD0000 - "paddb %%mm6, %%mm2\n\t" - "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD - "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB - "psrlq $8, %%mm2\n\t" // srca AAABBBB0 - "movq %%mm2, %%mm3\n\t" - "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B - "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00 - "pmullw %%mm2, %%mm0\n\t" - "pmullw %%mm3, %%mm1\n\t" - "psrlw $8, %%mm0\n\t" - "psrlw $8, %%mm1\n\t" - "packuswb %%mm1, %%mm0\n\t" - "movd %2, %%mm2 \n\t" // src ABCD0000 - "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD - "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB - "psrlq $8, %%mm2\n\t" // src AAABBBB0 - "paddb %%mm2, %%mm0\n\t" - "pand %4, %%mm5\n\t" - "pand %3, %%mm0\n\t" - "por %%mm0, %%mm5\n\t" - "movq %%mm5, %0\n\t" - :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh)); - dst += 6; - } -#else /* HAVE_MMX */ - for(x=0;x<w;x++){ - if(srca[x]){ - __asm__ volatile( - "movzbl (%0), %%ecx\n\t" - "movzbl 1(%0), %%eax\n\t" - - "imull %1, %%ecx\n\t" - "imull %1, %%eax\n\t" 
- - "addl %2, %%ecx\n\t" - "addl %2, %%eax\n\t" - - "movb %%ch, (%0)\n\t" - "movb %%ah, 1(%0)\n\t" - - "movzbl 2(%0), %%eax\n\t" - "imull %1, %%eax\n\t" - "addl %2, %%eax\n\t" - "movb %%ah, 2(%0)\n\t" - : - :"D" (dst), - "r" ((unsigned)srca[x]), - "r" (((unsigned)src[x])<<8) - :"%eax", "%ecx" - ); - } - dst += 3; - } -#endif /* !HAVE_MMX */ -#else /*non x86 arch or x86_64 with MMX disabled */ - for(x=0;x<w;x++){ - if(srca[x]){ -#ifdef FAST_OSD - dst[0]=dst[1]=dst[2]=src[x]; -#else - dst[0]=((dst[0]*srca[x])>>8)+src[x]; - dst[1]=((dst[1]*srca[x])>>8)+src[x]; - dst[2]=((dst[2]*srca[x])>>8)+src[x]; -#endif - } - dst+=3; // 24bpp - } -#endif /* arch_x86 */ - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#if HAVE_MMX - __asm__ volatile(EMMS:::"memory"); -#endif - return; -} - -static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - int y; -#if HAVE_BIGENDIAN - dstbase++; -#endif -#if HAVE_MMX -#if HAVE_AMD3DNOW - __asm__ volatile( - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm6, %%mm6\n\t" // F..F - ::); -#else /* HAVE_AMD3DNOW */ - __asm__ volatile( - "pxor %%mm7, %%mm7\n\t" - "pcmpeqb %%mm5, %%mm5\n\t" // F..F - "movq %%mm5, %%mm4\n\t" - "psllw $8, %%mm5\n\t" //FF00FF00FF00 - "psrlw $8, %%mm4\n\t" //00FF00FF00FF - ::); -#endif /* HAVE_AMD3DNOW */ -#endif /* HAVE_MMX */ - for(y=0;y<h;y++){ - register int x; -#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX) -#if HAVE_MMX -#if HAVE_AMD3DNOW - __asm__ volatile( - PREFETCHW" %0\n\t" - PREFETCH" %1\n\t" - PREFETCH" %2\n\t" - ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); - for(x=0;x<w;x+=2){ - if(srca[x] || srca[x+1]) - __asm__ volatile( - PREFETCHW" 32%0\n\t" - PREFETCH" 32%1\n\t" - PREFETCH" 32%2\n\t" - "movq %0, %%mm0\n\t" // dstbase - "movq %%mm0, %%mm1\n\t" - "punpcklbw %%mm7, %%mm0\n\t" - "punpckhbw %%mm7, %%mm1\n\t" - "movd %1, %%mm2\n\t" // srca ABCD0000 - "paddb %%mm6, %%mm2\n\t" - "punpcklbw %%mm2, %%mm2\n\t" 
// srca AABBCCDD - "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB - "movq %%mm2, %%mm3\n\t" - "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A - "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B - "pmullw %%mm2, %%mm0\n\t" - "pmullw %%mm3, %%mm1\n\t" - "psrlw $8, %%mm0\n\t" - "psrlw $8, %%mm1\n\t" - "packuswb %%mm1, %%mm0\n\t" - "movd %2, %%mm2 \n\t" // src ABCD0000 - "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD - "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB - "paddb %%mm2, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])); - } -#else //this is faster for intels crap - __asm__ volatile( - PREFETCHW" %0\n\t" - PREFETCH" %1\n\t" - PREFETCH" %2\n\t" - ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); - for(x=0;x<w;x+=4){ - __asm__ volatile( - "movl %1, %%eax\n\t" - "orl %%eax, %%eax\n\t" - " jz 1f\n\t" - PREFETCHW" 32%0\n\t" - PREFETCH" 32%1\n\t" - PREFETCH" 32%2\n\t" - "movq %0, %%mm0\n\t" // dstbase - "movq %%mm0, %%mm1\n\t" - "pand %%mm4, %%mm0\n\t" //0R0B0R0B - "psrlw $8, %%mm1\n\t" //0?0G0?0G - "movd %%eax, %%mm2\n\t" //srca 0000DCBA - "paddb %3, %%mm2\n\t" - "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA - "movq %%mm2, %%mm3\n\t" - "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A - "pmullw %%mm2, %%mm0\n\t" - "pmullw %%mm2, %%mm1\n\t" - "psrlw $8, %%mm0\n\t" - "pand %%mm5, %%mm1\n\t" - "por %%mm1, %%mm0\n\t" - "movd %2, %%mm2 \n\t" //src 0000DCBA - "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA - "movq %%mm2, %%mm6\n\t" - "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA - "paddb %%mm2, %%mm0\n\t" - "movq %%mm0, %0\n\t" - - "movq 8%0, %%mm0\n\t" // dstbase - "movq %%mm0, %%mm1\n\t" - "pand %%mm4, %%mm0\n\t" //0R0B0R0B - "psrlw $8, %%mm1\n\t" //0?0G0?0G - "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C - "pmullw %%mm3, %%mm0\n\t" - "pmullw %%mm3, %%mm1\n\t" - "psrlw $8, %%mm0\n\t" - "pand %%mm5, %%mm1\n\t" - "por %%mm1, %%mm0\n\t" - "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC - "paddb %%mm6, %%mm0\n\t" - "movq %%mm0, 8%0\n\t" - "1:\n\t" - :: "m" 
(dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF) - : "%eax"); - } -#endif -#else /* HAVE_MMX */ - for(x=0;x<w;x++){ - if(srca[x]){ - __asm__ volatile( - "movzbl (%0), %%ecx\n\t" - "movzbl 1(%0), %%eax\n\t" - "movzbl 2(%0), %%edx\n\t" - - "imull %1, %%ecx\n\t" - "imull %1, %%eax\n\t" - "imull %1, %%edx\n\t" - - "addl %2, %%ecx\n\t" - "addl %2, %%eax\n\t" - "addl %2, %%edx\n\t" - - "movb %%ch, (%0)\n\t" - "movb %%ah, 1(%0)\n\t" - "movb %%dh, 2(%0)\n\t" - - : - :"r" (&dstbase[4*x]), - "r" ((unsigned)srca[x]), - "r" (((unsigned)src[x])<<8) - :"%eax", "%ecx", "%edx" - ); - } - } -#endif /* HAVE_MMX */ -#else /*non x86 arch or x86_64 with MMX disabled */ - for(x=0;x<w;x++){ - if(srca[x]){ -#ifdef FAST_OSD - dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x]; -#else - dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x]; - dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x]; - dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x]; -#endif - } - } -#endif /* arch_x86 */ - src+=srcstride; - srca+=srcstride; - dstbase+=dststride; - } -#if HAVE_MMX - __asm__ volatile(EMMS:::"memory"); -#endif - return; -}
--- a/libvo/vesa_lvo.c Wed Oct 27 17:52:45 2010 +0000 +++ b/libvo/vesa_lvo.c Wed Oct 27 17:53:24 2010 +0000 @@ -38,7 +38,7 @@ #include "libmpcodecs/img_format.h" #include "drivers/mga_vid.h" /* <- should be changed to "linux/'something'.h" */ #include "fastmemcpy.h" -#include "osd.h" +#include "sub/osd.h" #include "video_out.h" #include "sub/sub.h" #include "libmpcodecs/vfcap.h"
--- a/libvo/video_out_internal.h Wed Oct 27 17:52:45 2010 +0000 +++ b/libvo/video_out_internal.h Wed Oct 27 17:53:24 2010 +0000 @@ -55,6 +55,6 @@ uninit\ }; -#include "osd.h" +#include "sub/osd.h" #endif /* MPLAYER_VIDEO_OUT_INTERNAL_H */
--- a/libvo/vosub_vidix.c Wed Oct 27 17:52:45 2010 +0000 +++ b/libvo/vosub_vidix.c Wed Oct 27 17:53:24 2010 +0000 @@ -40,7 +40,7 @@ #include "vidix/vidix.h" #include "fastmemcpy.h" -#include "osd.h" +#include "sub/osd.h" #include "video_out.h" #include "sub/sub.h" #include "vosub_vidix.h"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sub/osd.c Wed Oct 27 17:53:24 2010 +0000 @@ -0,0 +1,428 @@ +/* + * generic alpha renderers for all YUV modes and RGB depths + * These are "reference implementations", should be optimized later (MMX, etc). + * templating code by Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +//#define FAST_OSD +//#define FAST_OSD_TABLE + +#include "config.h" +#include "osd.h" +#include "mp_msg.h" +#include <inttypes.h> +#include "cpudetect.h" + +#if ARCH_X86 +static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; +static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL; +static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL; +#endif + +//Note: we have C, X86-nommx, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one +//Plain C versions +#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT +#define COMPILE_C +#endif + +#if ARCH_X86 + +#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT +#define COMPILE_MMX +#endif + +#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT +#define COMPILE_MMX2 +#endif + +#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT +#define COMPILE_3DNOW +#endif + +#endif /* ARCH_X86 */ + +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#define HAVE_MMX 0 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 + +#if ! 
ARCH_X86 + +#ifdef COMPILE_C +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#define HAVE_MMX 0 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 +#define RENAME(a) a ## _C +#include "osd_template.c" +#endif + +#else + +//X86 noMMX versions +#ifdef COMPILE_C +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#define HAVE_MMX 0 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 +#define RENAME(a) a ## _X86 +#include "osd_template.c" +#endif + +//MMX versions +#ifdef COMPILE_MMX +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#define HAVE_MMX 1 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 0 +#define RENAME(a) a ## _MMX +#include "osd_template.c" +#endif + +//MMX2 versions +#ifdef COMPILE_MMX2 +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#define HAVE_MMX 1 +#define HAVE_MMX2 1 +#define HAVE_AMD3DNOW 0 +#define RENAME(a) a ## _MMX2 +#include "osd_template.c" +#endif + +//3DNOW versions +#ifdef COMPILE_3DNOW +#undef RENAME +#undef HAVE_MMX +#undef HAVE_MMX2 +#undef HAVE_AMD3DNOW +#define HAVE_MMX 1 +#define HAVE_MMX2 0 +#define HAVE_AMD3DNOW 1 +#define RENAME(a) a ## _3DNow +#include "osd_template.c" +#endif + +#endif /* ARCH_X86 */ + +void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#if CONFIG_RUNTIME_CPUDETECT +#if ARCH_X86 + // ordered by speed / fastest first + if(gCpuCaps.hasMMX2) + vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.has3DNow) + vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.hasMMX) + vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride); + else + vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#else //CONFIG_RUNTIME_CPUDETECT +#if HAVE_MMX2 + vo_draw_alpha_yv12_MMX2(w, h, src, 
srca, srcstride, dstbase, dststride); +#elif HAVE_AMD3DNOW + vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_MMX + vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride); +#elif ARCH_X86 + vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#endif //!CONFIG_RUNTIME_CPUDETECT +} + +void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#if CONFIG_RUNTIME_CPUDETECT +#if ARCH_X86 + // ordered by speed / fastest first + if(gCpuCaps.hasMMX2) + vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.has3DNow) + vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.hasMMX) + vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride); + else + vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#else //CONFIG_RUNTIME_CPUDETECT +#if HAVE_MMX2 + vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_AMD3DNOW + vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_MMX + vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride); +#elif ARCH_X86 + vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#endif //!CONFIG_RUNTIME_CPUDETECT +} + +void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#if CONFIG_RUNTIME_CPUDETECT +#if ARCH_X86 + // ordered by speed / fastest first + if(gCpuCaps.hasMMX2) + vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride); + else 
if(gCpuCaps.has3DNow) + vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.hasMMX) + vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride); + else + vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#else //CONFIG_RUNTIME_CPUDETECT +#if HAVE_MMX2 + vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_AMD3DNOW + vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_MMX + vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride); +#elif ARCH_X86 + vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#endif //!CONFIG_RUNTIME_CPUDETECT +} + +void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#if CONFIG_RUNTIME_CPUDETECT +#if ARCH_X86 + // ordered by speed / fastest first + if(gCpuCaps.hasMMX2) + vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.has3DNow) + vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.hasMMX) + vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride); + else + vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#else //CONFIG_RUNTIME_CPUDETECT +#if HAVE_MMX2 + vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_AMD3DNOW + vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_MMX + vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride); +#elif ARCH_X86 + vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride); 
+#else + vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#endif //!CONFIG_RUNTIME_CPUDETECT +} + +void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +#if CONFIG_RUNTIME_CPUDETECT +#if ARCH_X86 + // ordered by speed / fastest first + if(gCpuCaps.hasMMX2) + vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.has3DNow) + vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride); + else if(gCpuCaps.hasMMX) + vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride); + else + vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#else //CONFIG_RUNTIME_CPUDETECT +#if HAVE_MMX2 + vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_AMD3DNOW + vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride); +#elif HAVE_MMX + vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride); +#elif ARCH_X86 + vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride); +#else + vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride); +#endif +#endif //!CONFIG_RUNTIME_CPUDETECT +} + +#ifdef FAST_OSD_TABLE +static unsigned short fast_osd_12bpp_table[256]; +static unsigned short fast_osd_15bpp_table[256]; +static unsigned short fast_osd_16bpp_table[256]; +#endif + +void vo_draw_alpha_init(void){ +#ifdef FAST_OSD_TABLE + int i; + for(i=0;i<256;i++){ + fast_osd_12bpp_table[i]=((i>>4)<< 8)|((i>>4)<<4)|(i>>4); + fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3); + fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3); + } +#endif +//FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet + if( mp_msg_test(MSGT_OSD,MSGL_V) ) + { +#if CONFIG_RUNTIME_CPUDETECT +#if ARCH_X86 + // ordered per 
speed fasterst first + if(gCpuCaps.hasMMX2) + mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n"); + else if(gCpuCaps.has3DNow) + mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n"); + else if(gCpuCaps.hasMMX) + mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n"); + else + mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n"); +#else + mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n"); +#endif +#else //CONFIG_RUNTIME_CPUDETECT +#if HAVE_MMX2 + mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n"); +#elif HAVE_AMD3DNOW + mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n"); +#elif HAVE_MMX + mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n"); +#elif ARCH_X86 + mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n"); +#else + mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n"); +#endif +#endif //!CONFIG_RUNTIME_CPUDETECT + } +} + +void vo_draw_alpha_rgb12(int w, int h, unsigned char* src, unsigned char *srca, + int srcstride, unsigned char* dstbase, int dststride) { + int y; + for (y = 0; y < h; y++) { + register unsigned short *dst = (unsigned short*) dstbase; + register int x; + for (x = 0; x < w; x++) { + if(srca[x]){ +#ifdef FAST_OSD +#ifdef FAST_OSD_TABLE + dst[x] = fast_osd_12bpp_table[src[x]]; +#else + register unsigned int a = src[x] >> 4; + dst[x] = (a << 8) | (a << 4) | a; +#endif +#else + unsigned char r = dst[x] & 0x0F; + unsigned char g = (dst[x] >> 4) & 0x0F; + unsigned char b = (dst[x] >> 8) & 0x0F; + r = (((r*srca[x]) >> 4) + src[x]) >> 4; + g = (((g*srca[x]) >> 4) + src[x]) >> 4; + b = (((b*srca[x]) >> 4) + src[x]) >> 4; + dst[x] = (b << 8) | (g << 4) | r; +#endif + } + } + src += srcstride; + srca += srcstride; + dstbase += dststride; + } + return; +} + +void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char 
*srca, int srcstride, unsigned char* dstbase,int dststride){ + int y; + for(y=0;y<h;y++){ + register unsigned short *dst = (unsigned short*) dstbase; + register int x; + for(x=0;x<w;x++){ + if(srca[x]){ +#ifdef FAST_OSD +#ifdef FAST_OSD_TABLE + dst[x]=fast_osd_15bpp_table[src[x]]; +#else + register unsigned int a=src[x]>>3; + dst[x]=(a<<10)|(a<<5)|a; +#endif +#else + unsigned char r=dst[x]&0x1F; + unsigned char g=(dst[x]>>5)&0x1F; + unsigned char b=(dst[x]>>10)&0x1F; + r=(((r*srca[x])>>5)+src[x])>>3; + g=(((g*srca[x])>>5)+src[x])>>3; + b=(((b*srca[x])>>5)+src[x])>>3; + dst[x]=(b<<10)|(g<<5)|r; +#endif + } + } + src+=srcstride; + srca+=srcstride; + dstbase+=dststride; + } + return; +} + +void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ + int y; + for(y=0;y<h;y++){ + register unsigned short *dst = (unsigned short*) dstbase; + register int x; + for(x=0;x<w;x++){ + if(srca[x]){ +#ifdef FAST_OSD +#ifdef FAST_OSD_TABLE + dst[x]=fast_osd_16bpp_table[src[x]]; +#else + dst[x]=((src[x]>>3)<<11)|((src[x]>>2)<<5)|(src[x]>>3); +#endif +#else + unsigned char r=dst[x]&0x1F; + unsigned char g=(dst[x]>>5)&0x3F; + unsigned char b=(dst[x]>>11)&0x1F; + r=(((r*srca[x])>>5)+src[x])>>3; + g=(((g*srca[x])>>6)+src[x])>>2; + b=(((b*srca[x])>>5)+src[x])>>3; + dst[x]=(b<<11)|(g<<5)|r; +#endif + } + } + src+=srcstride; + srca+=srcstride; + dstbase+=dststride; + } + return; +}
/*
 * generic alpha renderers for all YUV modes and RGB depths
 * These are "reference implementations", should be optimized later (MMX, etc).
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#ifndef MPLAYER_OSD_H
#define MPLAYER_OSD_H

/*
 * Builds the lookup tables used when FAST_OSD_TABLE is compiled in and,
 * at verbose message level, reports which renderer variant is in use.
 */
void vo_draw_alpha_init(void);

/*
 * Every vo_draw_alpha_<format>() renderer takes the same arguments:
 *
 *   w, h       width and height of the area to draw
 *   src        OSD values, one byte per pixel
 *   srca       OSD alpha, one byte per pixel; the plain C renderers leave a
 *              pixel untouched when its srca byte is 0 and otherwise store
 *              ((dst * srca) >> 8) + src
 *   srcstride  added to both src and srca after each row
 *   dstbase    start of the destination area
 *   dststride  added to dstbase after each row
 */

/* YUV destinations */
void vo_draw_alpha_yv12(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride);
void vo_draw_alpha_yuy2(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride);
void vo_draw_alpha_uyvy(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride);

/* RGB destinations, 16-bit pixels */
void vo_draw_alpha_rgb12(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride);
void vo_draw_alpha_rgb15(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride);
void vo_draw_alpha_rgb16(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride);

/* RGB destinations, 3 and 4 bytes per pixel */
void vo_draw_alpha_rgb24(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride);
void vo_draw_alpha_rgb32(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride);

#endif /* MPLAYER_OSD_H */
/*
 * generic alpha renderers for all YUV modes and RGB depths
 * Optimized by Nick and Michael.
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * Template: the including file defines RENAME() and HAVE_MMX / HAVE_MMX2 /
 * HAVE_AMD3DNOW before each inclusion, so the same source yields one set of
 * functions per CPU variant.  The instruction-name macros below are reset
 * on every inclusion for that reason.
 */
#undef PREFETCH
#undef EMMS
#undef PREFETCHW
#undef PAVGB

#if HAVE_AMD3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB "pavgusb"
#elif HAVE_MMX2
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB "pavgb"
#else
/* no prefetch instruction available: expand to an assembler comment */
#define PREFETCH " # nop"
#define PREFETCHW " # nop"
#endif

#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif

/*
 * Destination with one byte per pixel.
 * C path: a pixel with non-zero srca becomes ((dst*srca)>>8)+src.
 * FAST_OSD (C path only): w is halved and pixels are copied in pairs
 * without blending.
 * MMX path: 8 pixels per iteration; a group is skipped only when all eight
 * srca bytes are zero.
 * NOTE(review): the MMX loop advances by 8 regardless of w, so it appears to
 * touch bytes past w when w is not a multiple of 8 -- confirm callers pad.
 */
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !HAVE_MMX
    w=w>>1;
#endif
#if HAVE_MMX
    /* constants kept in registers for the whole call:
     * mm7 = all ones, mm5 = high-byte mask, mm4 = low-byte mask */
    __asm__ volatile(
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "movq %%mm5, %%mm7\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#if HAVE_MMX
    __asm__ volatile(
        PREFETCHW" %0\n\t"
        PREFETCH" %1\n\t"
        PREFETCH" %2\n\t"
        ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=8){
        /* NOTE(review): %0 is stored to below but is declared as an input
         * operand, and this statement has no "memory" clobber. */
        __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl 4%1, %%eax\n\t"
                " jz 1f\n\t" // all 8 srca bytes are zero: nothing to draw
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
                "movq %1, %%mm2\n\t" //srca HGFEDCBA
                "paddb %%mm7, %%mm2\n\t" // adds 0xFF to every srca byte
                "movq %%mm2, %%mm3\n\t"
                "pand %%mm4, %%mm2\n\t" //0G0E0C0A
                "psrlw $8, %%mm3\n\t" //0H0F0D0B
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "paddb %2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
                : "%eax");
        }
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
#else
            if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#if HAVE_MMX
    /* leave MMX state so that later x87 floating point code works */
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}

/*
 * Destination with two bytes per pixel, the blended value at even offsets.
 * C path: for a pixel with non-zero srca the byte at 2*x becomes
 * ((dst*srca)>>8)+src and the byte at 2*x+1 is scaled towards 128.
 * FAST_OSD (C path only): w is halved and only the bytes at even offsets
 * are overwritten.
 * MMX path: 4 pixels per iteration, skipped when all four srca bytes are
 * zero; the bytes at odd offsets are carried over unchanged (mask mm5).
 */
static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !HAVE_MMX
    w=w>>1;
#endif
#if HAVE_MMX
    /* mm7 = 0, mm6 = all ones, mm5 = high-byte mask, mm4 = low-byte mask */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm6\n\t"
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif
    for(y=0;y<h;y++){
        register int x;
#if HAVE_MMX
    __asm__ volatile(
        PREFETCHW" %0\n\t"
        PREFETCH" %1\n\t"
        PREFETCH" %2\n\t"
        ::"m"(*dstbase),"m"(*srca),"m"(*src));
    for(x=0;x<w;x+=4){
        __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t" // all 4 srca bytes are zero: nothing to draw
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %%mm6, %%mm2\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
                "pmullw %%mm2, %%mm0\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t" //U0V0U0V0
                "movd %2, %%mm2\n\t" //src 0000DCBA
                "punpcklbw %%mm7, %%mm2\n\t" //src 0D0C0B0A
                "por %%mm1, %%mm0\n\t"
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                "1:\n\t"
                :: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
                : "%eax");
        }
#else
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
            if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
#else
            if(srca[x]) {
               dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
               dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
           }
#endif
        }
#endif
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#if HAVE_MMX
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}

/*
 * Same layout as yuy2 with the two bytes of each pixel swapped: the blended
 * value is at odd offsets, the byte scaled towards 128 at even offsets.
 * There is no MMX path; with FAST_OSD w is halved and only the bytes at
 * offsets 4*x+2 and 4*x+0 are overwritten.
 */
static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD)
    w=w>>1;
#endif
    for(y=0;y<h;y++){
        register int x;
        for(x=0;x<w;x++){
#ifdef FAST_OSD
            if(srca[2*x+0]) dstbase[4*x+2]=src[2*x+0];
            if(srca[2*x+1]) dstbase[4*x+0]=src[2*x+1];
#else
            if(srca[x]) {
               dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x];
               dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128;
           }
#endif
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}

/*
 * Destination with 3 bytes per pixel; all three bytes get the same blend
 * ((dst*srca)>>8)+src.
 * Three code paths: MMX (2 pixels per iteration), x86 integer asm (used on
 * x86 without MMX, except on x86_64) and plain C.  FAST_OSD is honoured by
 * the plain C path only.
 */
static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if HAVE_MMX
    /* mm7 = 0, mm6 = all ones */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#endif
    for(y=0;y<h;y++){
        register unsigned char *dst = dstbase;
        register int x;
#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
#if HAVE_MMX
    __asm__ volatile(
        PREFETCHW" %0\n\t"
        PREFETCH" %1\n\t"
        PREFETCH" %2\n\t"
        ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=2){
     /* 8 destination bytes are loaded and stored, but only the low 6 (two
      * pixels) are replaced: mask24hl keeps the blended bytes, mask24lh
      * restores the original upper two. */
     if(srca[x] || srca[x+1])
        __asm__ volatile(
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "movq %%mm0, %%mm5\n\t"
                "punpcklbw %%mm7, %%mm0\n\t"
                "punpckhbw %%mm7, %%mm1\n\t"
                "movd %1, %%mm2\n\t" // srca ABCD0000
                "paddb %%mm6, %%mm2\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                "psrlq $8, %%mm2\n\t" // srca AAABBBB0
                "movq %%mm2, %%mm3\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
                "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "psrlw $8, %%mm1\n\t"
                "packuswb %%mm1, %%mm0\n\t"
                "movd %2, %%mm2 \n\t" // src ABCD0000
                "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                "psrlq $8, %%mm2\n\t" // src AAABBBB0
                "paddb %%mm2, %%mm0\n\t"
                "pand %4, %%mm5\n\t"
                "pand %3, %%mm0\n\t"
                "por %%mm0, %%mm5\n\t"
                "movq %%mm5, %0\n\t"
                :: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh));
        dst += 6;
    }
#else /* HAVE_MMX */
    for(x=0;x<w;x++){
        if(srca[x]){
            /* per byte: dst = (dst*srca + (src<<8)) >> 8, the shift being
             * done by storing the second-lowest register byte (ch/ah) */
            __asm__ volatile(
                "movzbl (%0), %%ecx\n\t"
                "movzbl 1(%0), %%eax\n\t"

                "imull %1, %%ecx\n\t"
                "imull %1, %%eax\n\t"

                "addl %2, %%ecx\n\t"
                "addl %2, %%eax\n\t"

                "movb %%ch, (%0)\n\t"
                "movb %%ah, 1(%0)\n\t"

                "movzbl 2(%0), %%eax\n\t"
                "imull %1, %%eax\n\t"
                "addl %2, %%eax\n\t"
                "movb %%ah, 2(%0)\n\t"
                :
                :"D" (dst),
                 "r" ((unsigned)srca[x]),
                 "r" (((unsigned)src[x])<<8)
                :"%eax", "%ecx"
                );
            }
            dst += 3;
        }
#endif /* !HAVE_MMX */
#else /*non x86 arch or x86_64 with MMX disabled */
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                dst[0]=dst[1]=dst[2]=src[x];
#else
                dst[0]=((dst[0]*srca[x])>>8)+src[x];
                dst[1]=((dst[1]*srca[x])>>8)+src[x];
                dst[2]=((dst[2]*srca[x])>>8)+src[x];
#endif
            }
            dst+=3; // 24bpp
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#if HAVE_MMX
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}

/*
 * Destination with 4 bytes per pixel.  The integer asm and plain C paths
 * blend bytes 0..2 of each pixel and leave byte 3 alone; on big-endian
 * builds dstbase is first advanced by one byte.
 * Four code paths: 3DNow (2 pixels per iteration), other MMX (4 pixels per
 * iteration), x86 integer asm and plain C.  FAST_OSD is honoured by the
 * plain C path only.
 */
static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if HAVE_BIGENDIAN
    dstbase++;
#endif
#if HAVE_MMX
#if HAVE_AMD3DNOW
    /* mm7 = 0, mm6 = all ones */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
        ::);
#else /* HAVE_AMD3DNOW */
    /* mm7 = 0, mm5 = high-byte mask, mm4 = low-byte mask */
    __asm__ volatile(
        "pxor %%mm7, %%mm7\n\t"
        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
        "movq %%mm5, %%mm4\n\t"
        "psllw $8, %%mm5\n\t" //FF00FF00FF00
        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
        ::);
#endif /* HAVE_AMD3DNOW */
#endif /* HAVE_MMX */
    for(y=0;y<h;y++){
        register int x;
#if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
#if HAVE_MMX
#if HAVE_AMD3DNOW
    __asm__ volatile(
        PREFETCHW" %0\n\t"
        PREFETCH" %1\n\t"
        PREFETCH" %2\n\t"
        ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=2){
     if(srca[x] || srca[x+1])
        __asm__ volatile(
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "punpcklbw %%mm7, %%mm0\n\t"
                "punpckhbw %%mm7, %%mm1\n\t"
                "movd %1, %%mm2\n\t" // srca ABCD0000
                "paddb %%mm6, %%mm2\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
                "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
                "movq %%mm2, %%mm3\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
                "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "psrlw $8, %%mm1\n\t"
                "packuswb %%mm1, %%mm0\n\t"
                "movd %2, %%mm2 \n\t" // src ABCD0000
                "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
                "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"
                :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
        }
#else //per the original author, this variant is the faster one on Intel CPUs
    __asm__ volatile(
        PREFETCHW" %0\n\t"
        PREFETCH" %1\n\t"
        PREFETCH" %2\n\t"
        ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
    for(x=0;x<w;x+=4){
        __asm__ volatile(
                "movl %1, %%eax\n\t"
                "orl %%eax, %%eax\n\t"
                " jz 1f\n\t" // all 4 srca bytes are zero: nothing to draw
                PREFETCHW" 32%0\n\t"
                PREFETCH" 32%1\n\t"
                PREFETCH" 32%2\n\t"
                // first two pixels
                "movq %0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "movd %%eax, %%mm2\n\t" //srca 0000DCBA
                "paddb %3, %%mm2\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
                "movq %%mm2, %%mm3\n\t"
                "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
                "pmullw %%mm2, %%mm0\n\t"
                "pmullw %%mm2, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "movd %2, %%mm2 \n\t" //src 0000DCBA
                "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
                "movq %%mm2, %%mm6\n\t"
                "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
                "paddb %%mm2, %%mm0\n\t"
                "movq %%mm0, %0\n\t"

                // last two pixels, reusing srca (mm3) and src (mm6) from above
                "movq 8%0, %%mm0\n\t" // dstbase
                "movq %%mm0, %%mm1\n\t"
                "pand %%mm4, %%mm0\n\t" //0R0B0R0B
                "psrlw $8, %%mm1\n\t" //0?0G0?0G
                "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
                "pmullw %%mm3, %%mm0\n\t"
                "pmullw %%mm3, %%mm1\n\t"
                "psrlw $8, %%mm0\n\t"
                "pand %%mm5, %%mm1\n\t"
                "por %%mm1, %%mm0\n\t"
                "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
                "paddb %%mm6, %%mm0\n\t"
                "movq %%mm0, 8%0\n\t"
                "1:\n\t"
                :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]), "m" (bFF)
                : "%eax");
        }
#endif
#else /* HAVE_MMX */
    for(x=0;x<w;x++){
        if(srca[x]){
            /* per byte: dst = (dst*srca + (src<<8)) >> 8, the shift being
             * done by storing the second-lowest register byte (ch/ah/dh) */
            __asm__ volatile(
                "movzbl (%0), %%ecx\n\t"
                "movzbl 1(%0), %%eax\n\t"
                "movzbl 2(%0), %%edx\n\t"

                "imull %1, %%ecx\n\t"
                "imull %1, %%eax\n\t"
                "imull %1, %%edx\n\t"

                "addl %2, %%ecx\n\t"
                "addl %2, %%eax\n\t"
                "addl %2, %%edx\n\t"

                "movb %%ch, (%0)\n\t"
                "movb %%ah, 1(%0)\n\t"
                "movb %%dh, 2(%0)\n\t"

                :
                :"r" (&dstbase[4*x]),
                 "r" ((unsigned)srca[x]),
                 "r" (((unsigned)src[x])<<8)
                :"%eax", "%ecx", "%edx"
                );
            }
        }
#endif /* HAVE_MMX */
#else /*non x86 arch or x86_64 with MMX disabled */
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
                dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
#else
                dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
                dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
                dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
#endif
            }
        }
#endif /* arch_x86 */
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
#if HAVE_MMX
    __asm__ volatile(EMMS:::"memory");
#endif
    return;
}