diff sub/osd.c @ 32469:3fef2e17a03f

Move osd.[ch] and osd_template.c from libvo to sub.
author cigaes
date Wed, 27 Oct 2010 17:53:24 +0000
parents
children a0ff4fde7a48
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sub/osd.c	Wed Oct 27 17:53:24 2010 +0000
@@ -0,0 +1,428 @@
+/*
+ * generic alpha renderers for all YUV modes and RGB depths
+ * These are "reference implementations", should be optimized later (MMX, etc).
+ * templating code by Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+//#define FAST_OSD
+//#define FAST_OSD_TABLE
+
+#include "config.h"
+#include "osd.h"
+#include "mp_msg.h"
+#include <inttypes.h>
+#include "cpudetect.h"
+
+#if ARCH_X86 /* 64-bit constants, aligned to 8 bytes, built only on x86.
+ * Nothing in this file reads them directly; presumably the x86 code in
+ * osd_template.c (included further down) uses them -- TODO confirm. */
+static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; // all 64 bits set
+static const unsigned long long mask24lh  __attribute__((aligned(8))) = 0xFFFF000000000000ULL; // top 16 bits set
+static const unsigned long long mask24hl  __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL; // low 48 bits set
+#endif
+
+/* Decide which renderer variants get compiled.
+ * Variants: plain C, x86 without MMX, MMX, MMX2 and 3DNow; there is no
+ * combined 3DNow+MMX2 variant.  With CONFIG_RUNTIME_CPUDETECT every variant
+ * applicable to the architecture is built; otherwise only the variant(s)
+ * matching the HAVE_* flags visible at this point are. */
+// Unoptimized versions (instantiated as _C off x86 and as _X86 on x86)
+#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT
+#define COMPILE_C
+#endif
+
+#if ARCH_X86
+
+#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT // MMX is the best statically enabled feature
+#define COMPILE_MMX
+#endif
+
+#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
+#define COMPILE_MMX2
+#endif
+
+#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT // MMX2 takes precedence over 3DNow
+#define COMPILE_3DNOW
+#endif
+
+#endif /* ARCH_X86 */
+/* The COMPILE_* decisions are final, so the HAVE_* values seen so far are
+ * dropped and forced to 0.  Each inclusion of osd_template.c further down
+ * redefines them for the variant being instantiated. */
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 0
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+
+#if ! ARCH_X86 /* Instantiate osd_template.c once per selected variant.
+ * RENAME() appends the variant suffix to every name passed through it, and
+ * the HAVE_* macros are set to describe the variant before each inclusion.
+ * After this section HAVE_* keep the values of the LAST variant included;
+ * the "#if HAVE_MMX2 / #elif ..." chains in the functions below test those
+ * values, so the order of the inclusions matters. */
+
+#ifdef COMPILE_C
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 0
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define RENAME(a) a ## _C
+#include "osd_template.c"
+#endif
+
+#else
+
+// x86 versions without MMX (suffix _X86)
+#ifdef COMPILE_C
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 0
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define RENAME(a) a ## _X86
+#include "osd_template.c"
+#endif
+
+// MMX versions (suffix _MMX)
+#ifdef COMPILE_MMX
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define RENAME(a) a ## _MMX
+#include "osd_template.c"
+#endif
+
+// MMX2 versions (suffix _MMX2); MMX is enabled as well
+#ifdef COMPILE_MMX2
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define RENAME(a) a ## _MMX2
+#include "osd_template.c"
+#endif
+
+// 3DNow versions (suffix _3DNow); MMX is enabled as well, MMX2 is not
+#ifdef COMPILE_3DNOW
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 1
+#define RENAME(a) a ## _3DNow
+#include "osd_template.c"
+#endif
+
+#endif /* ARCH_X86 */
+
+/*
+ * Public entry point of the yv12 alpha renderer.
+ *
+ * Forwards all seven arguments unchanged to one vo_draw_alpha_yv12_<variant>()
+ * instantiation of osd_template.c.  With CONFIG_RUNTIME_CPUDETECT on x86 the
+ * variant is picked from gCpuCaps on every call; in all other builds it is
+ * fixed by the preprocessor.
+ */
+void vo_draw_alpha_yv12(int w, int h, unsigned char* src, unsigned char *srca,
+                        int srcstride, unsigned char* dstbase, int dststride)
+{
+#if CONFIG_RUNTIME_CPUDETECT && ARCH_X86
+	/* Probe from the fastest variant down to the plain x86 fallback. */
+	if (gCpuCaps.hasMMX2) {
+		vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.has3DNow) {
+		vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.hasMMX) {
+		vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#elif CONFIG_RUNTIME_CPUDETECT
+	/* Runtime detection enabled on a non-x86 build: only the C variant exists. */
+	vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
+/*
+ * Build-time selection.  The HAVE_* macros tested below hold whatever the
+ * last osd_template.c inclusion earlier in this file left them at.
+ */
+#elif HAVE_MMX2
+	vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_AMD3DNOW
+	vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_MMX
+	vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+#elif ARCH_X86
+	vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#else
+	vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
+#endif
+}
+
+/*
+ * Public entry point of the yuy2 alpha renderer.
+ *
+ * Forwards all seven arguments unchanged to one vo_draw_alpha_yuy2_<variant>()
+ * instantiation of osd_template.c.  With CONFIG_RUNTIME_CPUDETECT on x86 the
+ * variant is picked from gCpuCaps on every call; in all other builds it is
+ * fixed by the preprocessor.
+ */
+void vo_draw_alpha_yuy2(int w, int h, unsigned char* src, unsigned char *srca,
+                        int srcstride, unsigned char* dstbase, int dststride)
+{
+#if CONFIG_RUNTIME_CPUDETECT && ARCH_X86
+	/* Probe from the fastest variant down to the plain x86 fallback. */
+	if (gCpuCaps.hasMMX2) {
+		vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.has3DNow) {
+		vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.hasMMX) {
+		vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#elif CONFIG_RUNTIME_CPUDETECT
+	/* Runtime detection enabled on a non-x86 build: only the C variant exists. */
+	vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
+/*
+ * Build-time selection.  The HAVE_* macros tested below hold whatever the
+ * last osd_template.c inclusion earlier in this file left them at.
+ */
+#elif HAVE_MMX2
+	vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_AMD3DNOW
+	vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_MMX
+	vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+#elif ARCH_X86
+	vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#else
+	vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
+#endif
+}
+
+/*
+ * Public entry point of the uyvy alpha renderer.
+ *
+ * Forwards all seven arguments unchanged to one vo_draw_alpha_uyvy_<variant>()
+ * instantiation of osd_template.c.  With CONFIG_RUNTIME_CPUDETECT on x86 the
+ * variant is picked from gCpuCaps on every call; in all other builds it is
+ * fixed by the preprocessor.
+ */
+void vo_draw_alpha_uyvy(int w, int h, unsigned char* src, unsigned char *srca,
+                        int srcstride, unsigned char* dstbase, int dststride)
+{
+#if CONFIG_RUNTIME_CPUDETECT && ARCH_X86
+	/* Probe from the fastest variant down to the plain x86 fallback. */
+	if (gCpuCaps.hasMMX2) {
+		vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.has3DNow) {
+		vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.hasMMX) {
+		vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#elif CONFIG_RUNTIME_CPUDETECT
+	/* Runtime detection enabled on a non-x86 build: only the C variant exists. */
+	vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
+/*
+ * Build-time selection.  The HAVE_* macros tested below hold whatever the
+ * last osd_template.c inclusion earlier in this file left them at.
+ */
+#elif HAVE_MMX2
+	vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_AMD3DNOW
+	vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_MMX
+	vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+#elif ARCH_X86
+	vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#else
+	vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
+#endif
+}
+
+/*
+ * Public entry point of the rgb24 alpha renderer.
+ *
+ * Forwards all seven arguments unchanged to one vo_draw_alpha_rgb24_<variant>()
+ * instantiation of osd_template.c.  With CONFIG_RUNTIME_CPUDETECT on x86 the
+ * variant is picked from gCpuCaps on every call; in all other builds it is
+ * fixed by the preprocessor.
+ */
+void vo_draw_alpha_rgb24(int w, int h, unsigned char* src, unsigned char *srca,
+                         int srcstride, unsigned char* dstbase, int dststride)
+{
+#if CONFIG_RUNTIME_CPUDETECT && ARCH_X86
+	/* Probe from the fastest variant down to the plain x86 fallback. */
+	if (gCpuCaps.hasMMX2) {
+		vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.has3DNow) {
+		vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.hasMMX) {
+		vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#elif CONFIG_RUNTIME_CPUDETECT
+	/* Runtime detection enabled on a non-x86 build: only the C variant exists. */
+	vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
+/*
+ * Build-time selection.  The HAVE_* macros tested below hold whatever the
+ * last osd_template.c inclusion earlier in this file left them at.
+ */
+#elif HAVE_MMX2
+	vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_AMD3DNOW
+	vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_MMX
+	vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+#elif ARCH_X86
+	vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#else
+	vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
+#endif
+}
+
+/*
+ * Public entry point of the rgb32 alpha renderer.
+ *
+ * Forwards all seven arguments unchanged to one vo_draw_alpha_rgb32_<variant>()
+ * instantiation of osd_template.c.  With CONFIG_RUNTIME_CPUDETECT on x86 the
+ * variant is picked from gCpuCaps on every call; in all other builds it is
+ * fixed by the preprocessor.
+ */
+void vo_draw_alpha_rgb32(int w, int h, unsigned char* src, unsigned char *srca,
+                         int srcstride, unsigned char* dstbase, int dststride)
+{
+#if CONFIG_RUNTIME_CPUDETECT && ARCH_X86
+	/* Probe from the fastest variant down to the plain x86 fallback. */
+	if (gCpuCaps.hasMMX2) {
+		vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.has3DNow) {
+		vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	if (gCpuCaps.hasMMX) {
+		vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+		return;
+	}
+	vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#elif CONFIG_RUNTIME_CPUDETECT
+	/* Runtime detection enabled on a non-x86 build: only the C variant exists. */
+	vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
+/*
+ * Build-time selection.  The HAVE_* macros tested below hold whatever the
+ * last osd_template.c inclusion earlier in this file left them at.
+ */
+#elif HAVE_MMX2
+	vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_AMD3DNOW
+	vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
+#elif HAVE_MMX
+	vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
+#elif ARCH_X86
+	vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
+#else
+	vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
+#endif
+}
+
+#ifdef FAST_OSD_TABLE /* Lookup tables, built only with FAST_OSD_TABLE.
+ * Each is indexed by an 8-bit OSD value and holds the packed 12/15/16 bpp
+ * pixel with that value replicated into all three channels.  They are filled
+ * in by vo_draw_alpha_init() and read by the FAST_OSD paths of
+ * vo_draw_alpha_rgb12/15/16(). */
+static unsigned short fast_osd_12bpp_table[256];
+static unsigned short fast_osd_15bpp_table[256];
+static unsigned short fast_osd_16bpp_table[256];
+#endif
+
+/*
+ * Set up the OSD renderers.
+ *
+ * With FAST_OSD_TABLE defined, fills the three fast_osd_*bpp_table arrays.
+ * Then, if mp_msg_test(MSGT_OSD, MSGL_V) is true, logs one MSGL_INFO line
+ * naming the renderer variant, using the same selection order as the
+ * vo_draw_alpha_*() dispatchers.
+ */
+void vo_draw_alpha_init(void)
+{
+#ifdef FAST_OSD_TABLE
+    int level;
+    for (level = 0; level < 256; level++) {
+        /* The 8-bit value reduced to 4, 5 and 6 bits of precision. */
+        const int v4 = level >> 4;
+        const int v5 = level >> 3;
+        const int v6 = level >> 2;
+        fast_osd_12bpp_table[level] = (v4 <<  8) | (v4 << 4) | v4;
+        fast_osd_15bpp_table[level] = (v5 << 10) | (v5 << 5) | v5;
+        fast_osd_16bpp_table[level] = (v5 << 11) | (v6 << 5) | v5;
+    }
+#endif
+    //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
+    if (!mp_msg_test(MSGT_OSD, MSGL_V))
+        return;
+
+#if CONFIG_RUNTIME_CPUDETECT && ARCH_X86
+    // ordered by speed, fastest first
+    if (gCpuCaps.hasMMX2)
+        mp_msg(MSGT_OSD, MSGL_INFO, "Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
+    else if (gCpuCaps.has3DNow)
+        mp_msg(MSGT_OSD, MSGL_INFO, "Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
+    else if (gCpuCaps.hasMMX)
+        mp_msg(MSGT_OSD, MSGL_INFO, "Using MMX Optimized OnScreenDisplay\n");
+    else
+        mp_msg(MSGT_OSD, MSGL_INFO, "Using X86 Optimized OnScreenDisplay\n");
+#elif CONFIG_RUNTIME_CPUDETECT
+    mp_msg(MSGT_OSD, MSGL_INFO, "Using Unoptimized OnScreenDisplay\n");
+/*
+ * Build-time selection.  The HAVE_* macros tested below hold whatever the
+ * last osd_template.c inclusion earlier in this file left them at.
+ */
+#elif HAVE_MMX2
+    mp_msg(MSGT_OSD, MSGL_INFO, "Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
+#elif HAVE_AMD3DNOW
+    mp_msg(MSGT_OSD, MSGL_INFO, "Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
+#elif HAVE_MMX
+    mp_msg(MSGT_OSD, MSGL_INFO, "Using MMX Optimized OnScreenDisplay\n");
+#elif ARCH_X86
+    mp_msg(MSGT_OSD, MSGL_INFO, "Using X86 Optimized OnScreenDisplay\n");
+#else
+    mp_msg(MSGT_OSD, MSGL_INFO, "Using Unoptimized OnScreenDisplay\n");
+#endif
+}
+
+/*
+ * Blend an 8-bit OSD bitmap onto a 12 bpp RGB surface (4 bits per channel,
+ * stored in 16-bit words: bits 0-3, 4-7 and 8-11).
+ *
+ *   w, h       size of the area to draw, in pixels
+ *   src        OSD values, one byte per pixel; added to every channel
+ *   srca       per-pixel factors, one byte per pixel; 0 leaves the
+ *              destination pixel untouched, any other value scales the old
+ *              channel contents before src is added
+ *   srcstride  bytes between consecutive rows of both src and srca
+ *   dstbase    first destination pixel, accessed as unsigned short
+ *   dststride  bytes between consecutive destination rows
+ *
+ * Each channel c becomes (((c * srca) >> 4) + src) >> 4.  The intermediate
+ * 8-bit sum is saturated at 255: without that, a bright src combined with a
+ * large srca produces a channel wider than 4 bits, which corrupts the
+ * neighbouring channel when the pixel is packed.  Inputs whose sum stays
+ * within 8 bits give the same result as before.
+ */
+void vo_draw_alpha_rgb12(int w, int h, unsigned char* src, unsigned char *srca,
+                         int srcstride, unsigned char* dstbase, int dststride) {
+    int y;
+    for (y = 0; y < h; y++) {
+        unsigned short *dst = (unsigned short*) dstbase;
+        int x;
+        for (x = 0; x < w; x++) {
+            if (srca[x]) {
+#ifdef FAST_OSD
+#ifdef FAST_OSD_TABLE
+                dst[x] = fast_osd_12bpp_table[src[x]];
+#else
+                /* Overwrite with a grey of the OSD value, no blending. */
+                unsigned int a = src[x] >> 4;
+                dst[x] = (a << 8) | (a << 4) | a;
+#endif
+#else
+                /* Scaled old channel plus OSD value, still in 8-bit range. */
+                unsigned int r = ((( dst[x]       & 0x0F) * srca[x]) >> 4) + src[x];
+                unsigned int g = ((((dst[x] >> 4) & 0x0F) * srca[x]) >> 4) + src[x];
+                unsigned int b = ((((dst[x] >> 8) & 0x0F) * srca[x]) >> 4) + src[x];
+                if (r > 255) r = 255;
+                if (g > 255) g = 255;
+                if (b > 255) b = 255;
+                dst[x] = ((b >> 4) << 8) | ((g >> 4) << 4) | (r >> 4);
+#endif
+            }
+        }
+        src += srcstride;
+        srca += srcstride;
+        dstbase += dststride;
+    }
+}
+
+/*
+ * Blend an 8-bit OSD bitmap onto a 15 bpp RGB surface (5 bits per channel,
+ * stored in 16-bit words: bits 0-4, 5-9 and 10-14).
+ *
+ *   w, h       size of the area to draw, in pixels
+ *   src        OSD values, one byte per pixel; added to every channel
+ *   srca       per-pixel factors, one byte per pixel; 0 leaves the
+ *              destination pixel untouched, any other value scales the old
+ *              channel contents before src is added
+ *   srcstride  bytes between consecutive rows of both src and srca
+ *   dstbase    first destination pixel, accessed as unsigned short
+ *   dststride  bytes between consecutive destination rows
+ *
+ * Each channel c becomes (((c * srca) >> 5) + src) >> 3.  The intermediate
+ * 8-bit sum is saturated at 255: without that, a bright src combined with a
+ * large srca produces a channel wider than 5 bits, which corrupts the
+ * neighbouring channel when the pixel is packed.  Inputs whose sum stays
+ * within 8 bits give the same result as before.
+ */
+void vo_draw_alpha_rgb15(int w, int h, unsigned char* src, unsigned char *srca,
+                         int srcstride, unsigned char* dstbase, int dststride) {
+    int y;
+    for (y = 0; y < h; y++) {
+        unsigned short *dst = (unsigned short*) dstbase;
+        int x;
+        for (x = 0; x < w; x++) {
+            if (srca[x]) {
+#ifdef FAST_OSD
+#ifdef FAST_OSD_TABLE
+                dst[x] = fast_osd_15bpp_table[src[x]];
+#else
+                /* Overwrite with a grey of the OSD value, no blending. */
+                unsigned int a = src[x] >> 3;
+                dst[x] = (a << 10) | (a << 5) | a;
+#endif
+#else
+                /* Scaled old channel plus OSD value, still in 8-bit range. */
+                unsigned int r = ((( dst[x]        & 0x1F) * srca[x]) >> 5) + src[x];
+                unsigned int g = ((((dst[x] >>  5) & 0x1F) * srca[x]) >> 5) + src[x];
+                unsigned int b = ((((dst[x] >> 10) & 0x1F) * srca[x]) >> 5) + src[x];
+                if (r > 255) r = 255;
+                if (g > 255) g = 255;
+                if (b > 255) b = 255;
+                dst[x] = ((b >> 3) << 10) | ((g >> 3) << 5) | (r >> 3);
+#endif
+            }
+        }
+        src += srcstride;
+        srca += srcstride;
+        dstbase += dststride;
+    }
+}
+
+/*
+ * Blend an 8-bit OSD bitmap onto a 16 bpp RGB surface (5-6-5 layout in
+ * 16-bit words: bits 0-4, 5-10 and 11-15).
+ *
+ *   w, h       size of the area to draw, in pixels
+ *   src        OSD values, one byte per pixel; added to every channel
+ *   srca       per-pixel factors, one byte per pixel; 0 leaves the
+ *              destination pixel untouched, any other value scales the old
+ *              channel contents before src is added
+ *   srcstride  bytes between consecutive rows of both src and srca
+ *   dstbase    first destination pixel, accessed as unsigned short
+ *   dststride  bytes between consecutive destination rows
+ *
+ * The 5-bit channels become (((c * srca) >> 5) + src) >> 3 and the 6-bit
+ * channel (((c * srca) >> 6) + src) >> 2.  The intermediate 8-bit sum is
+ * saturated at 255: without that, a bright src combined with a large srca
+ * produces a channel wider than its field, which corrupts the neighbouring
+ * channel (or is cut off at bit 16) when the pixel is packed.  Inputs whose
+ * sum stays within 8 bits give the same result as before.
+ */
+void vo_draw_alpha_rgb16(int w, int h, unsigned char* src, unsigned char *srca,
+                         int srcstride, unsigned char* dstbase, int dststride) {
+    int y;
+    for (y = 0; y < h; y++) {
+        unsigned short *dst = (unsigned short*) dstbase;
+        int x;
+        for (x = 0; x < w; x++) {
+            if (srca[x]) {
+#ifdef FAST_OSD
+#ifdef FAST_OSD_TABLE
+                dst[x] = fast_osd_16bpp_table[src[x]];
+#else
+                /* Overwrite with a grey of the OSD value, no blending. */
+                dst[x] = ((src[x] >> 3) << 11) | ((src[x] >> 2) << 5) | (src[x] >> 3);
+#endif
+#else
+                /* Scaled old channel plus OSD value, still in 8-bit range. */
+                unsigned int r = ((( dst[x]        & 0x1F) * srca[x]) >> 5) + src[x];
+                unsigned int g = ((((dst[x] >>  5) & 0x3F) * srca[x]) >> 6) + src[x];
+                unsigned int b = ((((dst[x] >> 11) & 0x1F) * srca[x]) >> 5) + src[x];
+                if (r > 255) r = 255;
+                if (g > 255) g = 255;
+                if (b > 255) b = 255;
+                dst[x] = ((b >> 3) << 11) | ((g >> 2) << 5) | (r >> 3);
+#endif
+            }
+        }
+        src += srcstride;
+        srca += srcstride;
+        dstbase += dststride;
+    }
+}