changeset 2350:d3a67a417352

new version by zsteva@ptt.yu
author arpi
date Sun, 21 Oct 2001 20:03:00 +0000
parents e977b05a6fd5
children c53969b13353
files libvo/vo_tdfxfb.c
diffstat 1 files changed, 264 insertions(+), 239 deletions(-) [+]
line wrap: on
line diff
--- a/libvo/vo_tdfxfb.c	Sun Oct 21 18:50:15 2001 +0000
+++ b/libvo/vo_tdfxfb.c	Sun Oct 21 20:03:00 2001 +0000
@@ -1,3 +1,10 @@
+// uncomment this if you patched mplayer.c with mplayer_our_out_buffer_hack.diff
+//#define VO_TDFXFB_METHOD 2
+#define VO_TDFXFB_METHOD 1
+// method: Host-to-Screen bitBLT-ing.
+#define HWACCEL_OSD_M2
+//#define YV12_CONV_METH
+#define DONT_USE_FAST_MEMCPY
 
 /* 
  *    video_out_tdfxfb.c
@@ -6,6 +13,15 @@
  *
  *  Most code rewrited, move from /dev/3dfx to /dev/fb0 (kernel 2.4.?)
  *  add support for YUY2 and BGR16 format, remove all X11 DGA code.
+ *  - add support for hardware accelerated OSD (buggy for now).
+ *    works on BGR16 and YUY2 (VO_TDFXFB_METHOD == 2 only)
+ *  [oct2001]
+ *  - added hardware acceleration for OSD (does not look nice, but is faster)
+ *    (for YV12 it doesn't work.)
+ *  - fixed YV12 support for ffdivx, but on my cpu this is slower than yuv2rgb();
+ *    try uncommenting '#define YV12_CONV_METH'
+ *  - fast_memcpy() is slower than memcpy() (why, I don't know)
+ *  
  *
  *	Copyright (C) Colin Cross Apr 2000
  *
@@ -55,13 +71,20 @@
 
 #include "drivers/3dfx.h"
 
+// fast_memcpy() is slower than memcpy(); why, I don't know...
+#ifndef DONT_USE_FAST_MEMCPY
 #include "fastmemcpy.h"
+#endif
+
+#ifdef YV12_CONV_METH
+#include "yuv2rgb.h"
+#endif
 
 static vo_info_t vo_info = 
 {
 	"tdfxfb (/dev/fb?)",
 	"tdfxfb",
-	"Zeljko Stevanovic <zsteva@ptt.yu>, bassed on vo_3dfx of Colin Cross <colin@MIT.EDU>",
+	"Zeljko Stevanovic <zsteva@ptt.yu>",
 	""
 };
 
@@ -94,12 +117,10 @@
 
 static uint32_t *vidpage0;
 static uint32_t *vidpage1;
-static uint32_t *osd_page;
 static uint32_t *in_page0;
 
 static uint32_t vidpage0offset;
 static uint32_t vidpage1offset;
-static uint32_t osd_page_offset;
 static uint32_t in_page0_offset;
 
 // Current pointer into framebuffer where display is located
@@ -118,7 +139,7 @@
 //static uint32_t baseAddr0, baseAddr1;
 
 //#define BANSHEE_SCREEN_MEMORY		(8*1024*1024)
-//static uint32_t tdfx_free_scrmem = 0;
+static uint32_t tdfx_free_offset = 0;
 
 /*- ----------------------------------------------------------------- -*/
 
@@ -169,8 +190,8 @@
 	reg_2d->dstBaseAddr = regs->dstBaseAddr;
 	reg_2d->dstXY = regs->dstXY;
 	reg_2d->dstFormat = regs->dstFormat;
+	reg_2d->dstSize = regs->dstSize;
 
-	reg_2d->dstSize = regs->dstSize;
 	reg_2d->command = 0;
 }
 
@@ -178,77 +199,51 @@
 static void 
 restore(void) 
 {
-	//reg_IO->vidDesktopStartAddr = vidpage0offset;
+	reg_IO->vidDesktopStartAddr = vidpage0offset;
 	//XF86DGADirectVideo(display,0,0);
 }
 
 static void 
 sighup(int foo) 
 {
-	//reg_IO->vidDesktopStartAddr = vidpage0offset;
+	reg_IO->vidDesktopStartAddr = vidpage0offset;
 	//XF86DGADirectVideo(display,0,0);
 	exit(0);
 }
 
-#if 1
+#if 0
 static void 
 dump_yuv_planar(void *y, void *u, void *v,
 		uint32_t to, uint32_t px, uint32_t py, uint32_t width, uint32_t height) 
 {
-	// YUV conversion works like this:
-	//
-	// We write the Y, U, and V planes separately into 3dfx YUV Planar memory
-	// region.  The nice chip then takes these and packs them into the YUYV
-	// format in the regular frame buffer, starting at yuvBaseAddr, page 2 here.
-	// Then we tell the 3dfx to do a Screen to Screen Stretch BLT to copy all 
-	// of the data on page 2 onto page 1, converting it to 16 bpp RGB as
-	// it goes. The result is a nice image on page 1 ready for display. 
-
 	uint32_t j;
-	uint32_t y_imax, uv_imax, jmax;
-
-
-	//printf("dump_yuv_planar(..., px=%d, py=%d, w=%d, h=%d\n",
-	//				px, py, width, height);
+	uint32_t *YUV_U, *YUV_V, *YUV_Y;
+	uint32_t width2 = width >> 1;
+	uint32_t height2 = height >> 1;
 
 	reg_YUV->yuvBaseAddr = to + in_width * 2 * py;
-	reg_YUV->yuvStride = width*2;
+	reg_YUV->yuvStride = width << 1;
 
-	jmax = height >> 1;		// vidheight/2, height of U and V planes
-	y_imax = width;			// Y plane is twice as wide as U and V planes
-	uv_imax = width >> 1;	// in_width/2/4, width of U and V planes in 32-bit words
-
-	for (j = 0; j < jmax; j++) 
+	YUV_U = &fb_YUV->U[0];
+	YUV_V = &fb_YUV->V[0];
+	YUV_Y = &fb_YUV->Y[0];
+	for (j = 0; j < height2; j++) 
 	{
-#if 0
-		//XXX this should be hand-rolled 32 bit memcpy for safeness.
-		memcpy(fb_YUV->U (uint32_t) VOODOO_YUV_STRIDE *  j,
-				((uint8_t*)u) + uv_imax *  j       , uv_imax);
-		
-		memcpy(fb_YUV->V + (uint32_t) VOODOO_YUV_STRIDE *  j,
-				((uint8_t*)v) + uv_imax *  j       , uv_imax);
-
-		memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE* (j<<1),
-				((uint8_t*)y) + y_imax * (j<<1)   , y_imax);
-		memcpy(fb_YUV->Y + (uint32_t) VOODOO_YUV_STRIDE*((j<<1)+1),
-				((uint8_t*)y) + y_imax *((j<<1)+1), y_imax);
-#else
-		memcpy(&fb_YUV->U[VOODOO_YUV_STRIDE *  j], u + uv_imax *  j       , uv_imax);
-		memcpy(&fb_YUV->V[VOODOO_YUV_STRIDE *  j], v + uv_imax *  j       , uv_imax);
-
-
-		memcpy(&fb_YUV->Y[VOODOO_YUV_STRIDE* (j<<1)], y + y_imax * (j<<1)   , y_imax);
-		memcpy(&fb_YUV->Y[VOODOO_YUV_STRIDE*((j<<1)+1)], y + y_imax *((j<<1)+1), y_imax);
-#endif
+		memcpy(YUV_U, u, width2);
+		memcpy(YUV_V, v, width2);
+		memcpy(YUV_Y, y, width); YUV_Y += VOODOO_YUV_STRIDE; y += width;
+		memcpy(YUV_Y, y, width); YUV_Y += VOODOO_YUV_STRIDE; y += width;
+		YUV_U += VOODOO_YUV_STRIDE; u += width2;
+		YUV_V += VOODOO_YUV_STRIDE; v += width2;
 	}
 }
 #endif
 
-#define S2S_BLT(cmd, to, dXY, dFmt, dSize, from, sXY, sFmt, sSize)	\
+#define S2S_BLT(cmd, to, dXY, dFmt, dSize, from, sXY, sFmt, sSize, extCmd)	\
 	do { 										\
 		voodoo_2d_reg saved_regs = *reg_2d;		\
 												\
-		reg_2d->commandExtra = 0;				\
+		reg_2d->commandExtra = (extCmd);		\
 		reg_2d->clip0Min = 0;					\
 		reg_2d->clip0Max = 0xffffffff;			\
 												\
@@ -264,12 +259,11 @@
 												\
 		reg_2d->command = (cmd);				\
 												\
+		banshee_wait_idle();					\
 		restore_regs(&saved_regs);				\
 	} while (0)
 
 
-#define VOODOO_BLT_FORMAT_24			(4 << 16)
-
 /*- ----------------------------------------------------------------- -*/
 
 static uint32_t draw_slice_YV12(uint8_t *image[], int stride[], int w,int h,int x,int y);
@@ -279,31 +273,27 @@
 
 static uint32_t draw_slice_YUY2_BGR16(uint8_t *image[], int stride[], int w,int h,int x,int y);
 static uint32_t draw_frame_YUY2_BGR16(uint8_t *src[]);
-static void flip_page_YUY2_BGR16(void);
-static void draw_osd_YUY2_BGR16(void);
-
-#if 0
-static uint32_t draw_frame_YUY2_2(uint8_t *src[]);
-static uint32_t draw_slice_YUY2(uint8_t *image[], int stride[], int w,int h,int x,int y);
-static void flip_page_all(void);
-static void flip_page_YUY2(void);
-static void flip_page_YUY2_2(void);
-#endif
+static void flip_page_vidpage10(void);
+static void draw_osd(void);
 
 static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
 		unsigned char *srca, int stride);
+#ifdef HWACCEL_OSD_M2
+static void my_draw_alpha_accel(int x0, int y0, int w, int h, unsigned char *src,
+		unsigned char *srca, int stride);
+#endif
 
 static void 
 update_target(void) 
 {
 }
 
-#ifndef VO_3DFX_METHOD
-#define VO_3DFX_METHOD		1
+#ifndef VO_TDFXFB_METHOD
+#define VO_TDFXFB_METHOD		1
 #endif
 
 
-#if VO_3DFX_METHOD == 2
+#if VO_TDFXFB_METHOD == 2
 extern void **our_out_buffer;
 #endif
 
@@ -312,8 +302,7 @@
 		uint32_t fullscreen, char *title, uint32_t format) 
 {
 
-	if (!fullscreen) return -1;
-	if (1 || verbose) {
+	if (verbose) {
 		printf("vo_3dfx->init( width = %d, height = %d, "
 				"d_width = %d, d_height = %d, format = %d)\n",
 				width, height, d_width, d_height, format);
@@ -327,7 +316,7 @@
 	if (!fb_devname && !(fb_devname = getenv("FRAMEBUFFER")))
 		fb_devname = "/dev/fb0";
 
-	if (1 || verbose)
+	if (verbose)
 		printf("vo_3dfx->init(): fbdev ==> %s\n", fb_devname);
 
 	if ((fb_fd = open(fb_devname, O_RDWR)) == -1) {
@@ -371,9 +360,6 @@
 	//return -1;
 
 
-	//screenwidth = 800;
-	//screenheight = 600;
-	//screendepth = 2;
 	screenwidth = fb_vinfo.xres;
 	screenheight = fb_vinfo.yres;
 	screendepth = 2;
@@ -382,12 +368,14 @@
 	in_height = height;
 	in_format = format;
 
-	vidwidth = screenwidth;
-	vidheight = screenheight;
-	//vidwidth = in_width;
-	//vidheight = in_height;
-	if (1) {
+	if (fullscreen) {
 		double exrat;
+
+		if (verbose)
+			printf("vo_tdfxfb->init(): fullscreen mode...\n");
+
+		vidwidth = screenwidth;
+		vidheight = screenheight;
 		
 		exrat = (double)in_width / in_height;
 		if (verbose)
@@ -406,6 +394,16 @@
 			printf("vo_3dfx->init(): vidx => %d\n", vidx);
 			printf("vo_3dfx->init(): vidy => %d\n", vidy);
 		}
+	} else {
+		if (in_width > screenwidth || in_height > screenheight) {
+			printf("vo_tdfxfb->init(): your resolution is small for play move...\n");
+			return -1;
+		} else {
+			vidwidth = in_width;
+			vidheight = in_height;
+			vidx = (screenwidth - in_width) / 2;
+			vidy = (screenheight - in_height) / 2;
+		}
 	}
 
 	signal(SIGALRM,sighup);
@@ -436,13 +434,10 @@
 
 	vidpage0offset = 0;
 	vidpage1offset = screenwidth * screenheight * screendepth;
-	//osd_page_offset = vidpage1offset + screenwidth * screenheight * screendepth;
-	//in_page0_offset = osd_page_offset + screenwidth * screenheight * screendepth;
 	in_page0_offset = vidpage1offset + screenwidth * screenheight * screendepth;
 
 	vidpage0 = (void *)memBase1 + (unsigned long int)vidpage0offset;
 	vidpage1 = (void *)memBase1 + (unsigned long int)vidpage1offset;
-	//osd_page = (void *)memBase1 + (unsigned long int)osd_page_offset;
 	in_page0 = (void *)memBase1 + (unsigned long int)in_page0_offset;
 
 	vid_banshee_xy = XYREG(vidx, vidy);
@@ -451,8 +446,8 @@
 
 	in_banshee_size = XYREG(in_width, in_height);
 
-	//video_out_tdfxfb.flip_page = flip_page_all;
-	draw_alpha_p = NULL;
+	//video_out_3dfx.flip_page = flip_page_all;
+	draw_alpha_p = vo_draw_alpha_rgb16;
 
 	switch (in_format) {
 	case IMGFMT_YV12:
@@ -460,44 +455,36 @@
 		video_out_tdfxfb.draw_frame = draw_frame_YV12;
 		video_out_tdfxfb.flip_page = flip_page_YV12;
 		video_out_tdfxfb.draw_osd = draw_osd_YV12;
+		draw_alpha_p = vo_draw_alpha_yuy2;
 		in_banshee_format = in_width * 2 | VOODOO_BLT_FORMAT_YUYV;
+#ifdef YV12_CONV_METH
+		yuv2rgb_init(16, MODE_RGB);
+		in_banshee_format = in_width * 2 | VOODOO_BLT_FORMAT_16;
+		draw_alpha_p = vo_draw_alpha_rgb16;
+#endif
 		break;
 	case IMGFMT_YUY2:
 		video_out_tdfxfb.draw_slice = draw_slice_YUY2_BGR16;
 		video_out_tdfxfb.draw_frame = draw_frame_YUY2_BGR16;
-		video_out_tdfxfb.flip_page = flip_page_YUY2_BGR16;
-		video_out_tdfxfb.draw_osd = draw_osd_YUY2_BGR16;
+		video_out_tdfxfb.flip_page = flip_page_vidpage10;
 
-#if VO_3DFX_METHOD == 1
-		draw_alpha_p = vo_draw_alpha_yuy2;
-#endif
-#if VO_3DFX_METHOD == 2
-		*our_out_buffer = in_page0;
-		draw_alpha_p = vo_draw_alpha_rgb16;
-#endif
 		in_banshee_format = in_width * 2 | VOODOO_BLT_FORMAT_YUYV;
 		in_bytepp = 2;
+#if VO_TDFXFB_METHOD == 2
+		*our_out_buffer = in_page0;
+#endif
+
 		break;
 	case IMGFMT_BGR|16:
 		video_out_tdfxfb.draw_slice = draw_slice_YUY2_BGR16;
 		video_out_tdfxfb.draw_frame = draw_frame_YUY2_BGR16;
-		video_out_tdfxfb.flip_page = flip_page_YUY2_BGR16;
-		video_out_tdfxfb.draw_osd = draw_osd_YUY2_BGR16;
-#if VO_3DFX_METHOD == 2
-		*our_out_buffer = in_page0;
-#endif
-		draw_alpha_p = vo_draw_alpha_rgb16;
+		video_out_tdfxfb.flip_page = flip_page_vidpage10;
+
 		in_banshee_format = in_width * 2 | VOODOO_BLT_FORMAT_16;
 		in_bytepp = 2;
-		break;
-	case IMGFMT_BGR|24:
-		// FIXME: !!!!
-		//video_out_tdfxfb.draw_frame = draw_frame_BGR24;
-		video_out_tdfxfb.draw_frame = draw_frame; // draw_frame_BGR24;
-		//*our_out_buffer = vidpage1;
-
-		in_banshee_format = in_width * 3 | VOODOO_BLT_FORMAT_24;
-		in_bytepp = 3;
+#if VO_TDFXFB_METHOD == 2
+		*our_out_buffer = in_page0;
+#endif
 		break;
 	}
 
@@ -518,7 +505,7 @@
 
 	atexit(restore);
 
-	printf("(display) 3dfx initialized %p/%p\n",memBase0,memBase1);
+	printf("(display) tdfxfb initialized %p/%p\n",memBase0,memBase1);
 	return 0;
 }
 
@@ -534,18 +521,68 @@
 static uint32_t 
 draw_frame_YV12(uint8_t *src[]) 
 {
-	//printf("vo_3dfx->draw_frame_YV12\n");
 	return 0;
 }
 
+#ifndef YV12_CONV_METH
+
 static uint32_t
 draw_slice_YV12(uint8_t *image[], int stride[], int w,int h,int x,int y)
 {
+	void *img_y	= image[0];
+	void *img_u = image[1];
+	void *img_v = image[2];
+	uint32_t j;
+	uint32_t *YUV_U = &fb_YUV->U[0],
+			*YUV_V = &fb_YUV->V[0],
+			*YUV_Y = &fb_YUV->Y[0];
+	uint32_t height2 = h >> 1;
+
+#if 0
+	printf("stride[0] => %d\n", stride[0]);
+	printf("stride[1] => %d\n", stride[1]);
+	printf("stride[2] => %d\n", stride[2]);
+	printf("w => %d, h => %d, x => %d, y => %d\n", w, h, x, y);
+#endif
+#if 0
 	dump_yuv_planar((uint32_t *)image[0], (uint32_t *)image[1],
 			(uint32_t *)image[2], in_page0_offset, x, y, w, h);
+#endif
+
+	//reg_YUV->yuvBaseAddr = to + mystride * 2 * py;
+	reg_YUV->yuvBaseAddr = in_page0_offset + w * 2 * y;
+	reg_YUV->yuvStride = w << 1;
+
+	for (j = 0; j < height2; j++) 
+	{
+		memcpy(YUV_U, img_u, stride[1]);
+		memcpy(YUV_V, img_v, stride[2]);
+		memcpy(YUV_Y, img_y, stride[0]); YUV_Y += VOODOO_YUV_STRIDE; img_y += stride[0];
+		memcpy(YUV_Y, img_y, stride[0]); YUV_Y += VOODOO_YUV_STRIDE; img_y += stride[0];
+		YUV_U += VOODOO_YUV_STRIDE; img_u += stride[1];
+		YUV_V += VOODOO_YUV_STRIDE; img_v += stride[2];
+	}
+
 	return 0;
 }
 
+#else /* !YV12_CONV_METH */
+// -------------------------------------------------------------------
+// YV12 with converting support
+
+static uint32_t
+draw_slice_YV12(uint8_t *image[], int stride[], int w,int h,int x,int y)
+{
+	uint8_t *dest = (uint8_t *)(in_page0) + (in_width * y + x) * 2;
+	// Convert the w x h slice given as three planes (image[0..2]) with yuv2rgb()
+	// directly into in_page0 at pixel (x, y): 2 bytes per pixel, row pitch in_width * 2.
+	yuv2rgb(dest, image[0], image[1], image[2], w, h, in_width * 2,
+			stride[0], stride[1]);
+	return 0;
+}
+
+#endif /* else ! YV12_CONV_METH */
+
 static void
 flip_page_YV12(void)
 {
@@ -553,66 +590,104 @@
 			vidpage0offset, vid_banshee_xy,
 			vid_banshee_format, vid_banshee_size,
 			in_page0_offset, 0,
-			in_banshee_format, in_banshee_size);
+			in_banshee_format, in_banshee_size, 0);
+
 }
 
 static void draw_alpha_YV12(int x0, int y0, int w, int h, unsigned char *src,
 		unsigned char *srca, int stride)
 {
-	unsigned char *dst = (void *)in_page0 + (in_width * y0 + x0) * 2;	// 2 <= bpp
+	unsigned char *dst = (void *)in_page0 + (in_width * (0+y0) + 0+x0) * 2;	// 2 <= bpp
 	uint32_t dstride = in_width * 2; // 2 <= bpp
-
-	//printf("draw_alpha: x0,y0 = %d,%d; w,h = %d,%d;\n", x0, y0, w, h);
-	//(*draw_alpha_p)(w, h, src, srca, stride, dst, dstride);
-	vo_draw_alpha_yuy2(w, h, src, srca, stride, dst, dstride);
+	//printf("draw_alpha: x0,y0 = %d,%d; w,h = %d,%d; stride=%d;\n", x0, y0, w, h, stride);
+	(*draw_alpha_p)(w, h, src, srca, stride, dst, dstride);
 }
 
 
 static void draw_osd_YV12(void)
 {
+#ifndef HWACCEL_OSD_M2
+	//vo_draw_text(vidwidth, vidheight, draw_alpha);
+#else
+	//vo_draw_text(vidwidth, vidheight, my_draw_alpha_accel);
+#endif /* else ! HWACCEL_OSD_M2 */
 	vo_draw_text(in_width, in_height, draw_alpha_YV12);
 }
 
 
+
 // -------------------------------------------------------------------
 // YUYV & BGR16 support
 
 static uint32_t 
 draw_frame_YUY2_BGR16(uint8_t *src[]) 
 {
-#if VO_3DFX_METHOD == 1
+#if VO_TDFXFB_METHOD == 1
 	memcpy(in_page0, src[0], in_width * in_height * in_bytepp);
 #endif
-#if VO_3DFX_METHOD == 2
 	// blt to offscreen page.
 	S2S_BLT(2 | 1 << 8 | 0xcc << 24, // 2 | 1<<8 | 0xcc<<24,
 			vidpage1offset, vid_banshee_xy,
 			vid_banshee_format, vid_banshee_size,
 			in_page0_offset, 0,
-			in_banshee_format, in_banshee_size);
+			in_banshee_format, in_banshee_size, 0);
 	banshee_wait_idle();
-#endif
 	return 0;
 }
 
 static uint32_t
+draw_frame_YUY2_BGR16_h2s_bitblt(uint8_t *src[]) 
+{
+	uint32_t i, len;
+	uint32_t *launch = (uint32_t *)&reg_2d->launchArea[0];
+	uint32_t *src32 = (uint32_t *)src[0];
+	voodoo_2d_reg saved_regs = *reg_2d;
+	/* Host-to-screen blit of the in_width x in_height 16-bit frame in src[0] to the vidpage1offset page; always returns 0. */
+	reg_2d->commandExtra = 0;
+	reg_2d->clip0Min = 0;
+	reg_2d->clip0Max = 0xffffffff;
+
+	reg_2d->colorFore = 0;
+	reg_2d->colorBack = 0;
+
+	reg_2d->srcXY = 0;
+	// srcBaseAddr is not programmed: the pixel data is written to launchArea below.
+
+//	reg_2d->srcFormat = 0x00400000 | BIT(20); // byte alignment + byte swizzle...
+	// YUYV + dword packed; NOTE(review): fixed to YUYV even for BGR16 input -- confirm
+	reg_2d->srcFormat = in_width*2 | VOODOO_BLT_FORMAT_YUYV; // | (2 << 22);
+	reg_2d->dstXY = vid_banshee_xy;
+	reg_2d->dstSize = vid_banshee_size;
+	reg_2d->dstBaseAddr = vidpage1offset;
+	reg_2d->dstFormat = vid_banshee_format;
+
+	// host-to-screen blitting; the commented-out variant also sets bit 16 (presumably transparency)
+	//reg_2d->command = 3 | (1 << 16)| (ROP_COPY << 24);
+	reg_2d->command = 3 | (ROP_COPY << 24);
+
+	i = 0;
+	len = in_width * in_height * 2;	/* 2 => 16 bit */
+	len >>= 2;	/* / 4 */
+	for (;;) {
+		if (i == len) break; launch[0] = src32[i]; i++;
+		if (i == len) break; launch[1] = src32[i]; i++;
+		if (i == len) break; launch[2] = src32[i]; i++;
+		if (i == len) break; launch[3] = src32[i]; i++;
+	}
+	banshee_wait_idle();
+	restore_regs(&saved_regs);
+	return 0;	/* same success value as the other draw_frame_* functions */
+}
+
+static uint32_t
 draw_slice_YUY2_BGR16(uint8_t *image[], int stride[], int w,int h,int x,int y)
 {
 	return 0;
 }
 
 static void
-flip_page_YUY2_BGR16(void)
+flip_page_vidpage10(void)
 {
-#if VO_3DFX_METHOD == 1
-	S2S_BLT(2 | 1 << 8 | 0xcc << 24, // 2 | 1<<8 | 0xcc<<24,
-			vidpage0offset, vid_banshee_xy,
-			vid_banshee_format, vid_banshee_size,
-			in_page0_offset, 0,
-			in_banshee_format, in_banshee_size);
-	banshee_wait_idle();
-#endif
-#if VO_3DFX_METHOD == 2
 	uint32_t o;
 	void *p;
 
@@ -621,34 +696,24 @@
 	p = vidpage0; vidpage0 = vidpage1; vidpage1 = p;
 
 	reg_IO->vidDesktopStartAddr = vidpage0offset;
-#endif
 }
 
-static void draw_alpha_YUY2_BGR16(int x0, int y0, int w, int h, unsigned char *src,
+static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
 		unsigned char *srca, int stride)
 {
-#if VO_3DFX_METHOD == 1
-	unsigned char *dst = (void *)in_page0 + (in_width * y0 + x0) * 2;	// 2 <= bpp
-	uint32_t dstride = in_width * 2; // 2 <= bpp
-#endif
-#if VO_3DFX_METHOD == 2
 	unsigned char *dst = (void *)vidpage1 + (screenwidth * (vidy+y0) + vidx+x0) * 2;	// 2 <= bpp
 	uint32_t dstride = screenwidth * 2; // 2 <= bpp
-#endif
-	//printf("draw_alpha: x0,y0 = %d,%d; w,h = %d,%d;\n", x0, y0, w, h);
+	//printf("draw_alpha: x0,y0 = %d,%d; w,h = %d,%d; stride=%d;\n", x0, y0, w, h, stride);
 	(*draw_alpha_p)(w, h, src, srca, stride, dst, dstride);
 }
 
-static void draw_osd_YUY2_BGR16(void)
+static void draw_osd(void)
 {
-#if VO_3DFX_METHOD == 1
-	vo_draw_text(in_width, in_height, draw_alpha_YUY2_BGR16);
-#endif
-#if VO_3DFX_METHOD == 2
-	//vo_draw_text(screenwidth, screenheight, draw_alpha_YUY2_BGR16);
-	//vo_draw_text(vidwidth, vidheight, draw_alpha_YUY2_BGR16);
-	vo_draw_text(vidwidth, vidheight, draw_alpha_YUY2_BGR16);
-#endif
+#ifndef HWACCEL_OSD_M2
+	vo_draw_text(vidwidth, vidheight, draw_alpha);
+#else
+	vo_draw_text(vidwidth, vidheight, my_draw_alpha_accel);
+#endif /* else ! HWACCEL_OSD_M2 */
 }
 
 // -------------------------------------------------------------------
@@ -673,73 +738,7 @@
 	/* dummy */
 }
 
-#if 0
-static void
-flip_page_all(void)
-{
-	S2S_BLT(2 | 1 << 8 | 0xcc << 24, // 2 | 1<<8 | 0xcc<<24,
-			vidpage0offset, vid_banshee_xy,
-			vid_banshee_format, vid_banshee_size,
-			in_page0_offset, 0,
-			in_banshee_format, in_banshee_size);
-}
-
-
-static void
-flip_page_YUY2_2(void)
-{
-	void *p; 
-	uint32_t o;
-
-	/* flip screen buffer */
-	p = vidpage0; vidpage0 = vidpage1; vidpage1 = p;
-	o = vidpage0offset; vidpage0offset = vidpage1offset; vidpage1offset = o;
-	reg_IO->vidDesktopStartAddr = vidpage0offset;
-
-
-	//banshee_make_room(1);
-	//tdfx_outl(VIDDESKSTART, vidpage0offset);
-	//banshee_wait_idle();
-
-#if 0
-	S2S_BLT(2 | 1 << 8 | 0xCC << 24,
-			vidpage0offset, vid_banshee_xy,
-			vid_banshee_format, vid_banshee_size,
-			in_page1_offset, 0,
-			in_banshee_format, in_banshee_size);
-	banshee_wait_idle();
-#endif
-#if 0
-	S2S_BLT(2 | 1 << 8 | 0xCC << 24,
-			vidpage0offset, vid_banshee_xy,
-			vid_banshee_format, vid_banshee_size,
-			vidpage1offset, vid_banshee_xy,
-			vid_banshee_format, vid_banshee_size);
-
-	banshee_wait_idle();
-#endif
-#if 0
-	banshee_make_room(3+9);
-
-	tdfx_outl(COMMANDEXTRA_2D, 4);
-	tdfx_outl(CLIP0MIN, 0);
-	tdfx_outl(CLIP0MAX, 0xffffffff);
-	tdfx_outl(SRCBASE, page_0_offset);
-	tdfx_outl(SRCXY, 0);
-	tdfx_outl(SRCFORMAT, in_banshee_format);
-	tdfx_outl(SRCSIZE, in_banshee_size);
-
-	tdfx_outl(DSTBASE, vidpage0offset);
-	tdfx_outl(DSTXY, vid_banshee_xy);
-	tdfx_outl(DSTFORMAT, vid_banshee_format);
-	tdfx_outl(DSTSIZE, vid_banshee_size);
-
-	tdfx_outl(COMMAND_2D, 2 | 1<<8 | 0xcc << 24);
-	banshee_wait_idle();
-#endif
-}
-#endif
-
+/*- ----------------------------------------------------------------- -*/
 
 static uint32_t
 query_format(uint32_t format)
@@ -749,15 +748,10 @@
         return 4|2; // 4|2;
     case IMGFMT_YUY2:
 		if (verbose) printf("query_format: IMGFMT_YUY2\n");
-		return 0; //4|2;
-    case IMGFMT_RGB|24:
-		if (verbose) printf("query_format: IMGFMT_RGB|24\n");
-		return 0;
-    case IMGFMT_BGR|24:
-		if (verbose) printf("query_format: IMGFMT_BGR|24\n");
-		return 0;
+		return 4|2; //4|2;
 	case IMGFMT_BGR|16:
-		return 4|2; // 4|1;	/* osd + ????? */
+		if (verbose) printf("query_format: IMGFMT_BGR|16\n");
+		return 4|2; // 4|2;	/* osd + ????? */
     }
     return 0;
 }
@@ -765,6 +759,7 @@
 static void
 uninit(void)
 {
+	reg_IO->vidDesktopStartAddr = vidpage0offset;
 }
 
 
@@ -772,30 +767,60 @@
 {
 }
 
-static void draw_alpha(int x0, int y0, int w, int h, unsigned char *src,
+#ifdef HWACCEL_OSD_M2
+
+static void my_draw_alpha_accel(int x0, int y0, int w, int h, unsigned char *src,
 		unsigned char *srca, int stride)
 {
-	unsigned char *dst = (void *)vidpage1 + (in_width * y0 + x0) * 2;	// 2 <= bpp
-	uint32_t dstride = in_width * 2; // 2 <= bpp
+	int y, x;
+	uint32_t pbuf, pcnt;
+	uint32_t *launch = (uint32_t *)&reg_2d->launchArea[0];
+	voodoo_2d_reg saved_regs = *reg_2d;
+
+	reg_2d->commandExtra = 0;
+	reg_2d->clip0Min = 0;
+	reg_2d->clip0Max = 0xffffffff;
 
-	//printf("draw_alpha: x0,y0 = %d,%d; w,h = %d,%d;\n", x0, y0, w, h);
-	//(*draw_alpha_p)(w, h, src, srca, stride, dst, dstride);
-	vo_draw_alpha_rgb16(w, h, src, srca, stride, dst, dstride);
-}
+	reg_2d->colorFore = 0xffff;
+	reg_2d->colorBack = 0;
+
+	reg_2d->srcXY = 0;
+	//reg_2d->srcBaseAddr = (from);
+
+	reg_2d->srcFormat = 0x00400000 | BIT(20); // byte allignment + byte swizzle...
+	//reg_2d->srcSize = XYREG(w, h);
+	reg_2d->dstSize = XYREG(w, h);
+
+	reg_2d->dstBaseAddr = vidpage1offset;
+	reg_2d->dstXY = XYREG(vidx+x0, vidy+y0);
+	reg_2d->dstFormat = vid_banshee_format;
 
-static void draw_osd(void)
-{
-#if 1
-	vo_draw_text(vidwidth, vidheight, draw_alpha);
-#else
-	zz_draw_text(in_width, in_height, draw_alpha);
-	S2S_BLT(2 | 1 << 8 | 0xCC << 24,
-			in_page1_offset, in_banshee_xy,
-			in_banshee_format, in_banshee_size,
-			in_pageT_offset, 0,
-			in_banshee_format, in_banshee_size);
+// host-to-screen blitting + transparent
+	reg_2d->command = 3 | (1 << 16)| (ROP_COPY << 24);
+
+	pcnt = 0;
+	pbuf = 0;
+	for (y = 0; y < h; y++) {
+		for (x = 0; x < w; x++) {
+			pbuf = (pbuf << 1) | (((src[x] > 150) ? 1 : 0));
+			if (++pcnt == 32) { launch[0] = pbuf; pcnt = 0; pbuf = 0; }
+		}
+
+		if ((pcnt % 8) != 0) { 
+			pbuf <<= 8 - (pcnt % 8);
+			pcnt += 8 - (pcnt % 8);
+			if (pcnt == 32) { launch[0] = pbuf; pcnt = 0; pbuf = 0; }
+		}
+			
+		src += stride;
+		srca += stride;
+	}
+	if (pcnt != 0) launch[0] = pbuf;
+
 	banshee_wait_idle();
-#endif
+	restore_regs(&saved_regs);
+	return;
 }
+#endif /* ! HWACCEL_OSD_M2 */