changeset 23044:7d693457f85e

Updated the ATI vidix driver with the one from upstream vidix; it now supports many more GPUs (including all Radeon > 9600 and X series).
author ben
date Sun, 22 Apr 2007 13:25:50 +0000
parents 4038852b041e
children 212bbe3789e7
files vidix/radeon.h vidix/radeon_vid.c
diffstat 2 files changed, 2108 insertions(+), 643 deletions(-) [+]
line wrap: on
line diff
--- a/vidix/radeon.h	Sun Apr 22 10:27:44 2007 +0000
+++ b/vidix/radeon.h	Sun Apr 22 13:25:50 2007 +0000
@@ -148,13 +148,23 @@
 #define	AIC_TLB_DATA				0x01E8
 #define	DAC_CNTL				0x0058
 /* DAC_CNTL bit	constants */
+#	define DAC_RANGE_CNTL_MSK		0x00000003
+#	define DAC_RANGE_PAL			0x00000000
+#	define DAC_RANGE_NTSC			0x00000001
+#	define DAC_RANGE_PS2			0x00000002
+#	define DAC_BLANKING			0x00000004
+#	define DAC_CMP_EN			0x00000008
+#	define DAC_CMP_OUTPUT			0x00000080
 #	define DAC_8BIT_EN			0x00000100
 #	define DAC_4BPP_PIX_ORDER		0x00000200
+#	define DAC_TVO_EN			0x00000400
+#	define DAC_TVO_OVR_EXCL			0x00000800
+#	define DAC_TVO_16BPP_DITH_EN		0x00001000
+#	define DAC_VGA_ADR_EN			(1 << 13)
+#	define DAC_PWDN				(1 << 15)
 #	define DAC_CRC_EN			0x00080000
 #	define DAC_MASK_ALL			(0xff << 24)
-#	define DAC_VGA_ADR_EN			(1 << 13)
 #	define DAC_RANGE_CNTL			(3 << 0)
-#	define DAC_BLANKING			(1 << 2)
 #define	DAC_CNTL2				0x007c
 /* DAC_CNTL2 bit constants */
 #	define DAC2_DAC_CLK_SEL			(1 <<  0)
@@ -191,6 +201,7 @@
 #	define CRTC2_DISP_DIS			(1 << 23)
 #	define CRTC2_EN				(1 << 25)
 #	define CRTC2_DISP_REQ_EN_B		(1 << 26)
+#       define CRTC2_CSYNC_EN			(1 << 27)
 #	define CRTC2_HSYNC_DIS			(1 << 28)
 #	define CRTC2_VSYNC_DIS			(1 << 29)
 #define	MEM_CNTL				0x0140
@@ -284,6 +295,19 @@
 #	define CRTC_DISPLAY_DIS_BYTE		(1 <<  2)
 #define	RB3D_CNTL				0x1C3C
 #define	WAIT_UNTIL				0x1720
+#	define EVENT_CRTC_OFFSET		0x00000001
+#	define EVENT_RE_CRTC_VLINE		0x00000002
+#	define EVENT_FE_CRTC_VLINE		0x00000004
+#	define EVENT_CRTC_VLINE			0x00000008
+#	define EVENT_BM_VIP0_IDLE		0x00000010
+#	define EVENT_BM_VIP1_IDLE		0x00000020
+#	define EVENT_BM_VIP2_IDLE		0x00000040
+#	define EVENT_BM_VIP3_IDLE		0x00000080
+#	define EVENT_BM_VIDCAP_IDLE		0x00000100
+#	define EVENT_BM_GUI_IDLE		0x00000200
+#	define EVENT_CMDFIFO			0x00000400
+#	define EVENT_OV0_FLIP			0x00000800
+#	define EVENT_CMDFIFO_ENTRIES		0x07F00000
 #define	ISYNC_CNTL				0x1724
 #define	RBBM_GUICNTL				0x172C
 #define	RBBM_STATUS				0x0E40
@@ -563,7 +587,7 @@
 #	define SCALER_SOURCE_UNK7		0x00000F00L /* 16BPP_ARGB4444 */
 #	define SCALER_ADAPTIVE_DEINT		0x00001000L
 #	define R200_SCALER_TEMPORAL_DEINT	0x00002000L
-#	define SCALER_UNKNOWN_FLAG1		0x00004000L /* ??? */
+#	define SCALER_USE_OV1			0x00004000L /* Use/force Ov1 instead of Ov0 */
 #	define SCALER_SMART_SWITCH		0x00008000L
 #ifdef RAGE128
 #	define SCALER_BURST_PER_PLANE		0x00ff0000L
@@ -574,9 +598,7 @@
 #	define SCALER_UNKNOWN_FLAG3		0x02000000L /* ??? */
 #	define SCALER_UNKNOWN_FLAG4		0x04000000L /* ??? */
 #	define SCALER_DIS_LIMIT			0x08000000L
-#ifdef RAGE128
 #	define SCALER_PRG_LOAD_START		0x10000000L
-#endif
 #	define SCALER_INT_EMU			0x20000000L
 #	define SCALER_ENABLE			0x40000000L
 #	define SCALER_SOFT_RESET		0x80000000L
@@ -599,32 +621,32 @@
 #define	OV0_VID_BUF0_BASE_ADRS			0x0440
 #	define VIF_BUF0_PITCH_SEL		0x00000001L
 #	define VIF_BUF0_TILE_ADRS		0x00000002L
-#	define VIF_BUF0_BASE_ADRS_MASK		0x03fffff0L
+#	define VIF_BUF0_BASE_ADRS_MASK		0x0ffffff0L
 #	define VIF_BUF0_1ST_LINE_LSBS_MASK	0x48000000L
 #define	OV0_VID_BUF1_BASE_ADRS			0x0444
 #	define VIF_BUF1_PITCH_SEL		0x00000001L
 #	define VIF_BUF1_TILE_ADRS		0x00000002L
-#	define VIF_BUF1_BASE_ADRS_MASK		0x03fffff0L
+#	define VIF_BUF1_BASE_ADRS_MASK		0x0ffffff0L
 #	define VIF_BUF1_1ST_LINE_LSBS_MASK	0x48000000L
 #define	OV0_VID_BUF2_BASE_ADRS			0x0448
 #	define VIF_BUF2_PITCH_SEL		0x00000001L
 #	define VIF_BUF2_TILE_ADRS		0x00000002L
-#	define VIF_BUF2_BASE_ADRS_MASK		0x03fffff0L
+#	define VIF_BUF2_BASE_ADRS_MASK		0x0ffffff0L
 #	define VIF_BUF2_1ST_LINE_LSBS_MASK	0x48000000L
 #define	OV0_VID_BUF3_BASE_ADRS			0x044C
 #	define VIF_BUF3_PITCH_SEL		0x00000001L
 #	define VIF_BUF3_TILE_ADRS		0x00000002L
-#	define VIF_BUF3_BASE_ADRS_MASK		0x03fffff0L
+#	define VIF_BUF3_BASE_ADRS_MASK		0x0ffffff0L
 #	define VIF_BUF3_1ST_LINE_LSBS_MASK	0x48000000L
 #define	OV0_VID_BUF4_BASE_ADRS			0x0450
 #	define VIF_BUF4_PITCH_SEL		0x00000001L
 #	define VIF_BUF4_TILE_ADRS		0x00000002L
-#	define VIF_BUF4_BASE_ADRS_MASK		0x03fffff0L
+#	define VIF_BUF4_BASE_ADRS_MASK		0x0ffffff0L
 #	define VIF_BUF4_1ST_LINE_LSBS_MASK	0x48000000L
 #define	OV0_VID_BUF5_BASE_ADRS			0x0454
 #	define VIF_BUF5_PITCH_SEL		0x00000001L
 #	define VIF_BUF5_TILE_ADRS		0x00000002L
-#	define VIF_BUF5_BASE_ADRS_MASK		0x03fffff0L
+#	define VIF_BUF5_BASE_ADRS_MASK		0x0ffffff0L
 #	define VIF_BUF5_1ST_LINE_LSBS_MASK	0x48000000L
 #define	OV0_VID_BUF_PITCH0_VALUE		0x0460
 #define	OV0_VID_BUF_PITCH1_VALUE		0x0464
@@ -735,6 +757,18 @@
 #	define CMP_MIX_OR			0x00000000L
 #	define CMP_MIX_AND			0x00000100L
 #define	OV0_TEST				0x04F8
+#	define OV0_SCALER_Y2R_DISABLE		0x00000001L
+#	define OV0_SUBPIC_ONLY			0x00000008L
+#	define OV0_EXTENSE			0x00000010L
+#	define OV0_SWAP_UV			0x00000020L
+#define OV0_COL_CONV				0x04FC
+#	define OV0_CB_TO_B			0x0000007FL
+#	define OV0_CB_TO_G			0x0000FF00L
+#	define OV0_CR_TO_G			0x00FF0000L
+#	define OV0_CR_TO_R			0x7F000000L
+#	define OV0_NEW_COL_CONV			0x80000000L
+#define OV1_Y_X_START				0x0600
+#define OV1_Y_X_END				0x0604
 #define	OV0_LIN_TRANS_A				0x0D20
 #define	OV0_LIN_TRANS_B				0x0D24
 #define	OV0_LIN_TRANS_C				0x0D28
@@ -774,9 +808,25 @@
 #define IDCT_CONTROL				0x1FBC
 
 #define SE_MC_SRC2_CNTL				0x19D4
+#	define SECONDARY_SCALE_HACC		0x00001FFFL
+#	define SECONDARY_SCALE_VACC		0x0FFF0000L
+#	define SECONDARY_SCALE_PICTH_ADJ	0xC0000000L
 #define SE_MC_SRC1_CNTL				0x19D8
+#	define SCALE_HACC			0x00001FFFL
+#	define SCALE_VACC			0x0FFF0000L
+#	define IDCT_EN				0x10000000L
+#	define SECONDARY_TEX_EN			0x20000000L
+#	define SCALE_PICTH_ADJ			0xC0000000L
 #define SE_MC_DST_CNTL				0x19DC
+#	define DST_Y				0x00003FFFL
+#	define DST_X				0x3FFF0000L
+#	define DST_PITCH_ADJ			0xC0000000L
 #define SE_MC_CNTL_START			0x19E0
+#	define SCALE_OFFSET_PTR			0x0000000FL
+#	define DST_OFFSET			0x00FFFFF0L
+#	define ALPHA_EN				0x01000000L
+#	define SECONDARY_OFFSET_PTR		0x1E000000L
+#	define MC_DST_HEIGHT_WIDTH		0xE0000000L
 #ifndef RAGE128
 #define SE_MC_BUF_BASE				0x19E4
 #define PP_MC_CONTEXT				0x19E8
@@ -817,6 +867,7 @@
 #define CP_CSQ_CNTL				0x0740
 #define SCRATCH_UMSK				0x0770
 #define SCRATCH_ADDR				0x0774
+#ifndef RAGE128
 #define DMA_GUI_TABLE_ADDR			0x0780
 #	define DMA_GUI_COMMAND__BYTE_COUNT_MASK	0x001fffff
 #	define DMA_GUI_COMMAND__INTDIS		0x40000000
@@ -832,6 +883,7 @@
 #define DMA_VID_COMMAND				0x07AC
 #define DMA_VID_STATUS				0x07B0
 #define DMA_VID_ACT_DSCRPTR			0x07B4
+#endif
 #define CP_ME_CNTL				0x07D0
 #define CP_ME_RAM_ADDR				0x07D4
 #define CP_ME_RAM_RADDR				0x07D8
@@ -1031,6 +1083,20 @@
 #ifdef RAGE128
 #define GUI_STAT				0x1740
 #	define GUI_FIFOCNT_MASK			0x0fff
+#	define PM4_BUSY				(1 << 16)
+#	define MICRO_BUSY			(1 << 17)
+#	define FPU_BUSY				(1 << 18)
+#	define VC_BUSY				(1 << 19)
+#	define IDCT_BUSY			(1 << 20)
+#	define ENG_EV_BUSY			(1 << 21)
+#	define SETUP_BUSY			(1 << 22)
+#	define EDGE_WALK_BUSY			(1 << 23)
+#	define ADDRESSING_BUSY			(1 << 24)
+#	define ENG_3D_BUSY			(1 << 25)
+#	define ENG_2D_SM_BUSY			(1 << 26)
+#	define ENG_2D_BUSY			(1 << 27)
+#	define GUI_WB_BUSY			(1 << 28)
+#	define CACHE_BUSY			(1 << 29)
 #	define GUI_ACTIVE			(1 << 31)
 #endif
 #define	SRC_CLUT_ADDRESS			0x1780
@@ -1211,7 +1277,7 @@
 #define	RB2D_DSTCACHE_CTLSTAT			0x342C
 #define	RB2D_DSTCACHE_MODE			0x3428
 
-#define	BASE_CODE				0x0f0b
+#define	BASE_CODE				0x0f0b/*0x0f08*/
 #define	RADEON_BIOS_0_SCRATCH			0x0010
 #define	RADEON_BIOS_1_SCRATCH			0x0014
 #define	RADEON_BIOS_2_SCRATCH			0x0018
@@ -1303,6 +1369,7 @@
 #define	PPLL_POST3_DIV_MASK		0x00070000
 
 /* BUS MASTERING */
+#ifdef RAGE128
 #define BM_FRAME_BUF_OFFSET			0xA00
 #define BM_SYSTEM_MEM_ADDR			0xA04
 #define BM_COMMAND				0xA08
@@ -1338,7 +1405,8 @@
 #define BM_VIDCAP_BUF2				0xA68
 #define BM_VIDCAP_ACTIVE			0xA6c
 #define BM_GUI					0xA80
-
+#define BM_ABORT				0xA88
+#endif
 /* RAGE	THEATER	REGISTERS */
 
 #define DMA_VIPH0_COMMAND			0x0A00
--- a/vidix/radeon_vid.c	Sun Apr 22 10:27:44 2007 +0000
+++ b/vidix/radeon_vid.c	Sun Apr 22 13:25:50 2007 +0000
@@ -15,6 +15,7 @@
 #include <string.h>
 #include <math.h>
 #include <inttypes.h>
+#include <sys/mman.h>
 
 #include "config.h"
 #include "libavutil/common.h"
@@ -27,8 +28,9 @@
 #include "dha.h"
 #include "radeon.h"
 
-#ifdef HAVE_X11
+#if !defined(RAGE128) && defined(HAVE_X11)
 #include <X11/Xlib.h>
+static uint32_t firegl_shift = 0;
 #endif
 
 #ifdef RAGE128
@@ -36,22 +38,26 @@
 #define X_ADJUST 0
 #else
 #define RADEON_MSG "[radeon]"
-#define X_ADJUST (is_shift_required ? 8 : 0)
+#define X_ADJUST (((besr.chip_flags&R_OVL_SHIFT)==R_OVL_SHIFT)?8:0)
 #ifndef RADEON
 #define RADEON
 #endif
 #endif
 
+#define RADEON_ASSERT(msg) printf(RADEON_MSG"################# FATAL:"msg); /* prints a FATAL banner; msg must be a string literal (it is pasted into the format string). NOTE(review): the expansion already ends in ';', so "RADEON_ASSERT(x);" in an un-braced if/else will not compile */
+
+#define VERBOSE_LEVEL 0
 static int __verbose = 0;
-#ifdef RADEON
-static int is_shift_required = 0;
-#endif
-
 typedef struct bes_registers_s
 {
   /* base address of yuv framebuffer */
   uint32_t yuv_base;
   uint32_t fourcc;
+  uint32_t surf_id;
+  int load_prg_start;
+  int horz_pick_nearest;
+  int vert_pick_nearest;
+  int swap_uv; /* for direct support of bgr fourccs */
   uint32_t dest_bpp;
   /* YUV BES registers */
   uint32_t reg_load_cntl;
@@ -81,6 +87,7 @@
   uint32_t exclusive_horz;
   uint32_t auto_flip_cntl;
   uint32_t filter_cntl;
+  uint32_t four_tap_coeff[5];
   uint32_t key_cntl;
   uint32_t test;
   /* Configurable stuff */
@@ -93,10 +100,12 @@
   uint32_t graphics_key_clr;
   uint32_t graphics_key_msk;
   uint32_t ckey_cntl;
+  uint32_t merge_cntl;
   
   int deinterlace_on;
   uint32_t deinterlace_pattern;
   
+  unsigned chip_flags;
 } bes_registers_t;
 
 typedef struct video_registers_s
@@ -107,9 +116,6 @@
 }video_registers_t;
 
 static bes_registers_t besr;
-#ifndef RAGE128
-static int RadeonFamily=100;
-#endif
 #define DECLARE_VREG(name) { #name, name, 0 }
 static video_registers_t vregs[] = 
 {
@@ -120,6 +126,8 @@
   DECLARE_VREG(VIPPAD1_Y),
   DECLARE_VREG(OV0_Y_X_START),
   DECLARE_VREG(OV0_Y_X_END),
+  DECLARE_VREG(OV1_Y_X_START),
+  DECLARE_VREG(OV1_Y_X_END),
   DECLARE_VREG(OV0_PIPELINE_CNTL),
   DECLARE_VREG(OV0_EXCLUSIVE_HORZ),
   DECLARE_VREG(OV0_EXCLUSIVE_VERT),
@@ -204,47 +212,380 @@
   DECLARE_VREG(IDCT_AUTH_CONTROL),
   DECLARE_VREG(IDCT_AUTH),
   DECLARE_VREG(IDCT_CONTROL),
-  DECLARE_VREG(CONFIG_CNTL)
+#ifdef RAGE128
+  DECLARE_VREG(BM_FRAME_BUF_OFFSET),
+  DECLARE_VREG(BM_SYSTEM_MEM_ADDR),
+  DECLARE_VREG(BM_COMMAND),
+  DECLARE_VREG(BM_STATUS),
+  DECLARE_VREG(BM_QUEUE_STATUS),
+  DECLARE_VREG(BM_QUEUE_FREE_STATUS),
+  DECLARE_VREG(BM_CHUNK_0_VAL),
+  DECLARE_VREG(BM_CHUNK_1_VAL),
+  DECLARE_VREG(BM_VIP0_BUF),
+  DECLARE_VREG(BM_VIP0_ACTIVE),
+  DECLARE_VREG(BM_VIP1_BUF),
+  DECLARE_VREG(BM_VIP1_ACTIVE),
+  DECLARE_VREG(BM_VIP2_BUF),
+  DECLARE_VREG(BM_VIP2_ACTIVE),
+  DECLARE_VREG(BM_VIP3_BUF),
+  DECLARE_VREG(BM_VIP3_ACTIVE),
+  DECLARE_VREG(BM_VIDCAP_BUF0),
+  DECLARE_VREG(BM_VIDCAP_BUF1),
+  DECLARE_VREG(BM_VIDCAP_BUF2),
+  DECLARE_VREG(BM_VIDCAP_ACTIVE),
+  DECLARE_VREG(BM_GUI),
+  DECLARE_VREG(BM_ABORT)
+#else
+  DECLARE_VREG(DMA_GUI_TABLE_ADDR),
+  DECLARE_VREG(DMA_GUI_SRC_ADDR),
+  DECLARE_VREG(DMA_GUI_DST_ADDR),
+  DECLARE_VREG(DMA_GUI_COMMAND),
+  DECLARE_VREG(DMA_GUI_STATUS),
+  DECLARE_VREG(DMA_GUI_ACT_DSCRPTR),
+  DECLARE_VREG(DMA_VID_SRC_ADDR),
+  DECLARE_VREG(DMA_VID_DST_ADDR),
+  DECLARE_VREG(DMA_VID_COMMAND),
+  DECLARE_VREG(DMA_VID_STATUS),
+  DECLARE_VREG(DMA_VID_ACT_DSCRPTR),
+#endif
 };
 
-#ifdef HAVE_X11
-static uint32_t firegl_shift = 0;
+#define R_FAMILY	0x000000FF /* mask for the chip-family value below; NOTE(review): R_480..R_580 lie outside 0xFF - confirm how callers extract the family */
+#define R_100		0x00000001 /* R_100..R_580: chip-family values used in the flags column of ati_card_ids */
+#define R_120		0x00000002
+#define R_150		0x00000003 /* note: families up to R_350 are consecutive numbers, not single bits */
+#define R_200		0x00000004
+#define R_250		0x00000005
+#define R_280		0x00000006
+#define R_300		0x00000007
+#define R_350		0x00000008
+#define R_370		0x00000010 /* from here on each family is a distinct single bit */
+#define R_380		0x00000020
+#define R_420		0x00000040
+#define R_430		0x00000080
+#define R_480		0x00000100
+#define R_520		0x00000200
+#define R_530		0x00000400 /* not referenced by the ati_card_ids entries in this file section */
+#define R_580		0x00000800 /* not referenced by the ati_card_ids entries in this file section */
+#define R_OVL_SHIFT	0x01000000 /* when set in besr.chip_flags, X_ADJUST evaluates to 8 (non-RAGE128 build) */
+#define R_INTEGRATED	0x02000000 /* extra flag OR-ed into some ati_card_ids entries (mostly the IGP ones) */
+#define R_PCIE		0x04000000 /* extra flag OR-ed into some ati_card_ids entries; presumably marks PCI Express parts - TODO confirm */
+/* One row of the ati_card_ids table: a device id and the R_* flags attached to it. */
+typedef struct ati_card_ids_s
+{
+    unsigned short id; /* a DEVICE_ATI_* constant */
+    unsigned flags; /* OR of R_* values; 0 for every entry of the RAGE128 list */
+}ati_card_ids_t;
+/* Device ids known to this driver, each paired with its R_* flags; RAGE128 selects which of the two lists is compiled in. */
+static const ati_card_ids_t ati_card_ids[] = 
+{
+#ifdef RAGE128
+ /*
+    This driver should be compatible with Rage128 (Pro) chips
+    (including adaptive deinterlacing).
+    The same logic could also be used with Mach64 chips
+    (mach64xx, 3D Rage, 3D Rage IIc, 3D Rage Pro, 3D Rage Mobility),
+    but their I/O ports are incompatible. Anyone who wants that support
+    can redefine the OUTREG and INREG macros and the OV0_*
+    constants. Mach64 chips also appear to support only the YUY2, YV12 and
+    UYVY fourccs (4:2:2 and 4:2:0 formats only).
+  */
+/* Rage128 Pro GL */
+ { DEVICE_ATI_RAGE_128_PA_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PB_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PC_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PD_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PE_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PF_PRO, 0 },
+/* Rage128 Pro VR */
+ { DEVICE_ATI_RAGE_128_PG_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PH_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PI_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PJ_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PK_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PL_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PM_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PN_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PO_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PP_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PQ_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PR_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PS_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PT_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PU_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PV_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PW_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PX_PRO, 0 },
+/* Rage128 GL */
+ { DEVICE_ATI_RAGE_128_RE_SG, 0 },
+ { DEVICE_ATI_RAGE_128_RF_SG, 0 },
+ { DEVICE_ATI_RAGE_128_RG, 0 },
+ { DEVICE_ATI_RAGE_128_RK_VR, 0 },
+ { DEVICE_ATI_RAGE_128_RL_VR, 0 },
+ { DEVICE_ATI_RAGE_128_SE_4X, 0 },
+ { DEVICE_ATI_RAGE_128_SF_4X, 0 },
+ { DEVICE_ATI_RAGE_128_SG_4X, 0 },
+ { DEVICE_ATI_RAGE_128_SH, 0 },
+ { DEVICE_ATI_RAGE_128_SK_4X, 0 },
+ { DEVICE_ATI_RAGE_128_SL_4X, 0 },
+ { DEVICE_ATI_RAGE_128_SM_4X, 0 },
+ { DEVICE_ATI_RAGE_128_4X, 0 },
+ { DEVICE_ATI_RAGE_128_PRO, 0 },
+ { DEVICE_ATI_RAGE_128_PRO2, 0 },
+ { DEVICE_ATI_RAGE_128_PRO3, 0 },
+/* these seem to be based on rage 128 instead of mach64 */
+ { DEVICE_ATI_RAGE_MOBILITY_M3, 0 },
+ { DEVICE_ATI_RAGE_MOBILITY_M32, 0 },
+#else
+/* Radeon1 (indeed: Rage 256 Pro ;) */
+ { DEVICE_ATI_RADEON_R100_QD,		R_100|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_R100_QE,		R_100|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_R100_QF,		R_100|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_R100_QG,		R_100|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_IGP_320,		R_150|R_OVL_SHIFT|R_INTEGRATED },
+ { DEVICE_ATI_RADEON_MOBILITY_U1,	R_150|R_OVL_SHIFT|R_INTEGRATED },
+ { DEVICE_ATI_RADEON_RV100_QY,		R_120|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_RV100_QZ,		R_120|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_MOBILITY_M7,	R_150|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_RV200_LX,		R_150|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_MOBILITY_M6,	R_120|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_MOBILITY_M62,	R_120|R_OVL_SHIFT },
+/* Radeon2 (indeed: Rage 512 Pro ;) */
+ { DEVICE_ATI_R200_BB_RADEON,		R_200 },
+ { DEVICE_ATI_R200_BC_RADEON,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QH,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QI,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QJ,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QK,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QL,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QM,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QN,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QO,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QH2,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QI2,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QJ2,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QK2,		R_200 },
+ { DEVICE_ATI_RADEON_R200_QL2,		R_200 },
+ { DEVICE_ATI_RADEON_RV200_QW,		R_150|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_RV200_QX,		R_150|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_IGP330_340_350,R_200|R_INTEGRATED },
+ { DEVICE_ATI_RADEON_IGP_330M_340M_350M,R_200|R_INTEGRATED },
+ { DEVICE_ATI_RADEON_RV250_IG,		R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_7000_IGP,		R_250|R_OVL_SHIFT|R_INTEGRATED },
+ { DEVICE_ATI_RADEON_MOBILITY_7000,	R_250|R_OVL_SHIFT|R_INTEGRATED },
+ { DEVICE_ATI_RADEON_RV250_ID,		R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_RV250_IE,		R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_RV250_IF,		R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_RV250_IG,		R_250|R_OVL_SHIFT }, /* NOTE(review): this id is already listed above with the same flags */
+ { DEVICE_ATI_RADEON_R250_LD,		R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_R250_LE,		R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_R250_MOBILITY,	R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_R250_LG,		R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RV250_RADEON_9000,	R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RADEON_RV250_RADEON2,	R_250|R_OVL_SHIFT },
+ { DEVICE_ATI_RV280_RADEON_9200,	R_280 },
+ { DEVICE_ATI_RV280_RADEON_92002,	R_280 },
+ { DEVICE_ATI_RV280_RADEON_92003,	R_280 },
+ { DEVICE_ATI_RV280_RADEON_92004,	R_280 },
+ { DEVICE_ATI_RV280_RADEON_92005,	R_280 },
+ { DEVICE_ATI_RV280_RADEON_92006,	R_280 },
+ { DEVICE_ATI_RV280_RADEON_92007,	R_280 },
+ { DEVICE_ATI_M9_5C61_RADEON,		R_280 },
+ { DEVICE_ATI_M9_5C63_RADEON,		R_280 },
+/* Radeon3 (indeed: Rage 1024 Pro ;) */
+ { DEVICE_ATI_R300_AG_FIREGL,		R_300 },
+ { DEVICE_ATI_RADEON_R300_ND,		R_300 },
+ { DEVICE_ATI_RADEON_R300_NE,		R_300 },
+ { DEVICE_ATI_RADEON_R300_NG,		R_300 },
+ { DEVICE_ATI_R300_AD_RADEON,		R_300 },
+ { DEVICE_ATI_R300_AE_RADEON,		R_300 },
+ { DEVICE_ATI_R300_AF_RADEON,		R_300 },
+ { DEVICE_ATI_RADEON_9100_IGP2,		R_300|R_OVL_SHIFT|R_INTEGRATED },
+ { DEVICE_ATI_RS300M_AGP_RADEON,	R_300|R_INTEGRATED },
+ { DEVICE_ATI_R350_AH_RADEON,		R_350 },
+ { DEVICE_ATI_R350_AI_RADEON,		R_350 },
+ { DEVICE_ATI_R350_AJ_RADEON,		R_350 },
+ { DEVICE_ATI_R350_AK_FIRE,		R_350 },
+ { DEVICE_ATI_RADEON_R350_RADEON2,	R_350 },
+ { DEVICE_ATI_RADEON_R350_RADEON3,	R_350 },
+ { DEVICE_ATI_RV350_NJ_RADEON,		R_350 },
+ { DEVICE_ATI_R350_NK_FIRE,		R_350 },
+ { DEVICE_ATI_RV350_AP_RADEON,		R_350 },
+ { DEVICE_ATI_RV350_AQ_RADEON,		R_350 },
+ { DEVICE_ATI_RV350_AR_RADEON,		R_350 },
+ { DEVICE_ATI_RV350_AS_RADEON,		R_350 },
+ { DEVICE_ATI_RV350_AT_FIRE,		R_350 },
+ { DEVICE_ATI_RV350_AU_FIRE,		R_350 },
+ { DEVICE_ATI_RV350_AV_FIRE,		R_350 },
+ { DEVICE_ATI_RV350_AW_FIRE,		R_350 },
+ { DEVICE_ATI_RV350_MOBILITY_RADEON,	R_350 },
+ { DEVICE_ATI_RV350_NF_RADEON,		R_300 },
+ { DEVICE_ATI_RV350_NJ_RADEON,		R_300 }, /* NOTE(review): this id is already listed above with R_350 - confirm which family is intended */
+ { DEVICE_ATI_RV350_AS_RADEON2,		R_350 },
+ { DEVICE_ATI_M10_NQ_RADEON,		R_350 },
+ { DEVICE_ATI_M10_NQ_RADEON2,		R_350 },
+ { DEVICE_ATI_RV350_MOBILITY_RADEON2,	R_350 },
+ { DEVICE_ATI_M10_NS_RADEON,		R_350 },
+ { DEVICE_ATI_M10_NT_FIREGL,		R_350 },
+ { DEVICE_ATI_M11_NV_FIREGL,		R_350 },
+ { DEVICE_ATI_RV370_5B60_RADEON,	R_370|R_PCIE  },
+ { DEVICE_ATI_RV370_SAPPHIRE_X550,	R_370 },
+ { DEVICE_ATI_RV370_5B64_FIREGL,	R_370|R_PCIE  },
+ { DEVICE_ATI_RV370_5B65_FIREGL,	R_370|R_PCIE  },
+ { DEVICE_ATI_M24_1P_RADEON,		R_370  },
+ { DEVICE_ATI_M22_RADEON_MOBILITY,	R_370  },
+ { DEVICE_ATI_M24_1T_FIREGL,		R_370  },
+ { DEVICE_ATI_M24_RADEON_MOBILITY,	R_370  },
+ { DEVICE_ATI_RV370_RADEON_X300SE,	R_370  },
+ { DEVICE_ATI_RV370_SECONDARY_SAPPHIRE,	R_370  },
+ { DEVICE_ATI_RV370_5B64_FIREGL2,	R_370  },
+ { DEVICE_ATI_RV380_0X3E50_RADEON,	R_380|R_PCIE  },
+ { DEVICE_ATI_RV380_0X3E54_FIREGL,	R_380|R_PCIE  },
+ { DEVICE_ATI_RV380_RADEON_X600,	R_380|R_PCIE  },
+ { DEVICE_ATI_RV380_RADEON_X6002,	R_380  },
+ { DEVICE_ATI_RV380_RADEON_X6003,	R_380  },
+ { DEVICE_ATI_RV410_FIREGL_V5000,	R_420  },
+ { DEVICE_ATI_RV410_FIREGL_V3300,	R_420  },
+ { DEVICE_ATI_RV410_RADEON_X700XT,	R_420  },
+ { DEVICE_ATI_RV410_RADEON_X700,	R_420|R_PCIE  },
+ { DEVICE_ATI_RV410_RADEON_X700SE,	R_420  },
+ { DEVICE_ATI_RV410_RADEON_X7002,	R_420|R_PCIE  },
+ { DEVICE_ATI_RV410_RADEON_X7003,	R_420  },
+ { DEVICE_ATI_RV410_RADEON_X7004,	R_420|R_PCIE  },
+ { DEVICE_ATI_RV410_RADEON_X7005,	R_420|R_PCIE  },
+ { DEVICE_ATI_M26_MOBILITY_FIREGL,	R_420  },
+ { DEVICE_ATI_M26_MOBILITY_FIREGL2,	R_420  },
+ { DEVICE_ATI_M26_RADEON_MOBILITY,	R_420  },
+ { DEVICE_ATI_M26_RADEON_MOBILITY2,	R_420  },
+ { DEVICE_ATI_RADEON_MOBILITY_X700,	R_420  },
+ { DEVICE_ATI_R420_JH_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_JI_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_JJ_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_JK_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_JL_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_JM_FIREGL,		R_420|R_PCIE  },
+ { DEVICE_ATI_M18_JN_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_JP_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_RADEON_X800,		R_420|R_PCIE  },
+ { DEVICE_ATI_R420_RADEON_X8002,	R_420|R_PCIE  },
+ { DEVICE_ATI_R420_RADEON_X8003,	R_420|R_PCIE  },
+ { DEVICE_ATI_R420_RADEON_X8004,	R_420|R_PCIE  },
+ { DEVICE_ATI_R420_RADEON_X8005,	R_420|R_PCIE  },
+ { DEVICE_ATI_R420_JM_FIREGL,		R_420|R_PCIE  }, /* NOTE(review): this id is already listed above with the same flags */
+ { DEVICE_ATI_R423_5F57_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_5F57_RADEON2,	R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UH_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UI_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UJ_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UK_RADEON,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_FIRE_GL,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UQ_FIREGL,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UR_FIREGL,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UT_FIREGL,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423_UI_RADEON2,		R_420|R_PCIE  },
+ { DEVICE_ATI_R423GL_SE_ATI_FIREGL,	R_420|R_PCIE  },
+ { DEVICE_ATI_R423_RADEON_X800XT,	R_420|R_PCIE  },
+ { DEVICE_ATI_RADEON_R423_UK,		R_420|R_PCIE  },
+ { DEVICE_ATI_M28_RADEON_MOBILITY,	R_420  },
+ { DEVICE_ATI_M28_MOBILITY_FIREGL,	R_420  },
+ { DEVICE_ATI_MOBILITY_RADEON_X800,	R_420  },
+ { DEVICE_ATI_R430_RADEON_X800,		R_430|R_PCIE  },
+ { DEVICE_ATI_R430_RADEON_X8002,	R_430|R_PCIE  },
+ { DEVICE_ATI_R430_RADEON_X8003,	R_430|R_PCIE  },
+ { DEVICE_ATI_R430_RADEON_X8004,	R_430|R_PCIE  },
+ { DEVICE_ATI_R480_RADEON_X800,		R_480  },
+ { DEVICE_ATI_R480_RADEON_X8002,	R_480  },
+ { DEVICE_ATI_R480_RADEON_X850XT,	R_480  },
+ { DEVICE_ATI_R480_RADEON_X850PRO,	R_480  },
+ { DEVICE_ATI_R481_RADEON_X850XT_PE,	R_480|R_PCIE  },
+ { DEVICE_ATI_R480_RADEON_X850XT2,	R_480  },
+ { DEVICE_ATI_R480_RADEON_X850PRO2,	R_480  },
+ { DEVICE_ATI_R481_RADEON_X850XT_PE2,	R_480|R_PCIE  },
+ { DEVICE_ATI_R480_RADEON_X850XT3,	R_480|R_PCIE  },
+ { DEVICE_ATI_R480_RADEON_X850XT4,	R_480|R_PCIE  },
+ { DEVICE_ATI_R480_RADEON_X850XT5,	R_480|R_PCIE  },
+ { DEVICE_ATI_R480_RADEON_X850XT6,	R_480|R_PCIE  },
+ { DEVICE_ATI_R520_FIREGL,		R_520  },
+ { DEVICE_ATI_R520_GL_ATI,		R_520  },
+ { DEVICE_ATI_R520_GL_ATI2,		R_520  },
+ { DEVICE_ATI_R520_RADEON_X1800,	R_520  },
+ { DEVICE_ATI_R520_RADEON_X18002,	R_520  },
+ { DEVICE_ATI_R520_RADEON_X18003,	R_520  },
+ { DEVICE_ATI_R520_RADEON_X18004,	R_520  },
+ { DEVICE_ATI_R520_RADEON_X18005,	R_520  },
+ { DEVICE_ATI_R520_RADEON_X18006,	R_520  },
+ { DEVICE_ATI_R520_RADEON_X18007,	R_520  },
+ { DEVICE_ATI_M58_RADEON_MOBILITY,	R_520  },
+ { DEVICE_ATI_M58_RADEON_MOBILITY2,	R_520  },
+ { DEVICE_ATI_M58_MOBILITY_FIREGL,	R_520  },
+ { DEVICE_ATI_M58_MOBILITY_FIREGL2,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X1600,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X1300,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X13002,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X13003,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X13004,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X13005,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X13006,	R_520  },
+ { DEVICE_ATI_RV515_RADEON_X13007,	R_520  },
+ { DEVICE_ATI_RV515_GL_ATI,		R_520  },
+ { DEVICE_ATI_RV515_GL_ATI2,		R_520  },
+ { DEVICE_ATI_RADEON_MOBILITY_X1400,	R_520  },
+ { DEVICE_ATI_M52_ATI_MOBILITY,		R_520  },
+ { DEVICE_ATI_M52_ATI_MOBILITY2,	R_520  },
+ { DEVICE_ATI_M52_ATI_MOBILITY3,	R_520  },
+ { DEVICE_ATI_M52_ATI_MOBILITY4,	R_520  },
+ { DEVICE_ATI_RV516_RADEON_X1300,	R_520  },
+ { DEVICE_ATI_RV516_RADEON_X13002,	R_520  },
+ { DEVICE_ATI_RV516_XT_RADEON,		R_520  },
+ { DEVICE_ATI_RV516_XT_RADEON2,		R_520  },
+ { DEVICE_ATI_RV530_RADEON_X1600,	R_520  },
+ { DEVICE_ATI_RV530_RADEON_X16002,	R_520  },
+ { DEVICE_ATI_M56GL_ATI_MOBILITY,	R_520  },
+ { DEVICE_ATI_M56P_RADEON_MOBILITY,	R_520  },
+ { DEVICE_ATI_M66_P_ATI_MOBILITY,	R_520  },
+ { DEVICE_ATI_M66_XT_ATI_MOBILITY,	R_520  },
+ { DEVICE_ATI_RV530LE_RADEON_X1600,	R_520  },
+ { DEVICE_ATI_RV530LE_RADEON_X16002,	R_520  },
+ { DEVICE_ATI_RV530LE_RADEON_X16003,	R_520  },
+ { DEVICE_ATI_RV530_RADEON_X16003,	R_520  },
+ { DEVICE_ATI_RV530_RADEON_X16004,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X1900,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19002,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19003,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19004,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19005,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19006,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19007,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19008,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X19009,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X190010,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X190011,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X190012,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X190013,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X190014,	R_520  },
+ { DEVICE_ATI_R580_RADEON_X190015,	R_520  },
+ { DEVICE_ATI_R580_FIREGL_V7300_V7350,	R_520  },
+ { DEVICE_ATI_R580_FIREGL_V7300_V73502,	R_520  },
 #endif
+};
+
+
 static void * radeon_mmio_base = 0;
 static void * radeon_mem_base = 0; 
 static int32_t radeon_overlay_off = 0;
 static uint32_t radeon_ram_size = 0;
-/* Restore on exit */
-static uint32_t SAVED_OV0_GRAPHICS_KEY_CLR = 0;
-static uint32_t SAVED_OV0_GRAPHICS_KEY_MSK = 0;
-static uint32_t SAVED_OV0_VID_KEY_CLR = 0;
-static uint32_t SAVED_OV0_VID_KEY_MSK = 0;
-static uint32_t SAVED_OV0_KEY_CNTL = 0;
-#ifdef WORDS_BIGENDIAN
-static uint32_t SAVED_CONFIG_CNTL = 0;
-#if defined(RAGE128)
-#define APER_0_BIG_ENDIAN_16BPP_SWAP (1<<0)
-#define APER_0_BIG_ENDIAN_32BPP_SWAP (2<<0)
-#else
-#define RADEON_SURFACE_CNTL                 0x0b00
-#define RADEON_NONSURF_AP0_SWP_16BPP (1 << 20)
-#define RADEON_NONSURF_AP0_SWP_32BPP (1 << 21)
-#endif
-#endif
 
 #define GETREG(TYPE,PTR,OFFZ)		(*((volatile TYPE*)((PTR)+(OFFZ))))
 #define SETREG(TYPE,PTR,OFFZ,VAL)	(*((volatile TYPE*)((PTR)+(OFFZ))))=VAL
 
-#define INREG8(addr)		GETREG(uint8_t,(uint8_t*)(radeon_mmio_base),addr)
-#define OUTREG8(addr,val)	SETREG(uint8_t,(uint8_t*)(radeon_mmio_base),addr,val)
-
+#define INREG8(addr)		GETREG(uint8_t,(uint8_t *)(radeon_mmio_base),addr)
+#define OUTREG8(addr,val)	SETREG(uint8_t,(uint8_t *)(radeon_mmio_base),addr,val)
 static inline uint32_t INREG (uint32_t addr) {
-	uint32_t tmp = GETREG(uint32_t,(uint8_t*)(radeon_mmio_base),addr);
-	return le2me_32(tmp);
+    uint32_t tmp = GETREG(uint32_t,(uint8_t *)(radeon_mmio_base),addr); /* volatile 32-bit read at radeon_mmio_base + addr */
+    return le2me_32(tmp); /* presumably little-endian -> host byte order; le2me_32 is defined outside this file - TODO confirm */
 }
-//#define OUTREG(addr,val)	SETREG(uint32_t,(uint8_t*)(radeon_mmio_base),addr,val)
-#define OUTREG(addr,val)	SETREG(uint32_t,(uint8_t*)(radeon_mmio_base),addr,le2me_32(val))
-#define OUTREGP(addr,val,mask)  					\
+#define OUTREG(addr,val)	SETREG(uint32_t,(uint8_t *)(radeon_mmio_base),addr,le2me_32(val))
+#define OUTREGP(addr,val,mask)						\
 	do {								\
 		unsigned int _tmp = INREG(addr);			\
 		_tmp &= (mask);						\
@@ -260,7 +601,7 @@
 
 #define OUTPLL(addr,val)	OUTREG8(CLOCK_CNTL_INDEX, (addr & 0x0000001f) | 0x00000080); \
 				OUTREG(CLOCK_CNTL_DATA, val)
-#define OUTPLLP(addr,val,mask)  					\
+#define OUTPLLP(addr,val,mask)						\
 	do {								\
 		unsigned int _tmp = INPLL(addr);			\
 		_tmp &= (mask);						\
@@ -268,6 +609,85 @@
 		OUTPLL(addr, _tmp);					\
 	} while (0)
 
+#ifndef RAGE128
+enum radeon_montype /* display kinds stored in rinfo_t.crtDispType / dviDispType */
+{
+    MT_NONE, /* no display; GET_MON_NAME() reports it as "no" */
+    MT_CRT, /* CRT-(cathode ray tube) analog monitor. (15-pin VGA connector) */
+    MT_LCD, /* Liquid Crystal Display */
+    MT_DFP, /* DFP-digital flat panel monitor. (24-pin DVI-I connector) */
+    MT_CTV, /* Composite TV out (not in VE) */
+    MT_STV  /* S-Video TV out (probably in VE only) */
+};
+/* Display information: hasCRTC2 is an input to radeon_get_moninfo(), which fills in the two DispType fields. */
+typedef struct radeon_info_s
+{
+	int hasCRTC2; /* non-zero selects the RADEON_BIOS_4_SCRATCH decoding path in radeon_get_moninfo() */
+	int crtDispType; /* MT_* value for what the code calls the "secondary CRT port" */
+	int dviDispType; /* MT_* value for what the code calls the "primary DVI port" */
+}rinfo_t;
+
+static rinfo_t rinfo; /* file-scope instance read by radeon_get_xres() and radeon_get_yres() */
+/* Return a short printable name for an MT_* monitor type; any other value yields "Unknown". */
+static char * GET_MON_NAME(int type)
+{
+  char *pret; /* always points at a string literal: the result must not be modified or freed */
+  switch(type)
+  {
+    case MT_NONE: pret = "no"; break;
+    case MT_CRT:  pret = "CRT"; break;
+    case MT_DFP:  pret = "DFP"; break;
+    case MT_LCD:  pret = "LCD"; break;
+    case MT_CTV:  pret = "CTV"; break;
+    case MT_STV:  pret = "STV"; break;
+    default:	  pret = "Unknown";
+  }
+  return pret;
+}
+/* Fill rinfo->dviDispType and rinfo->crtDispType from hardware registers; rinfo->hasCRTC2 is read, so it must be set before the call. */
+static void radeon_get_moninfo (rinfo_t *rinfo) /* note: the parameter shadows the file-scope variable of the same name */
+{
+	unsigned int tmp;
+
+	tmp = INREG(RADEON_BIOS_4_SCRATCH); /* the bit tests below decode this scratch register */
+
+	if (rinfo->hasCRTC2) {
+		/* primary DVI port */
+		if (tmp & 0x08)
+			rinfo->dviDispType = MT_DFP;
+		else if (tmp & 0x4)
+			rinfo->dviDispType = MT_LCD;
+		else if (tmp & 0x200)
+			rinfo->dviDispType = MT_CRT;
+		else if (tmp & 0x10)
+			rinfo->dviDispType = MT_CTV;
+		else if (tmp & 0x20)
+			rinfo->dviDispType = MT_STV; /* no final else: if none of the bits is set, dviDispType keeps its previous value */
+
+		/* secondary CRT port */
+		if (tmp & 0x2)
+			rinfo->crtDispType = MT_CRT;
+		else if (tmp & 0x800)
+			rinfo->crtDispType = MT_DFP;
+		else if (tmp & 0x400)
+			rinfo->crtDispType = MT_LCD;
+		else if (tmp & 0x1000)
+			rinfo->crtDispType = MT_CTV;
+		else if (tmp & 0x2000)
+			rinfo->crtDispType = MT_STV; /* likewise, crtDispType is left untouched when no bit matches */
+	} else {
+		rinfo->dviDispType = MT_NONE;
+
+		tmp = INREG(FP_GEN_CNTL); /* without CRTC2 the only distinction made is DFP vs CRT, via FP_EN_TMDS */
+
+		if (tmp & FP_EN_TMDS)
+			rinfo->crtDispType = MT_DFP;
+		else
+			rinfo->crtDispType = MT_CRT;
+	}
+}
+#endif
+
 static uint32_t radeon_vid_get_dbpp( void )
 {
   uint32_t dbpp,retval;
@@ -295,35 +715,32 @@
 
 static uint32_t radeon_get_xres( void )
 {
-  /* FIXME: currently we extract that from CRTC!!!*/
   uint32_t xres,h_total;
-  h_total = INREG(CRTC_H_TOTAL_DISP);
+#ifndef RAGE128
+  if(rinfo.hasCRTC2 && 
+       (rinfo.dviDispType == MT_CTV || rinfo.dviDispType == MT_STV))
+	h_total = INREG(CRTC2_H_TOTAL_DISP); /* TV (composite or S-Video) on the DVI port: take the value from the CRTC2 register */
+  else
+#endif
+	h_total = INREG(CRTC_H_TOTAL_DISP); /* every other case, and always on RAGE128 builds */
   xres = (h_total >> 16) & 0xffff;
   return (xres + 1)*8;
 }
 
 static uint32_t radeon_get_yres( void )
 {
-  /* FIXME: currently we extract that from CRTC!!!*/
   uint32_t yres,v_total;
-  v_total = INREG(CRTC_V_TOTAL_DISP);
+#ifndef RAGE128
+  if(rinfo.hasCRTC2 && 
+       (rinfo.dviDispType == MT_CTV || rinfo.dviDispType == MT_STV))
+	v_total = INREG(CRTC2_V_TOTAL_DISP); /* TV (composite or S-Video) on the DVI port: take the value from the CRTC2 register */
+  else
+#endif
+	v_total = INREG(CRTC_V_TOTAL_DISP); /* every other case, and always on RAGE128 builds */
   yres = (v_total >> 16) & 0xffff;
   return yres + 1;
 }
 
-/* get flat panel x resolution*/
-static uint32_t radeon_get_fp_xres( void ){
-  uint32_t xres=(INREG(FP_HORZ_STRETCH)&0x00fff000)>>16;
-  xres=(xres+1)*8;
-  return xres;
-}
-
-/* get flat panel y resolution*/
-static uint32_t radeon_get_fp_yres( void ){
-  uint32_t yres=(INREG(FP_VERT_STRETCH)&0x00fff000)>>12;
-  return yres+1;
-}
-
 static void radeon_wait_vsync(void)
 {
     int i;
@@ -361,7 +778,7 @@
     radeon_engine_flush();
 
     clock_cntl_index = INREG(CLOCK_CNTL_INDEX);
-    mclk_cntl        = INPLL(MCLK_CNTL);
+    mclk_cntl	     = INPLL(MCLK_CNTL);
 
     OUTPLL(MCLK_CNTL, mclk_cntl | FORCE_GCP | FORCE_PIPE3D_CP);
 
@@ -373,7 +790,7 @@
 	gen_reset_cntl & (uint32_t)(~SOFT_RESET_GUI));
     INREG(GEN_RESET_CNTL);
 
-    OUTPLL(MCLK_CNTL,        mclk_cntl);
+    OUTPLL(MCLK_CNTL,	     mclk_cntl);
     OUTREG(CLOCK_CNTL_INDEX, clock_cntl_index);
     OUTREG(GEN_RESET_CNTL,   gen_reset_cntl);
 }
@@ -385,7 +802,7 @@
 
 	/* initiate flush */
 	OUTREGP(RB2D_DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL,
-	        ~RB2D_DC_FLUSH_ALL);
+		~RB2D_DC_FLUSH_ALL);
 
 	for (i=0; i < 2000000; i++) {
 		if (!(INREG(RB2D_DSTCACHE_CTLSTAT) & RB2D_DC_BUSY))
@@ -464,9 +881,8 @@
 
     radeon_fifo_wait(1);
 #if defined(WORDS_BIGENDIAN)
-#ifdef RADEON
-    OUTREGP(DP_DATATYPE, HOST_BIG_ENDIAN_EN, ~HOST_BIG_ENDIAN_EN);
-#endif
+    OUTREGP(DP_DATATYPE,
+	    HOST_BIG_ENDIAN_EN, ~HOST_BIG_ENDIAN_EN);
 #else
     OUTREGP(DP_DATATYPE, 0, ~HOST_BIG_ENDIAN_EN);
 #endif
@@ -578,19 +994,19 @@
 	{1.1678, 0.0, 1.7980, -0.2139, -0.5345, 2.1186, 0.0}  /* BT.709 */
 };
 /****************************************************************************
- * SetTransform                                                             *
- *  Function: Calculates and sets color space transform from supplied       *
- *            reference transform, gamma, brightness, contrast, hue and     *
- *            saturation.                                                   *
- *    Inputs: bright - brightness                                           *
- *            cont - contrast                                               *
- *            sat - saturation                                              *
- *            hue - hue                                                     *
- *            red_intensity - intense of red component                      *
- *            green_intensity - intense of green component                  *
- *            blue_intensity - intense of blue component                    *
- *            ref - index to the table of refernce transforms               *
- *   Outputs: NONE                                                          *
+ * SetTransform								    *
+ *  Function: Calculates and sets color space transform from supplied	    *
+ *	      reference transform, gamma, brightness, contrast, hue and	    *
+ *	      saturation.						    *
+ *    Inputs: bright - brightness					    *
+ *	      cont - contrast						    *
+ *	      sat - saturation						    *
+ *	      hue - hue							    *
+ *	      red_intensity - intensity of red component		    *
+ *	      green_intensity - intensity of green component		    *
+ *	      blue_intensity - intensity of blue component		    *
+ *	      ref - index to the table of reference transforms		    *
+ *   Outputs: NONE							    *
  ****************************************************************************/
 
 static void radeon_set_transform(float bright, float cont, float sat,
@@ -635,7 +1051,7 @@
 	CAdjBCr = sat * OvHueSin * trans[ref].RefBCb;
     
 #if 0 /* default constants */
-        CAdjLuma = 1.16455078125;
+	CAdjLuma = 1.16455078125;
 
 	CAdjRCb = 0.0;
 	CAdjRCr = 1.59619140625;
@@ -669,7 +1085,7 @@
 	/* Whatever docs say about R200 having 3.8 format instead of 3.11
 	   as in Radeon is a lie */
 #if 0
-	if(RadeonFamily == 100)
+	if(!IsR200)
 	{
 #endif
 		dwOvLuma =(((int)(OvLuma * 2048.0))&0x7fff)<<17;
@@ -744,7 +1160,9 @@
 static void make_default_gamma_correction( void )
 {
     size_t i;
-    if(RadeonFamily == 100) {
+    if((besr.chip_flags & R_100)==R_100||
+	(besr.chip_flags & R_120)==R_120||
+	(besr.chip_flags & R_150)==R_150){
 	OUTREG(OV0_LIN_TRANS_A, 0x12A00000);
 	OUTREG(OV0_LIN_TRANS_B, 0x199018FE);
 	OUTREG(OV0_LIN_TRANS_C, 0x12A0F9B0);
@@ -754,24 +1172,23 @@
 	for(i=0; i<6; i++){
 		OUTREG(r100_def_gamma[i].gammaReg,
 		       (r100_def_gamma[i].gammaSlope<<16) |
-		        r100_def_gamma[i].gammaOffset);
+			r100_def_gamma[i].gammaOffset);
 	}
     }
     else{
-	OUTREG(OV0_LIN_TRANS_A, 0x12a00000);
-	OUTREG(OV0_LIN_TRANS_B, 0x1990190e);
-	OUTREG(OV0_LIN_TRANS_C, 0x12a0f9c0);
-	OUTREG(OV0_LIN_TRANS_D, 0xf3000442);
-	OUTREG(OV0_LIN_TRANS_E, 0x12a02040);
+	OUTREG(OV0_LIN_TRANS_A, 0x12a20000);
+	OUTREG(OV0_LIN_TRANS_B, 0x198a190e);
+	OUTREG(OV0_LIN_TRANS_C, 0x12a2f9da);
+	OUTREG(OV0_LIN_TRANS_D, 0xf2fe0442);
+	OUTREG(OV0_LIN_TRANS_E, 0x12a22046);
 	OUTREG(OV0_LIN_TRANS_F, 0x175f);
-
 	/* Default Gamma,
 	   Of 18 segments for gamma cure, all segments in R200 are programmable,
 	   while only lower 4 and upper 2 segments are programmable in Radeon*/
 	for(i=0; i<18; i++){
 		OUTREG(r200_def_gamma[i].gammaReg,
 		       (r200_def_gamma[i].gammaSlope<<16) |
-		        r200_def_gamma[i].gammaOffset);
+			r200_def_gamma[i].gammaOffset);
 	}
     }
 }
@@ -780,7 +1197,9 @@
 static void radeon_vid_make_default(void)
 {
 #ifdef RAGE128
-  OUTREG(OV0_COLOUR_CNTL,0x00101000UL); /* Default brightness and saturation for Rage128 */
+  besr.saturation = 0x0F;
+  besr.brightness = 0;
+  OUTREG(OV0_COLOUR_CNTL,0x000F0F00UL); /* Default brightness and saturation for Rage128 */
 #else
   make_default_gamma_correction();
 #endif
@@ -794,133 +1213,12 @@
   besr.ckey_cntl = VIDEO_KEY_FN_TRUE|GRAPHIC_KEY_FN_TRUE|CMP_MIX_AND;
 }
 
-static unsigned short ati_card_ids[] = 
-{
-#ifdef RAGE128
- /*
-    This driver should be compatible with Rage128 (pro) chips.
-    (include adaptive deinterlacing!!!).
-    Moreover: the same logic can be used with Mach64 chips.
-    (I mean: mach64xx, 3d rage, 3d rage IIc, 3D rage pro, 3d rage mobility).
-    but they are incompatible by i/o ports. So if enthusiasts will want
-    then they can redefine OUTREG and INREG macros and redefine OV0_*
-    constants. Also it seems that mach64 chips supports only: YUY2, YV12, UYVY
-    fourccs (422 and 420 formats only).
-  */
-/* Rage128 Pro GL */
- DEVICE_ATI_RAGE_128_PA_PRO,
- DEVICE_ATI_RAGE_128_PB_PRO,
- DEVICE_ATI_RAGE_128_PC_PRO,
- DEVICE_ATI_RAGE_128_PD_PRO,
- DEVICE_ATI_RAGE_128_PE_PRO,
- DEVICE_ATI_RAGE_128_PF_PRO,
-/* Rage128 Pro VR */
- DEVICE_ATI_RAGE_128_PG_PRO,
- DEVICE_ATI_RAGE_128_PH_PRO,
- DEVICE_ATI_RAGE_128_PI_PRO,
- DEVICE_ATI_RAGE_128_PJ_PRO,
- DEVICE_ATI_RAGE_128_PK_PRO,
- DEVICE_ATI_RAGE_128_PL_PRO,
- DEVICE_ATI_RAGE_128_PM_PRO,
- DEVICE_ATI_RAGE_128_PN_PRO,
- DEVICE_ATI_RAGE_128_PO_PRO,
- DEVICE_ATI_RAGE_128_PP_PRO,
- DEVICE_ATI_RAGE_128_PQ_PRO,
- DEVICE_ATI_RAGE_128_PR_PRO,
- DEVICE_ATI_RAGE_128_PS_PRO,
- DEVICE_ATI_RAGE_128_PT_PRO,
- DEVICE_ATI_RAGE_128_PU_PRO,
- DEVICE_ATI_RAGE_128_PV_PRO,
- DEVICE_ATI_RAGE_128_PW_PRO,
- DEVICE_ATI_RAGE_128_PX_PRO,
-/* Rage128 GL */
- DEVICE_ATI_RAGE_128_RE_SG,
- DEVICE_ATI_RAGE_128_RF_SG,
- DEVICE_ATI_RAGE_128_RG,
- DEVICE_ATI_RAGE_128_RK_VR,
- DEVICE_ATI_RAGE_128_RL_VR,
- DEVICE_ATI_RAGE_128_SE_4X,
- DEVICE_ATI_RAGE_128_SF_4X,
- DEVICE_ATI_RAGE_128_SG_4X,
- DEVICE_ATI_RAGE_128_SH,
- DEVICE_ATI_RAGE_128_SK_4X,
- DEVICE_ATI_RAGE_128_SL_4X,
- DEVICE_ATI_RAGE_128_SM_4X,
- DEVICE_ATI_RAGE_128_4X,
- DEVICE_ATI_RAGE_128_PRO,
- DEVICE_ATI_RAGE_128_PRO2,
- DEVICE_ATI_RAGE_128_PRO3,
-/* these seem to be based on rage 128 instead of mach64 */
- DEVICE_ATI_RAGE_MOBILITY_M3,
- DEVICE_ATI_RAGE_MOBILITY_M32
-#else
-/* Radeons (indeed: Rage 256 Pro ;) */
- DEVICE_ATI_RADEON_R100_QD,
- DEVICE_ATI_RADEON_R100_QE,
- DEVICE_ATI_RADEON_R100_QF,
- DEVICE_ATI_RADEON_R100_QG,
- DEVICE_ATI_RADEON_RV100_QY,
- DEVICE_ATI_RADEON_RV100_QZ,
- DEVICE_ATI_RADEON_MOBILITY_M7,
- DEVICE_ATI_RADEON_RV200_LX,
- DEVICE_ATI_RADEON_MOBILITY_M6,
- DEVICE_ATI_RADEON_MOBILITY_M62,
- DEVICE_ATI_RADEON_MOBILITY_U1,
- DEVICE_ATI_R200_BB_RADEON,
- DEVICE_ATI_RADEON_R200_QH,
- DEVICE_ATI_RADEON_R200_QI,
- DEVICE_ATI_RADEON_R200_QJ,
- DEVICE_ATI_RADEON_R200_QK,
- DEVICE_ATI_RADEON_R200_QL,
- DEVICE_ATI_RADEON_R200_QM,
- DEVICE_ATI_RADEON_R200_QH2,
- DEVICE_ATI_RADEON_R200_QI2,
- DEVICE_ATI_RADEON_R200_QJ2,
- DEVICE_ATI_RADEON_R200_QK2,
- DEVICE_ATI_RADEON_RV200_QW,
- DEVICE_ATI_RADEON_RV200_QX,
- DEVICE_ATI_RADEON_RV250_ID,
- DEVICE_ATI_RADEON_RV250_IE,
- DEVICE_ATI_RADEON_RV250_IF,
- DEVICE_ATI_RADEON_RV250_IG,
- DEVICE_ATI_RADEON_R250_LD,
- DEVICE_ATI_RADEON_R250_LE,
- DEVICE_ATI_RADEON_R250_MOBILITY,
- DEVICE_ATI_RADEON_R250_LG,
- DEVICE_ATI_RV370_5B60_RADEON,
- DEVICE_ATI_M9_5C61_RADEON,
- DEVICE_ATI_M9_5C63_RADEON,
- DEVICE_ATI_RV280_RADEON_9200,
- DEVICE_ATI_RV280_RADEON_92002,
- DEVICE_ATI_RV280_RADEON_92003,
- DEVICE_ATI_RV280_RADEON_92004,
- DEVICE_ATI_RV280_RADEON_92005,
- DEVICE_ATI_RV280_RADEON_92006,
- DEVICE_ATI_RADEON_R300_ND,
- DEVICE_ATI_RADEON_R300_NE,
- DEVICE_ATI_RV350_NF_RADEON,
- DEVICE_ATI_RADEON_R300_NG,
- DEVICE_ATI_R300_AE_RADEON,
- DEVICE_ATI_R300_AF_RADEON,
- DEVICE_ATI_RV350_AP_RADEON,
- DEVICE_ATI_RV350_AQ_RADEON,
- DEVICE_ATI_RV350_AR_RADEON,
- DEVICE_ATI_RV350_AS_RADEON,
- DEVICE_ATI_R350_AH_RADEON,
- DEVICE_ATI_R350_AI_RADEON,
- DEVICE_ATI_RADEON_R350_RADEON2,
- DEVICE_ATI_RV350_NJ_RADEON,
- DEVICE_ATI_RV350_MOBILITY_RADEON,
- DEVICE_ATI_RV350_MOBILITY_RADEON2
-#endif
-};
-
 static int find_chip(unsigned chip_id)
 {
   unsigned i;
-  for(i = 0;i < sizeof(ati_card_ids)/sizeof(unsigned short);i++)
+  for(i = 0;i < sizeof(ati_card_ids)/sizeof(ati_card_ids_t);i++)
   {
-    if(chip_id == ati_card_ids[i]) return i;
+    if(chip_id == ati_card_ids[i].id) return i;
   }
   return -1;
 }
@@ -949,8 +1247,7 @@
     { 0, 0, 0, 0}
 };
 
-#ifndef RAGE128
-#ifdef HAVE_X11
+#if !defined(RAGE128) && defined(HAVE_X11)
 static void probe_fireGL_driver(void) {
   Display *dp = XOpenDisplay ((void*)0);
   int n = 0;
@@ -972,16 +1269,15 @@
       firegl_shift = 0x500000;
       if (!ext_fglrx) {
         printf(", but DRI seems not to be activated\n");
-        printf(RADEON_MSG" Output may not work correctly, check your DRI configuration!");
+        printf(RADEON_MSG" Output may not work correctly, check your DRI configuration!");
       }
       printf("\n");
     }
   }
 }
 #endif
-#endif
 
-static int radeon_probe( int verbose,int force )
+static int radeon_probe(int verbose, int force)
 {
   pciinfo_t lst[MAX_PCI_DEVICES];
   unsigned i,num_pci;
@@ -1000,121 +1296,19 @@
     {
       if(lst[i].vendor == VENDOR_ATI)
       {
-        int idx;
+	int idx;
 	const char *dname;
 	idx = find_chip(lst[i].device);
 	if(idx == -1 && force == PROBE_NORMAL) continue;
 	dname = pci_device_name(VENDOR_ATI,lst[i].device);
 	dname = dname ? dname : "Unknown chip";
 	printf(RADEON_MSG" Found chip: %s\n",dname);
-#if 0
-	if ((lst[i].command & PCI_COMMAND_IO) == 0)
-	{
-		printf("[radeon] Device is disabled, ignoring\n");
-		continue;
-	}
-#endif
-#ifndef RAGE128	
-	if(idx != -1)
-#ifdef HAVE_X11
-	probe_fireGL_driver();
-#endif
-	{
-          switch(ati_card_ids[idx]) {
-            /* Original radeon */
-            case DEVICE_ATI_RADEON_R100_QD:
-            case DEVICE_ATI_RADEON_R100_QE:
-            case DEVICE_ATI_RADEON_R100_QF:
-            case DEVICE_ATI_RADEON_R100_QG:
-              RadeonFamily = 100;
-              break;
-              
-            /* Radeon VE / Radeon Mobility */
-            case DEVICE_ATI_RADEON_RV100_QY:
-            case DEVICE_ATI_RADEON_RV100_QZ:
-            case DEVICE_ATI_RADEON_MOBILITY_M6:
-            case DEVICE_ATI_RADEON_MOBILITY_M62:
-	    case DEVICE_ATI_RADEON_MOBILITY_U1:
-              RadeonFamily = 120;
-              break;
-              
-            /* Radeon 7500 / Radeon Mobility 7500 */
-            case DEVICE_ATI_RADEON_RV200_QW:
-            case DEVICE_ATI_RADEON_RV200_QX: 
-            case DEVICE_ATI_RADEON_MOBILITY_M7:
-            case DEVICE_ATI_RADEON_RV200_LX:
-              RadeonFamily = 150;
-              break;
-              
-            /* Radeon 8500 */
-            case DEVICE_ATI_R200_BB_RADEON:
-            case DEVICE_ATI_RADEON_R200_QH:
-            case DEVICE_ATI_RADEON_R200_QI:
-            case DEVICE_ATI_RADEON_R200_QJ:
-            case DEVICE_ATI_RADEON_R200_QK:
-            case DEVICE_ATI_RADEON_R200_QL:
-            case DEVICE_ATI_RADEON_R200_QM:
-            case DEVICE_ATI_RADEON_R200_QH2:
-            case DEVICE_ATI_RADEON_R200_QI2:
-            case DEVICE_ATI_RADEON_R200_QJ2:
-            case DEVICE_ATI_RADEON_R200_QK2:
-              RadeonFamily = 200;
-              break;
-              
-            /* Radeon 9000 */
-            case DEVICE_ATI_RADEON_RV250_ID:
-            case DEVICE_ATI_RADEON_RV250_IE:
-            case DEVICE_ATI_RADEON_RV250_IF:
-            case DEVICE_ATI_RADEON_RV250_IG:
-            case DEVICE_ATI_RADEON_R250_LD:
-            case DEVICE_ATI_RADEON_R250_LE:
-            case DEVICE_ATI_RADEON_R250_MOBILITY:
-            case DEVICE_ATI_RADEON_R250_LG:
-            case DEVICE_ATI_M9_5C61_RADEON:
-            case DEVICE_ATI_M9_5C63_RADEON:
-              RadeonFamily = 250;
-              break;
-              
-            /* Radeon 9200 */
-            case DEVICE_ATI_RV280_RADEON_9200:
-            case DEVICE_ATI_RV280_RADEON_92002:
-            case DEVICE_ATI_RV280_RADEON_92003:
-            case DEVICE_ATI_RV280_RADEON_92004:
-            case DEVICE_ATI_RV280_RADEON_92005:
-            case DEVICE_ATI_RV280_RADEON_92006:
-              RadeonFamily = 280;
-              break;
-
-            /* Radeon 9700 */
-            case DEVICE_ATI_RADEON_R300_ND:
-            case DEVICE_ATI_RADEON_R300_NE:
-            case DEVICE_ATI_RV350_NF_RADEON:
-            case DEVICE_ATI_RADEON_R300_NG:
-            case DEVICE_ATI_R300_AE_RADEON:
-            case DEVICE_ATI_R300_AF_RADEON:
-              RadeonFamily = 300;
-              break;
-
-            /* Radeon 9600/9800 */
-            case DEVICE_ATI_RV370_5B60_RADEON:
-            case DEVICE_ATI_RV350_AP_RADEON:
-            case DEVICE_ATI_RV350_AQ_RADEON:
-            case DEVICE_ATI_RV350_AR_RADEON:
-            case DEVICE_ATI_RV350_AS_RADEON:
-            case DEVICE_ATI_RADEON_R350_RADEON2:
-            case DEVICE_ATI_R350_AH_RADEON:
-            case DEVICE_ATI_R350_AI_RADEON:
-            case DEVICE_ATI_RV350_NJ_RADEON:
-            case DEVICE_ATI_RV350_MOBILITY_RADEON:
-            case DEVICE_ATI_RV350_MOBILITY_RADEON2:
-              RadeonFamily = 350;
-              break;
-
-            default:
-              break;
-          }
-	}
-#endif
+        if ((lst[i].command & PCI_COMMAND_IO) == 0)
+        {
+          printf("[radeon] Device is disabled, ignoring\n");
+          continue;
+        }
+	memset(&besr,0,sizeof(bes_registers_t));
 	if(force > PROBE_NORMAL)
 	{
 	    printf(RADEON_MSG" Driver was forced. Was found %sknown chip\n",idx == -1 ? "un" : "");
@@ -1124,7 +1318,12 @@
 #else
 		printf(RADEON_MSG" Assuming it as Radeon1\n");
 #endif
+	    besr.chip_flags=R_100|R_OVL_SHIFT;
 	}
+#if !defined(RAGE128) && defined(HAVE_X11)
+        probe_fireGL_driver();
+#endif
+	if(idx != -1) besr.chip_flags=ati_card_ids[idx].flags;
 	def_cap.device_id = lst[i].device;
 	err = 0;
 	memcpy(&pci_info,&lst[i],sizeof(pciinfo_t));
@@ -1137,11 +1336,45 @@
   return err;
 }
 
-static void radeon_vid_dump_regs( void ); /* forward declaration */
+typedef struct saved_regs_s /* one field per register touched by save_regs()/restore_regs() */
+{
+    uint32_t ov0_vid_key_clr;
+    uint32_t ov0_vid_key_msk;
+    uint32_t ov0_graphics_key_clr;
+    uint32_t ov0_graphics_key_msk;
+    uint32_t ov0_key_cntl;
+    uint32_t disp_merge_cntl;
+}saved_regs_t;
+static saved_regs_t savreg; /* filled by save_regs(), written back by restore_regs() */
-static int radeon_init( void )
+static void save_regs( void ) /* snapshot the overlay key and DISP_MERGE_CNTL registers into savreg */
+{
+    radeon_fifo_wait(6); /* 6 == number of registers read below */
+    savreg.ov0_vid_key_clr	= INREG(OV0_VID_KEY_CLR);
+    savreg.ov0_vid_key_msk	= INREG(OV0_VID_KEY_MSK);
+    savreg.ov0_graphics_key_clr = INREG(OV0_GRAPHICS_KEY_CLR);
+    savreg.ov0_graphics_key_msk = INREG(OV0_GRAPHICS_KEY_MSK);
+    savreg.ov0_key_cntl		= INREG(OV0_KEY_CNTL);
+    savreg.disp_merge_cntl	= INREG(DISP_MERGE_CNTL);
+}
+
+static void restore_regs( void ) /* write the values held in savreg back to the registers save_regs() read */
+{
+    radeon_fifo_wait(6); /* 6 == number of registers written below */
+    OUTREG(OV0_VID_KEY_CLR,savreg.ov0_vid_key_clr);
+    OUTREG(OV0_VID_KEY_MSK,savreg.ov0_vid_key_msk);
+    OUTREG(OV0_GRAPHICS_KEY_CLR,savreg.ov0_graphics_key_clr);
+    OUTREG(OV0_GRAPHICS_KEY_MSK,savreg.ov0_graphics_key_msk);
+    OUTREG(OV0_KEY_CNTL,savreg.ov0_key_cntl);
+    OUTREG(DISP_MERGE_CNTL,savreg.disp_merge_cntl);
+}
+
+static int radeon_init(void)
 {
   int err;
+
+  if(__verbose>0) printf("[radeon_vid] version %d\n", VIDIX_VERSION);
+
   if(!probed) 
   {
     printf(RADEON_MSG" Driver was not probed but is being initializing\n");
@@ -1164,102 +1397,38 @@
   /* Rage Mobility (rage128) also has memsize bug */
   if (radeon_ram_size == 0 &&
       (def_cap.device_id == DEVICE_ATI_RAGE_MOBILITY_M3 ||
-       def_cap.device_id == DEVICE_ATI_RAGE_128_RL_VR ||
        def_cap.device_id == DEVICE_ATI_RAGE_MOBILITY_M32))
   {
       printf(RADEON_MSG" Workarounding buggy Rage Mobility M3 (0 vs. 8MB ram)\n");
       radeon_ram_size = 8192*1024;
   }
 #endif
+  if((radeon_mem_base = map_phys_mem(pci_info.base0,radeon_ram_size))==(void *)-1) return ENOMEM;
+  radeon_vid_make_default();
   printf(RADEON_MSG" Video memory = %uMb\n",radeon_ram_size/0x100000);
-#ifdef WIN32
-  //mapping large areas of video ram will fail on windows
-  if(radeon_ram_size > 16*1024*1024)radeon_ram_size=16*1024*1024;
-#endif
-  if((radeon_mem_base = map_phys_mem(pci_info.base0,radeon_ram_size))==(void *)-1) return ENOMEM;
-  memset(&besr,0,sizeof(bes_registers_t));
-  radeon_vid_make_default();
   err = mtrr_set_type(pci_info.base0,radeon_ram_size,MTRR_TYPE_WRCOMB);
   if(!err) printf(RADEON_MSG" Set write-combining type of video memory\n");
-
-  radeon_fifo_wait(3);
-  SAVED_OV0_GRAPHICS_KEY_CLR = INREG(OV0_GRAPHICS_KEY_CLR);
-  SAVED_OV0_GRAPHICS_KEY_MSK = INREG(OV0_GRAPHICS_KEY_MSK);
-  SAVED_OV0_VID_KEY_CLR = INREG(OV0_VID_KEY_CLR);
-  SAVED_OV0_VID_KEY_MSK = INREG(OV0_VID_KEY_MSK);
-  SAVED_OV0_KEY_CNTL = INREG(OV0_KEY_CNTL);
-  printf(RADEON_MSG" Saved overlay colorkey settings\n");
-
-#ifdef RADEON
-  switch(RadeonFamily)
-    {
-    case 100:
-    case 120:
-    case 150:
-    case 250:
-    case 280:
-      is_shift_required=1;
-      break;
-    default:
-      break;
-    }
+#ifndef RAGE128
+  {
+    memset(&rinfo,0,sizeof(rinfo_t));
+    if((besr.chip_flags&R_100) != R_100) rinfo.hasCRTC2 = 1;
+    
+    radeon_get_moninfo(&rinfo);
+	if(rinfo.hasCRTC2) {
+	    printf(RADEON_MSG" DVI port has %s monitor connected\n",GET_MON_NAME(rinfo.dviDispType));
+	    printf(RADEON_MSG" CRT port has %s monitor connected\n",GET_MON_NAME(rinfo.crtDispType));
+	}
+	else
+	    printf(RADEON_MSG" CRT port has %s monitor connected\n",GET_MON_NAME(rinfo.crtDispType));
+  }
 #endif
-
-/* XXX: hack, but it works for me (tm) */
-#ifdef WORDS_BIGENDIAN
-#if defined(RAGE128) 
-    /* code from gatos */
-    {
-	SAVED_CONFIG_CNTL = INREG(CONFIG_CNTL);
-	OUTREG(CONFIG_CNTL, SAVED_CONFIG_CNTL &
-	    ~(APER_0_BIG_ENDIAN_16BPP_SWAP|APER_0_BIG_ENDIAN_32BPP_SWAP));
-	    
-//	printf("saved: %x, current: %x\n", SAVED_CONFIG_CNTL,
-//	    INREG(CONFIG_CNTL));
-    }
-#else
-    /*code from radeon_video.c*/
-    {
-    	SAVED_CONFIG_CNTL = INREG(RADEON_SURFACE_CNTL);
-/*	OUTREG(RADEON_SURFACE_CNTL, (SAVED_CONFIG_CNTL |
-		RADEON_NONSURF_AP0_SWP_32BPP) & ~RADEON_NONSURF_AP0_SWP_16BPP);
-*/
-	OUTREG(RADEON_SURFACE_CNTL, SAVED_CONFIG_CNTL & ~(RADEON_NONSURF_AP0_SWP_32BPP
-						   | RADEON_NONSURF_AP0_SWP_16BPP));
-
-/*
-	OUTREG(RADEON_SURFACE_CNTL, (SAVED_CONFIG_CNTL | RADEON_NONSURF_AP0_SWP_32BPP)
-				    & ~RADEON_NONSURF_AP0_SWP_16BPP);
-*/
-    }
-#endif
-#endif
-
-  if(__verbose > 1) radeon_vid_dump_regs();
+  save_regs();
   return 0;  
 }
 
-static void radeon_destroy( void )
+static void radeon_destroy(void)
 {
-  /* remove colorkeying */
-  radeon_fifo_wait(3);
-  OUTREG(OV0_GRAPHICS_KEY_CLR, SAVED_OV0_GRAPHICS_KEY_CLR);
-  OUTREG(OV0_GRAPHICS_KEY_MSK, SAVED_OV0_GRAPHICS_KEY_MSK);
-  OUTREG(OV0_VID_KEY_CLR, SAVED_OV0_VID_KEY_CLR);
-  OUTREG(OV0_VID_KEY_MSK, SAVED_OV0_VID_KEY_MSK);
-  OUTREG(OV0_KEY_CNTL, SAVED_OV0_KEY_CNTL);
-  printf(RADEON_MSG" Restored overlay colorkey settings\n");
-
-#ifdef WORDS_BIGENDIAN
-#if defined(RAGE128)
-    OUTREG(CONFIG_CNTL, SAVED_CONFIG_CNTL);
-//    printf("saved: %x, restored: %x\n", SAVED_CONFIG_CNTL,
-//	INREG(CONFIG_CNTL));
-#else
-    OUTREG(RADEON_SURFACE_CNTL, SAVED_CONFIG_CNTL);
-#endif
-#endif
-
+  restore_regs();
   unmap_phys_mem(radeon_mem_base,radeon_ram_size);
   unmap_phys_mem(radeon_mmio_base,0xFFFF);
 }
@@ -1271,26 +1440,42 @@
 }
 
 /*
-  Full list of fourcc which are supported by Win2K redeon driver:
+  Full list of fourcc which are supported by Win2K radeon driver:
   YUY2, UYVY, DDES, OGLT, OGL2, OGLS, OGLB, OGNT, OGNZ, OGNS,
   IF09, YVU9, IMC4, M2IA, IYUV, VBID, DXT1, DXT2, DXT3, DXT4, DXT5
 */
-static uint32_t supported_fourcc[] = 
+typedef struct fourcc_desc_s /* element type of the supported_fourcc[] table */
+{
+    uint32_t fourcc; /* IMGFMT_* code compared by is_supported_fourcc() */
+    unsigned max_srcw; /* presumably the maximum source width for this format -- TODO confirm */
+}fourcc_desc_t;
+
+static fourcc_desc_t supported_fourcc[] = 
 {
-  IMGFMT_Y800, IMGFMT_Y8, IMGFMT_YVU9, IMGFMT_IF09,
-  IMGFMT_YV12, IMGFMT_I420, IMGFMT_IYUV, 
-  IMGFMT_UYVY, IMGFMT_YUY2, IMGFMT_YVYU,
-  IMGFMT_RGB15, IMGFMT_BGR15,
-  IMGFMT_RGB16, IMGFMT_BGR16,
-  IMGFMT_RGB32, IMGFMT_BGR32
+  { IMGFMT_Y800, 1567 },
+  { IMGFMT_YVU9, 1567 },
+  { IMGFMT_IF09, 1567 },
+  { IMGFMT_YV12, 1567 },
+  { IMGFMT_I420, 1567 },
+  { IMGFMT_IYUV, 1567 }, 
+  { IMGFMT_UYVY, 1551 },
+  { IMGFMT_YUY2, 1551 },
+  { IMGFMT_YVYU, 1551 },
+  { IMGFMT_RGB15, 1551 },
+  { IMGFMT_BGR15, 1551 },
+  { IMGFMT_RGB16, 1551 },
+  { IMGFMT_BGR16, 1551 },
+  { IMGFMT_RGB32, 775 },
+  { IMGFMT_BGR32, 775 }
 };
 
-inline static int is_supported_fourcc(uint32_t fourcc)
+__inline__ static int is_supported_fourcc(uint32_t fourcc)
 {
-  unsigned int i;
-  for(i=0;i<sizeof(supported_fourcc)/sizeof(uint32_t);i++)
+  unsigned i;
+  for(i=0;i<sizeof(supported_fourcc)/sizeof(fourcc_desc_t);i++)
   {
-    if(fourcc==supported_fourcc[i]) return 1;
+    if(fourcc==supported_fourcc[i].fourcc)
+      return 1;
   }
   return 0;
 }
@@ -1304,13 +1489,15 @@
 		    VID_DEPTH_12BPP| VID_DEPTH_15BPP|
 		    VID_DEPTH_16BPP| VID_DEPTH_24BPP|
 		    VID_DEPTH_32BPP;
-	to->flags = VID_CAP_EXPAND | VID_CAP_SHRINK | VID_CAP_COLORKEY;
+	to->flags = VID_CAP_EXPAND | VID_CAP_SHRINK | VID_CAP_COLORKEY |
+		    VID_CAP_BLEND;
 	return 0;
     }
     else  to->depth = to->flags = 0;
     return ENOSYS;
 }
 
+static double H_scale_ratio;
 static void radeon_vid_dump_regs( void )
 {
   size_t i;
@@ -1320,7 +1507,7 @@
   printf(RADEON_MSG"radeon_overlay_off=%08X\n",radeon_overlay_off);
   printf(RADEON_MSG"radeon_ram_size=%08X\n",radeon_ram_size);
   printf(RADEON_MSG"video mode: %ux%u@%u\n",radeon_get_xres(),radeon_get_yres(),radeon_vid_get_dbpp());
-  printf(RADEON_MSG"flatpanel size: %ux%u\n",radeon_get_fp_xres(),radeon_get_fp_yres());
+  printf(RADEON_MSG"H_scale_ratio=%8.2f\n",H_scale_ratio);
   printf(RADEON_MSG"*** Begin of OV0 registers dump ***\n");
   for(i=0;i<sizeof(vregs)/sizeof(video_registers_t);i++)
 	printf(RADEON_MSG"%s = %08X\n",vregs[i].sname,INREG(vregs[i].name));
@@ -1334,55 +1521,33 @@
     OUTREG(OV0_EXCLUSIVE_HORZ, 0);
     OUTREG(OV0_AUTO_FLIP_CNTL, 0);   /* maybe */
     OUTREG(OV0_FILTER_CNTL, FILTER_HARDCODED_COEF);
-#ifdef RADEON
+#ifdef RAGE128    
+    OUTREG(OV0_KEY_CNTL, GRAPHIC_KEY_FN_NE);
+#else
     OUTREG(OV0_KEY_CNTL, GRAPHIC_KEY_FN_EQ);
-#else
-    OUTREG(OV0_KEY_CNTL, GRAPHIC_KEY_FN_NE);
 #endif
     OUTREG(OV0_TEST, 0);
 }
 
 static void radeon_vid_display_video( void )
 {
-    int bes_flags;
-    /** workaround for Xorg-6.8 not saving the surface registers on bigendian architectures */
-#ifdef WORDS_BIGENDIAN
-#if defined(RAGE128) 
-    /* code from gatos */
-    {
-	SAVED_CONFIG_CNTL = INREG(CONFIG_CNTL);
-	OUTREG(CONFIG_CNTL, SAVED_CONFIG_CNTL &
-	    ~(APER_0_BIG_ENDIAN_16BPP_SWAP|APER_0_BIG_ENDIAN_32BPP_SWAP));
-	    
-//	printf("saved: %x, current: %x\n", SAVED_CONFIG_CNTL,
-//	    INREG(CONFIG_CNTL));
-    }
-#else
-    /*code from radeon_video.c*/
-    {
-    	SAVED_CONFIG_CNTL = INREG(RADEON_SURFACE_CNTL);
-/*	OUTREG(RADEON_SURFACE_CNTL, (SAVED_CONFIG_CNTL |
-		RADEON_NONSURF_AP0_SWP_32BPP) & ~RADEON_NONSURF_AP0_SWP_16BPP);
-*/
-	OUTREG(RADEON_SURFACE_CNTL, SAVED_CONFIG_CNTL & ~(RADEON_NONSURF_AP0_SWP_32BPP
-						   | RADEON_NONSURF_AP0_SWP_16BPP));
-
-/*
-	OUTREG(RADEON_SURFACE_CNTL, (SAVED_CONFIG_CNTL | RADEON_NONSURF_AP0_SWP_32BPP)
-				    & ~RADEON_NONSURF_AP0_SWP_16BPP);
-*/
-    }
-#endif
-#endif
-
-
- 
+    int bes_flags,force_second;
     radeon_fifo_wait(2);
     OUTREG(OV0_REG_LOAD_CNTL,		REG_LD_CTL_LOCK);
     radeon_engine_idle();
     while(!(INREG(OV0_REG_LOAD_CNTL)&REG_LD_CTL_LOCK_READBACK));
     radeon_fifo_wait(15);
 
+    force_second=0;
+#if 0 /* Warning: for now we have black screen only! :( */
+#ifndef RAGE128
+    if(rinfo.hasCRTC2 && 
+       (rinfo.dviDispType == MT_CTV || rinfo.dviDispType == MT_STV))
+    {
+	force_second=1;
+    }
+#endif
+#endif
     /* Shutdown capturing */
     OUTREG(FCP_CNTL, FCP_CNTL__GND);
     OUTREG(CAP0_TRIG_CNTL, 0);
@@ -1394,9 +1559,9 @@
 
     if(besr.deinterlace_on) OUTREG(OV0_DEINTERLACE_PATTERN,besr.deinterlace_pattern);
 #ifdef RAGE128
-    OUTREG(OV0_COLOUR_CNTL, (((besr.brightness*64)/1000) & 0x7f) |
-                            (((besr.saturation*31+31000)/2000) << 8) |
-                            (((besr.saturation*31+31000)/2000) << 16));
+    OUTREG(OV0_COLOUR_CNTL, (besr.brightness & 0x7f) |
+			    (besr.saturation << 8) |
+			    (besr.saturation << 16));
 #endif
     radeon_fifo_wait(2);
     OUTREG(OV0_GRAPHICS_KEY_MSK, besr.graphics_key_msk);
@@ -1405,8 +1570,16 @@
 
     OUTREG(OV0_H_INC,			besr.h_inc);
     OUTREG(OV0_STEP_BY,			besr.step_by);
-    OUTREG(OV0_Y_X_START,		besr.y_x_start);
-    OUTREG(OV0_Y_X_END,			besr.y_x_end);
+    if(force_second)
+    {
+	OUTREG(OV1_Y_X_START,		besr.y_x_start);
+	OUTREG(OV1_Y_X_END,		besr.y_x_end);
+    }
+    else
+    {
+	OUTREG(OV0_Y_X_START,		besr.y_x_start);
+	OUTREG(OV0_Y_X_END,		besr.y_x_end);
+    }
     OUTREG(OV0_V_INC,			besr.v_inc);
     OUTREG(OV0_P1_BLANK_LINES_AT_TOP,	besr.p1_blank_lines_at_top);
     OUTREG(OV0_P23_BLANK_LINES_AT_TOP,	besr.p23_blank_lines_at_top);
@@ -1430,55 +1603,57 @@
     OUTREG(OV0_P23_H_ACCUM_INIT,	besr.p23_h_accum_init);
     OUTREG(OV0_P23_V_ACCUM_INIT,	besr.p23_v_accum_init);
 
-#ifdef RADEON
     bes_flags = SCALER_ENABLE |
-                SCALER_SMART_SWITCH;
-//		SCALER_HORZ_PICK_NEAREST |
-//		SCALER_VERT_PICK_NEAREST |
-#endif
-    bes_flags = SCALER_ENABLE |
-                SCALER_SMART_SWITCH |
+		SCALER_SMART_SWITCH |
 		SCALER_Y2R_TEMP |
 		SCALER_PIX_EXPAND;
     if(besr.double_buff) bes_flags |= SCALER_DOUBLE_BUFFER;
     if(besr.deinterlace_on) bes_flags |= SCALER_ADAPTIVE_DEINT;
+    if(besr.horz_pick_nearest) bes_flags |= SCALER_HORZ_PICK_NEAREST;
+    if(besr.vert_pick_nearest) bes_flags |= SCALER_VERT_PICK_NEAREST;
 #ifdef RAGE128
     bes_flags |= SCALER_BURST_PER_PLANE;
 #endif
-    switch(besr.fourcc)
-    {
-        case IMGFMT_RGB15:
-        case IMGFMT_BGR15: bes_flags |= SCALER_SOURCE_15BPP; break;
-	case IMGFMT_RGB16:
-	case IMGFMT_BGR16: bes_flags |= SCALER_SOURCE_16BPP; break;
-/*
-        case IMGFMT_RGB24:
-        case IMGFMT_BGR24: bes_flags |= SCALER_SOURCE_24BPP; break;
-*/
-        case IMGFMT_RGB32:
-	case IMGFMT_BGR32: bes_flags |= SCALER_SOURCE_32BPP; break;
-        /* 4:1:0 */
-	case IMGFMT_IF09:
-        case IMGFMT_YVU9:  bes_flags |= SCALER_SOURCE_YUV9; break;
-	/* 4:0:0 */
-	case IMGFMT_Y800:
-	case IMGFMT_Y8:
-        /* 4:2:0 */
-	case IMGFMT_IYUV:
-	case IMGFMT_I420:
-	case IMGFMT_YV12:  bes_flags |= SCALER_SOURCE_YUV12; break;
-        /* 4:2:2 */
-        case IMGFMT_YVYU:
-	case IMGFMT_UYVY:  bes_flags |= SCALER_SOURCE_YVYU422; break;
-	case IMGFMT_YUY2:
-	default:           bes_flags |= SCALER_SOURCE_VYUY422; break;
-    }
+    bes_flags |= (besr.surf_id << 8) & SCALER_SURFAC_FORMAT;
+    if(besr.load_prg_start) bes_flags |= SCALER_PRG_LOAD_START;
+    if(force_second)	bes_flags |= SCALER_USE_OV1;
+    else		bes_flags &= ~SCALER_USE_OV1;
     OUTREG(OV0_SCALE_CNTL,		bes_flags);
+    radeon_fifo_wait(6);
+    OUTREG(OV0_FILTER_CNTL,besr.filter_cntl);
+    OUTREG(OV0_FOUR_TAP_COEF_0,besr.four_tap_coeff[0]);
+    OUTREG(OV0_FOUR_TAP_COEF_1,besr.four_tap_coeff[1]);
+    OUTREG(OV0_FOUR_TAP_COEF_2,besr.four_tap_coeff[2]);
+    OUTREG(OV0_FOUR_TAP_COEF_3,besr.four_tap_coeff[3]);
+    OUTREG(OV0_FOUR_TAP_COEF_4,besr.four_tap_coeff[4]);
+    if(besr.swap_uv) OUTREG(OV0_TEST,INREG(OV0_TEST)|OV0_SWAP_UV);
     OUTREG(OV0_REG_LOAD_CNTL,		0);
-    if(__verbose > 1) printf(RADEON_MSG"we wanted: scaler=%08X\n",bes_flags);
-    if(__verbose > 1) radeon_vid_dump_regs();
+    if(__verbose > VERBOSE_LEVEL) printf(RADEON_MSG"we wanted: scaler=%08X\n",bes_flags);
+    if(__verbose > VERBOSE_LEVEL) radeon_vid_dump_regs();
 }
 
+/* Goal of this function: hide the RGB background and show a black screen around the movie.
+   Useful in '-vo fbdev:vidix -fs -zoom' mode.
+   This is the reverse of the colorkey effect. */
+#ifdef RAGE128
+static void radeon_vid_exclusive( void )
+{
+/* This function works only with Rage128.
+   Radeon should have something similar. */
+    unsigned screenw,screenh;
+    screenw = radeon_get_xres();
+    screenh = radeon_get_yres();
+    radeon_fifo_wait(2); /* 2 == number of registers written below */
+    OUTREG(OV0_EXCLUSIVE_VERT,(((screenh-1)<<16)&EXCL_VERT_END_MASK)); /* end field = screenh-1 */
+    OUTREG(OV0_EXCLUSIVE_HORZ,(((screenw/8+1)<<8)&EXCL_HORZ_END_MASK)|EXCL_HORZ_EXCLUSIVE_EN); /* end field = screenw/8+1, plus the enable bit */
+}
+
+static void radeon_vid_non_exclusive( void )
+{
+    OUTREG(OV0_EXCLUSIVE_HORZ,0); /* zeroes the whole register, including EXCL_HORZ_EXCLUSIVE_EN set by radeon_vid_exclusive() */
+}
+#endif
+
 static unsigned radeon_query_pitch(unsigned fourcc,const vidix_yuv_t *spitch)
 {
   unsigned pitch,spy,spv,spu;
@@ -1519,10 +1694,9 @@
 		if(spy > 16 && spu == spy/2 && spv == spy/2)	pitch = spy;
 		else						pitch = 32;
 		break;
-	/* 4:1:0 */
 	case IMGFMT_IF09:
 	case IMGFMT_YVU9:
-		if(spy > 32 && spu == spy/4 && spv == spy/4)	pitch = spy;
+		if(spy >= 64 && spu == spy/4 && spv == spy/4)	pitch = spy;
 		else						pitch = 64;
 		break;
 	default:
@@ -1533,41 +1707,971 @@
   return pitch;
 }
 
+/* Restrictions a candidate scaler configuration may carry. */
+enum {
+    HSTEP_Y_4TAP      = 1,	/* only when four-tap Y filtering is allowed */
+    HSTEP_Y_NO_4TAP   = 2,	/* only when four-tap Y filtering is disallowed */
+    HSTEP_UV_4TAP     = 4,	/* only when four-tap UV filtering is allowed */
+    HSTEP_PLANAR_ONLY = 8	/* only for surface format 10 */
+};
+
+/* One candidate configuration of the horizontal overlay scaler. */
+typedef struct hstep_rule_s {
+    double clocks_per_16;	/* usable when H_scale_ratio >= clocks_per_16/16 */
+    unsigned char group;	/* value stored to *P1GroupSize */
+    unsigned char p1_by;	/* OV0_P1_H_STEP_BY code */
+    unsigned char p23_by;	/* OV0_P23_H_STEP_BY code */
+    unsigned char flags;	/* HSTEP_* restrictions */
+} hstep_rule_t;
+
+/* Candidates are listed best quality first; the first usable one wins.
+   When none is usable the scale ratio is clamped to the threshold of the
+   last candidate that is legal for the surface format. */
+
+/* Formats 3 and 4: 16BPP (ARGB1555 and RGB565).
+   All colour components are fetched in pairs. Four tap is not supported
+   in this mode because G's are split between two bytes. */
+static const hstep_rule_t hstep_rgb16[] = {
+    {  8.0,   2, 1, 1, 0 },	/* >= 0.5     : no skipping   */
+    {  4.0,   2, 2, 2, 0 },	/* >= 0.25    : step by 2     */
+    {  2.0,   2, 3, 3, 0 },	/* >= 0.125   : step by 4     */
+    {  1.0,   2, 4, 4, 0 },	/* >= 0.0625  : step by 8     */
+    {  0.5,   2, 5, 5, 0 }	/* >= 0.03125 : step by 16    */
+};
+
+/* Format 6: 32BPP RGB. */
+static const hstep_rule_t hstep_rgb32[] = {
+    { 24.0,   2, 0, 0, HSTEP_Y_4TAP },	/* >= 1.5     : four tap        */
+    { 12.0,   4, 1, 1, 0 },		/* >= 0.75    : two tap         */
+    {  6.0,   4, 2, 2, 0 },		/* >= 0.375   : step 2 / 2      */
+    {  4.0,   4, 2, 3, 0 },		/* >= 0.25    : step 2 / 4      */
+    {  3.0,   4, 3, 3, 0 },		/* >= 0.1875  : step 4 / 4      */
+    {  2.0,   4, 3, 4, 0 },		/* >= 0.125   : step 4 / 8      */
+    {  1.5,   4, 4, 4, 0 },		/* >= 0.09375 : step 8 / 8      */
+    {  1.0,   4, 5, 5, 0 }		/* >= 0.0625  : step 16 / 16    */
+};
+
+/* Format 9. The P23 increment carries an extra divisor of 4, and every
+   candidate is additionally rejected when its increments exceed the
+   limits checked in Calc_H_INC_STEP_BY(). */
+static const hstep_rule_t hstep_yuv9[] = {
+    { 12.0,   2, 0, 0, HSTEP_Y_4TAP|HSTEP_UV_4TAP },	/* >= 0.75 : four tap on both */
+    {  8.0,   4, 1, 0, HSTEP_Y_NO_4TAP|HSTEP_UV_4TAP },	/* >= 0.5  : two tap Y, four tap UV */
+    { 10.0,   2, 0, 1, HSTEP_Y_4TAP },			/* >= 0.625: four tap Y, dual two tap UV */
+    {  6.0,   4, 1, 1, 0 },	/* >= 0.375     : two tap on Y, U and V */
+    {  5.0,   4, 1, 2, 0 },	/* >= 0.3125    : UV by 2, Y not dropped yet */
+    {  3.0,   4, 2, 2, 0 },	/* >= 0.1875    : Y, U, V by 2 */
+    {  2.5,   4, 2, 3, 0 },	/* >= 0.15625   : Y by 2, UV by 4 */
+    {  1.5,   4, 3, 3, 0 },	/* >= 0.09375   : Y, U, V by 4 */
+    /* Y by 4 with UV by 8 is not offered for this format */
+    {  0.75,  4, 4, 4, 0 },	/* >= 0.046875  : Y, U, V by 8 */
+    {  0.625, 4, 4, 5, 0 },	/* >= 0.0390625 : Y by 8, UV by 16 */
+    {  0.375, 4, 5, 5, 0 }	/* >= 0.0234375 : Y, U, V by 16 */
+};
+
+/* Formats 10..14: YUV12, VYUY422, YUYV422, YOverPkCRCB12, YWovenWithPkCRCB12.
+   The P23 increment carries an extra divisor of 2. Mixed step-by-3/4
+   and step-by-5 settings are not supported for packed modes, hence the
+   HSTEP_PLANAR_ONLY entries. */
+static const hstep_rule_t hstep_yuv[] = {
+    { 16.0,   2, 0, 0, HSTEP_Y_4TAP|HSTEP_UV_4TAP },	/* >= 1.0  : four tap on both */
+    { 12.0,   4, 1, 0, HSTEP_Y_NO_4TAP|HSTEP_UV_4TAP },	/* >= 0.75 : two tap Y, four tap UV */
+    { 12.0,   2, 0, 1, HSTEP_Y_4TAP },			/* >= 0.75 : four tap Y, dual two tap UV */
+    {  8.0,   4, 1, 1, 0 },	/* >= 0.5     : two tap on Y, U and V */
+    {  6.0,   4, 1, 2, 0 },	/* >= 0.375   : UV by 2, Y not dropped yet */
+    {  4.0,   4, 2, 2, 0 },	/* >= 0.25    : Y, U, V by 2 */
+    {  3.0,   4, 2, 3, 0 },	/* >= 0.1875  : Y by 2, UV by 4 */
+    {  2.0,   4, 3, 3, 0 },	/* >= 0.125   : Y, U, V by 4 */
+    {  1.5,   4, 3, 4, HSTEP_PLANAR_ONLY },	/* >= 0.09375  : Y by 4, UV by 8 */
+    {  1.0,   4, 4, 4, 0 },			/* >= 0.0625   : Y, U, V by 8 */
+    {  0.75,  4, 4, 5, HSTEP_PLANAR_ONLY },	/* >= 0.046875 : Y by 8, UV by 16 */
+    {  0.5,   4, 5, 5, HSTEP_PLANAR_ONLY }	/* >= 0.03125  : Y, U, V by 16 */
+};
+
+/* Source pixels consumed per step for a STEP_BY code: codes 0 and 1 both
+   mean one pixel, codes 2..5 mean 2, 4, 8 and 16 pixels. */
+static unsigned hstep_size( unsigned step_by )
+{
+    return step_by ? 1u << (step_by-1) : 1u;
+}
+
+/* H_INC register value: 1/(ratio*divisor) scaled by 1<<12 and rounded to
+   nearest. Must only be called with a ratio large enough for the result
+   to fit into 16 bits (the callers test the ratio threshold first). */
+static uint16_t hstep_inc( double ratio, unsigned divisor )
+{
+    return (uint16_t)((1/(ratio*divisor)) * (1<<0xc) + 0.5);
+}
+
+/*
+ * Choose the horizontal increment and step-by settings of the overlay
+ * scaler for a surface format and horizontal scale ratio.
+ *
+ *  fieldvalue_OV0_SURFACE_FORMAT   surface format code (3, 4, 6, 9..14)
+ *  H_scale_ratio                   horizontal scale ratio; values below the
+ *                                  smallest supported ratio are clamped
+ *  DisallowFourTapVertFiltering    non-zero forbids the four-tap Y settings
+ *  DisallowFourTapUVVertFiltering  non-zero forbids the four-tap UV settings
+ *  val_OV0_P1_H_INC .. P23StepSize outputs, all written for a known format
+ *
+ * For an unknown surface format the outputs are left untouched.  In every
+ * case besr.h_inc and besr.step_by are composed from the output values.
+ */
+static void Calc_H_INC_STEP_BY (
+	int fieldvalue_OV0_SURFACE_FORMAT,
+	double H_scale_ratio,
+	int DisallowFourTapVertFiltering,
+	int DisallowFourTapUVVertFiltering,
+	uint32_t *val_OV0_P1_H_INC,
+	uint32_t *val_OV0_P1_H_STEP_BY,
+	uint32_t *val_OV0_P23_H_INC,
+	uint32_t *val_OV0_P23_H_STEP_BY,
+	int *P1GroupSize,
+	int *P1StepSize,
+	int *P23StepSize )
+{
+    const hstep_rule_t *rules = hstep_rgb16;
+    unsigned nrules = 0;	/* stays 0 for an unknown surface format */
+    unsigned uv_div = 1;	/* extra divisor applied to the P23 increment */
+    int check_limits = 0;	/* reject candidates whose increments are too big */
+    const int planar = (fieldvalue_OV0_SURFACE_FORMAT==10);
+
+    switch (fieldvalue_OV0_SURFACE_FORMAT)
+    {
+	case 3:
+	case 4:
+	    rules = hstep_rgb16;
+	    nrules = sizeof(hstep_rgb16)/sizeof(hstep_rgb16[0]);
+	    break;
+	case 6:
+	    rules = hstep_rgb32;
+	    nrules = sizeof(hstep_rgb32)/sizeof(hstep_rgb32[0]);
+	    break;
+	case 9:
+	    rules = hstep_yuv9;
+	    nrules = sizeof(hstep_yuv9)/sizeof(hstep_yuv9[0]);
+	    uv_div = 4;
+	    /* In this mode the scale ratio could otherwise yield an illegal
+	       increment, so each candidate is validated as well. */
+	    check_limits = 1;
+	    break;
+	case 10:
+	case 11:
+	case 12:
+	case 13:
+	case 14:
+	    rules = hstep_yuv;
+	    nrules = sizeof(hstep_yuv)/sizeof(hstep_yuv[0]);
+	    uv_div = 2;
+	    break;
+	default:
+	    break;
+    }
+
+    if (nrules)
+    {
+	int pick = -1;
+	unsigned i, p1_step, p23_step;
+
+	for (i = 0; i < nrules && pick < 0; i++)
+	{
+	    const hstep_rule_t *r = &rules[i];
+
+	    /* Test the ratio first: it keeps hstep_inc() below within range. */
+	    if (!(H_scale_ratio >= r->clocks_per_16 / 16.0)) continue;
+	    if ((r->flags & HSTEP_Y_4TAP) && DisallowFourTapVertFiltering) continue;
+	    if ((r->flags & HSTEP_Y_NO_4TAP) && !DisallowFourTapVertFiltering) continue;
+	    if ((r->flags & HSTEP_UV_4TAP) && DisallowFourTapUVVertFiltering) continue;
+	    if ((r->flags & HSTEP_PLANAR_ONLY) && !planar) continue;
+	    /* The limits are checked against exactly the increments this
+	       candidate would program (P1 <= 0x3000, P23 <= 0x2000). */
+	    if (check_limits &&
+		(hstep_inc(H_scale_ratio, hstep_size(r->p1_by)) > 0x3000 ||
+		 hstep_inc(H_scale_ratio, uv_div*hstep_size(r->p23_by)) > 0x2000))
+		continue;
+	    pick = (int)i;
+	}
+
+	if (pick < 0)
+	{
+	    /* Nothing fits: fall back to the coarsest candidate that is legal
+	       for this format and clamp the ratio to its threshold. */
+	    pick = (int)nrules - 1;
+	    while ((rules[pick].flags & HSTEP_PLANAR_ONLY) && !planar) pick--;
+	    H_scale_ratio = rules[pick].clocks_per_16 / 16.0;
+	}
+
+	p1_step  = hstep_size(rules[pick].p1_by);
+	p23_step = hstep_size(rules[pick].p23_by);
+
+	*P1GroupSize = rules[pick].group;
+	*val_OV0_P1_H_INC = hstep_inc(H_scale_ratio, p1_step);
+	*val_OV0_P1_H_STEP_BY = rules[pick].p1_by;
+	*val_OV0_P23_H_INC = hstep_inc(H_scale_ratio, uv_div*p23_step);
+	*val_OV0_P23_H_STEP_BY = rules[pick].p23_by;
+	*P1StepSize = p1_step;
+	*P23StepSize = p23_step;
+    }
+
+    besr.h_inc	 = (*(val_OV0_P1_H_INC)&0x3fff) | ((*(val_OV0_P23_H_INC)&0x3fff)<<16);
+    besr.step_by = (*(val_OV0_P1_H_STEP_BY)&0x7) | ((*(val_OV0_P23_H_STEP_BY)&0x7)<<8);
+}
+
+/* ********************************************************* */
+/* ** Setup Black Bordering */
+/* ********************************************************* */
+
+/*
+ * Compute the "blank lines at top" / "active lines" settings for the Y
+ * plane (P1) and the chroma planes (P23) from the source rectangle and
+ * store them, packed, in besr.p1_blank_lines_at_top and
+ * besr.p23_blank_lines_at_top.
+ *
+ *  config           playback configuration; only config->src.y and
+ *                   config->src.h are read
+ *  VertUVSubSample  vertical divisor applied to the source lines to get
+ *                   chroma lines
+ *
+ * Both registers hold "blank lines minus one" in the low bits and
+ * "active lines minus one" starting at bit 16.
+ */
+static void ComputeBorders( vidix_playback_t *config, int VertUVSubSample )
+{
+	double tempBLANK_LINES_AT_TOP;
+	unsigned TopLine,BottomLine,SourceLinesUsed,TopUVLine,BottomUVLine,SourceUVLinesUsed;
+	uint32_t val_OV0_P1_ACTIVE_LINES_M1,val_OV0_P1_BLNK_LN_AT_TOP_M1;
+	uint32_t val_OV0_P23_ACTIVE_LINES_M1,val_OV0_P23_BLNK_LN_AT_TOP_M1;
+
+	/* A source top above line 0 becomes blank lines; otherwise round the
+	   top down to a whole line. */
+	if (floor(config->src.y)<0) {
+	    tempBLANK_LINES_AT_TOP = -floor(config->src.y);
+	    TopLine = 0;
+	}
+	else {
+	    tempBLANK_LINES_AT_TOP = 0;
+	    TopLine = (int)floor(config->src.y);
+	}
+	/* Round the source bottom up and subtract one, limited to src.h-1.
+	   NOTE(review): the limit is src.h itself, not a separate source image
+	   height - confirm this is intended. */
+	if (ceil(config->src.y+config->src.h) > config->src.h)
+	{
+	    BottomLine = config->src.h - 1;
+	}
+	else
+	{
+	    BottomLine = (int)ceil(config->src.y+config->src.h) - 1;
+	}
+
+	if (BottomLine >= TopLine)
+	{
+	    SourceLinesUsed = BottomLine - TopLine + 1;
+	}
+	else
+	{
+	    /* degenerate rectangle: always show at least one line */
+	    SourceLinesUsed = 1;
+	}
+
+	val_OV0_P1_ACTIVE_LINES_M1 = SourceLinesUsed - 1;
+	val_OV0_P1_BLNK_LN_AT_TOP_M1 = ((int)tempBLANK_LINES_AT_TOP-1) & 0xfff;
+
+	/* Same computation for the chroma planes, in subsampled lines. */
+	TopUVLine = ((int)(config->src.y/VertUVSubSample) < 0)	?  0: (int)(config->src.y/VertUVSubSample);   /* Round the source top down */
+	BottomUVLine = (ceil(((config->src.y+config->src.h)/VertUVSubSample)) > (config->src.h/VertUVSubSample))
+	? (config->src.h/VertUVSubSample)-1 : (unsigned)ceil(((config->src.y+config->src.h)/VertUVSubSample))-1;
+
+	if (BottomUVLine >= TopUVLine)
+	{
+	    SourceUVLinesUsed = BottomUVLine - TopUVLine + 1;
+	}
+	else
+	{
+	    /* degenerate rectangle: always show at least one chroma line */
+	    SourceUVLinesUsed = 1;
+	}
+	val_OV0_P23_ACTIVE_LINES_M1 = SourceUVLinesUsed - 1;
+	val_OV0_P23_BLNK_LN_AT_TOP_M1 = ((int)(tempBLANK_LINES_AT_TOP/VertUVSubSample)-1) & 0x7ff;
+	/* P1 fields are 12 bits wide, P23 fields 11 bits. */
+	besr.p1_blank_lines_at_top = (val_OV0_P1_BLNK_LN_AT_TOP_M1  & 0xfff) |
+				     ((val_OV0_P1_ACTIVE_LINES_M1   & 0xfff) << 16);
+	besr.p23_blank_lines_at_top = (val_OV0_P23_BLNK_LN_AT_TOP_M1 & 0x7ff) |
+				     ((val_OV0_P23_ACTIVE_LINES_M1   & 0x7ff) << 16);
+}
+
+
+/*
+ * Compute the horizontal start/end positions of the three overlay planes
+ * and store them, packed as (end & 0x7ff) | ((start & 0x7ff) << 16), in
+ * besr.p1_x_start_end, besr.p2_x_start_end and besr.p3_x_start_end.
+ * Plane 3 always uses the plane 2 values.  When is_400 is non-zero the
+ * plane 2 and plane 3 registers are forced to 0.
+ * The plane 1 and plane 2 start values are returned through p1_x_start
+ * and p2_x_start.  How the chroma planes are derived depends on
+ * besr.surf_id.
+ *
+ * ToDo_Active (kept from the original): iOV0_VID_BUF?_START_PIX is not
+ * used at the moment; iOV0_P?_X_START and iOV0_P?_X_END are used instead.
+ * "start pix" and "width" should be used to derive the start and end.
+ */
+static void ComputeXStartEnd(
+	    int is_400,
+	    uint32_t LeftPixel,uint32_t LeftUVPixel,
+	    uint32_t MemWordsInBytes,uint32_t BytesPerPixel,
+	    uint32_t SourceWidthInPixels, uint32_t P1StepSize,
+	    uint32_t BytesPerUVPixel,uint32_t SourceUVWidthInPixels,
+	    uint32_t P23StepSize, uint32_t *p1_x_start, uint32_t *p2_x_start )
+{
+    uint32_t y_start, y_end;
+    uint32_t uv_start = 0, uv_end = 0;
+    uint32_t uv_packed;
+
+    /* Start is the pixel offset inside a memory word; end is the last
+       pixel rounded down to a multiple of the step size. */
+    y_start = LeftPixel % (MemWordsInBytes/BytesPerPixel);
+    y_end   = ((y_start + SourceWidthInPixels - 1) / P1StepSize) * P1StepSize;
+
+    switch (besr.surf_id)
+    {
+	case 3:
+	case 4:
+	    /* Only the start is needed here (it feeds the P23 pre-shift);
+	       the end stays 0. */
+	    uv_start = y_start;
+	    break;
+	case 6:
+	    /* Same start as plane 1, but the end is aligned to the P23 step. */
+	    uv_start = y_start;
+	    uv_end   = ((y_start + SourceWidthInPixels - 1) / P23StepSize) * P23StepSize;
+	    break;
+	case 9:
+	case 10:
+	case 13:
+	case 14:
+	    /* ToDo_Active: the driver must ensure that the initial value is
+	       a multiple of a power of two when decimating */
+	    uv_start = LeftUVPixel % (MemWordsInBytes/BytesPerUVPixel);
+	    uv_end   = ((uv_start + SourceUVWidthInPixels - 1) / P23StepSize) * P23StepSize;
+	    break;
+	case 11:
+	case 12:
+	    uv_start = LeftUVPixel % (MemWordsInBytes/(BytesPerPixel*2));
+	    uv_end   = ((uv_start + SourceUVWidthInPixels - 1) / P23StepSize) * P23StepSize;
+	    break;
+	default:
+	    RADEON_ASSERT("unknown fourcc\n");
+	    break;
+    }
+
+    uv_packed = (uv_end&0x7ff) | ((uv_start&0x7ff)<<16);
+
+    besr.p1_x_start_end = (y_end&0x7ff) | ((y_start&0x7ff)<<16);
+    besr.p2_x_start_end = is_400 ? 0 : uv_packed;
+    besr.p3_x_start_end = is_400 ? 0 : uv_packed;
+
+    *p1_x_start = y_start;
+    *p2_x_start = uv_start;
+}
+
+/* Horizontal start position of one plane group, split into the integer
+   pre-shift and the fractional (1/32 units) accumulator seed.
+   The start position is: offset of the start pixel inside its fetch
+   group, plus the optional half pixel, plus 2.5, plus h_inc/(1<<0xd)
+   (half of the increment, which is scaled by 1<<0xc).  It is rounded to
+   the nearest 1/32 before being split. */
+static void SplitHAccumInit( uint32_t x_start, uint32_t group_size, uint32_t h_inc,
+			     double extra_half_pixel,
+			     uint32_t *accum_init, uint32_t *preshift_to )
+{
+    double shift = x_start % group_size + extra_half_pixel;
+    double start = shift + 2.5 + ((float)h_inc / (1<<0xd));
+    double init  = (double)((int)(start * (1<<0x5) + 0.5)) / (1<<0x5);
+
+    *accum_init  = (int)((init - (int)init) * (1<<0x5));
+    *preshift_to = (int)init;
+}
+
+/*
+ * Compute the initial values of the horizontal and vertical scaler
+ * accumulators for the Y plane (P1) and the chroma planes (P23) and store
+ * them in besr.p1_h_accum_init, besr.p1_v_accum_init,
+ * besr.p23_h_accum_init and besr.p23_v_accum_init.
+ *
+ * Reads besr.horz_pick_nearest, besr.vert_pick_nearest and besr.surf_id.
+ * P1GroupSize and P23GroupSize must be non-zero (they are used as
+ * modulus).
+ */
+static void ComputeAccumInit(
+	    uint32_t val_OV0_P1_X_START,uint32_t val_OV0_P2_X_START,
+	    uint32_t val_OV0_P1_H_INC,uint32_t val_OV0_P23_H_INC,
+	    uint32_t val_OV0_P1_H_STEP_BY,uint32_t val_OV0_P23_H_STEP_BY,
+	    uint32_t CRT_V_INC,
+	    uint32_t P1GroupSize, uint32_t P23GroupSize,
+	    uint32_t val_OV0_P1_MAX_LN_IN_PER_LN_OUT,
+	    uint32_t val_OV0_P23_MAX_LN_IN_PER_LN_OUT)
+{
+    uint32_t p1_h_accum, p1_preshift;
+    uint32_t p23_h_accum, p23_preshift;
+    uint32_t p1_v_accum, p23_v_accum;
+    double half_pixel, half_line, full_line;
+    double p1_v_start, p23_v_start;
+
+    /* ---- horizontal accumulators ---- */
+    half_pixel = besr.horz_pick_nearest ? 0.5 : 0.0;
+
+    SplitHAccumInit(val_OV0_P1_X_START, P1GroupSize, val_OV0_P1_H_INC,
+		    half_pixel, &p1_h_accum, &p1_preshift);
+    /* P23 values are fetched in groups; a start pixel that is not on a
+       group boundary needs additional pre-shift, which the modulus inside
+       the helper provides. */
+    SplitHAccumInit(val_OV0_P2_X_START, P23GroupSize, val_OV0_P23_H_INC,
+		    half_pixel, &p23_h_accum, &p23_preshift);
+
+    /* ---- vertical accumulators ---- */
+    half_line = besr.vert_pick_nearest ? 0.5 : 0.0;
+
+    /* step-by code 0 adds one full line to the start point */
+    full_line = (val_OV0_P1_H_STEP_BY==0) ? 1.0 : 0.0;
+    p1_v_start = 1.5 + full_line + half_line + ((float)CRT_V_INC / (1<<0xd));
+    if (p1_v_start>2.5 + 2*full_line)
+	p1_v_start = 2.5 + 2*full_line;
+    p1_v_accum = (int)(p1_v_start * (1<<0x5) + 0.5);
+
+    full_line = (val_OV0_P23_H_STEP_BY==0) ? 1.0 : 0.0;
+    switch (besr.surf_id)
+    {
+	case 9:
+	    p23_v_start = 1.5 + full_line + half_line +
+				((float)CRT_V_INC / (1<<0xf));
+	    break;
+	case 10:
+	case 13:
+	case 14:
+	    p23_v_start = 1.5 + full_line + half_line +
+				((float)CRT_V_INC / (1<<0xe));
+	    break;
+	case 3:
+	case 4:
+	case 6:
+	case 11:
+	case 12:
+	    p23_v_start = 0;
+	    break;
+	default:
+	    /* unknown surface id: start from an out-of-range value, which
+	       the clamp below reduces to the maximum */
+	    p23_v_start = 0xFFFF;
+	    break;
+    }
+    if (p23_v_start>2.5 + 2*full_line)
+	p23_v_start = 2.5 + 2*full_line;
+    p23_v_accum = (int)(p23_v_start * (1<<0x5) + 0.5);
+
+    /* ---- pack the register images ---- */
+    besr.p1_h_accum_init = ((p1_h_accum&0x1f)<<15)  |((p1_preshift&0xf)<<28);
+    besr.p1_v_accum_init = (val_OV0_P1_MAX_LN_IN_PER_LN_OUT&0x3) |((p1_v_accum&0x7ff)<<15);
+    besr.p23_h_accum_init= ((p23_h_accum&0x1f)<<15) |((p23_preshift&0xf)<<28);
+    besr.p23_v_accum_init= (val_OV0_P23_MAX_LN_IN_PER_LN_OUT&0x3)|((p23_v_accum&0x3ff)<<15);
+}
+
+/* One entry of the filter coefficient table defined in FilterSetup():
+   a Range key together with a 5x4 set of signed coefficients. */
+typedef struct RangeAndCoefSet {
+    double Range;		/* key of the entry; the table starts at 0.25 and rises in steps of 0.01 */
+    signed char CoefSet[5][4];	/* five rows of four coefficients; presumably one row per
+				   OV0_FOUR_TAP_COEF_n register - TODO confirm */
+} RANGEANDCOEFSET;
+
+/*
+ * Filter setup routine.
+ *
+ * Picks one row of the coefficient table from the horizontal increment
+ * val_OV0_P1_H_INC and packs its five coefficient sets into
+ * besr.four_tap_coeff[0..4].
+ *
+ * The row is chosen from the ratio 4096 / val_OV0_P1_H_INC, clamped to
+ * [0.25, 1.0]; the row index is (ratio - 0.25) * 100 truncated to an integer,
+ * so the table has one row per 0.01 step (76 rows).
+ *
+ * Each packed word holds, for one coefficient set:
+ *   bits  0..3   coefficient 0 (low 4 bits)
+ *   bits  8..14  coefficient 1 (low 7 bits)
+ *   bits 16..22  coefficient 2 (low 7 bits)
+ *   bits 24..27  coefficient 3 (low 4 bits)
+ *
+ * For more details, refer to Microsoft's draft of PC99.
+ */
+static void FilterSetup ( uint32_t val_OV0_P1_H_INC )
+{
+    static RANGEANDCOEFSET ArrayOfSets[] = {
+	{0.25, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.26, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.27, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.28, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.29, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.30, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.31, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.32, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.33, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.34, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.35, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.36, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.37, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.38, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.39, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.40, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.41, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.42, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.43, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.44, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.45, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.46, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.47, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.48, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.49, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.50, {{ 7, 16,  9,  0}, { 7, 16,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 3, 13, 13,  3}}},
+	{0.51, {{ 7, 17,  8,  0}, { 6, 17,  9,  0}, { 5, 15, 11,  1}, { 4, 15, 12,  1}, { 2, 14, 14,  2}}},
+	{0.52, {{ 7, 17,  8,  0}, { 6, 17,  9,  0}, { 5, 16, 11,  0}, { 3, 15, 13,  1}, { 2, 14, 14,  2}}},
+	{0.53, {{ 7, 17,  8,  0}, { 6, 17,  9,  0}, { 5, 16, 11,  0}, { 3, 15, 13,  1}, { 2, 14, 14,  2}}},
+	{0.54, {{ 7, 17,  8,  0}, { 6, 17,  9,  0}, { 4, 17, 11,  0}, { 3, 15, 13,  1}, { 2, 14, 14,  2}}},
+	{0.55, {{ 7, 18,  7,  0}, { 6, 17,  9,  0}, { 4, 17, 11,  0}, { 3, 15, 13,  1}, { 1, 15, 15,  1}}},
+	{0.56, {{ 7, 18,  7,  0}, { 5, 18,  9,  0}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.57, {{ 7, 18,  7,  0}, { 5, 18,  9,  0}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.58, {{ 7, 18,  7,  0}, { 5, 18,  9,  0}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.59, {{ 7, 18,  7,  0}, { 5, 18,  9,  0}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.60, {{ 7, 18,  8, -1}, { 6, 17, 10, -1}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.61, {{ 7, 18,  8, -1}, { 6, 17, 10, -1}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.62, {{ 7, 18,  8, -1}, { 6, 17, 10, -1}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.63, {{ 7, 18,  8, -1}, { 6, 17, 10, -1}, { 4, 17, 11,  0}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.64, {{ 7, 18,  8, -1}, { 6, 17, 10, -1}, { 4, 17, 12, -1}, { 2, 17, 13,  0}, { 1, 15, 15,  1}}},
+	{0.65, {{ 7, 18,  8, -1}, { 6, 17, 10, -1}, { 4, 17, 12, -1}, { 2, 17, 13,  0}, { 0, 16, 16,  0}}},
+	{0.66, {{ 7, 18,  8, -1}, { 6, 18, 10, -2}, { 4, 17, 12, -1}, { 2, 17, 13,  0}, { 0, 16, 16,  0}}},
+	{0.67, {{ 7, 20,  7, -2}, { 5, 19, 10, -2}, { 3, 18, 12, -1}, { 2, 17, 13,  0}, { 0, 16, 16,  0}}},
+	{0.68, {{ 7, 20,  7, -2}, { 5, 19, 10, -2}, { 3, 19, 12, -2}, { 1, 18, 14, -1}, { 0, 16, 16,  0}}},
+	{0.69, {{ 7, 20,  7, -2}, { 5, 19, 10, -2}, { 3, 19, 12, -2}, { 1, 18, 14, -1}, { 0, 16, 16,  0}}},
+	{0.70, {{ 7, 20,  7, -2}, { 5, 20,  9, -2}, { 3, 19, 12, -2}, { 1, 18, 14, -1}, { 0, 16, 16,  0}}},
+	{0.71, {{ 7, 20,  7, -2}, { 5, 20,  9, -2}, { 3, 19, 12, -2}, { 1, 18, 14, -1}, { 0, 16, 16,  0}}},
+	{0.72, {{ 7, 20,  7, -2}, { 5, 20,  9, -2}, { 2, 20, 12, -2}, { 0, 19, 15, -2}, {-1, 17, 17, -1}}},
+	{0.73, {{ 7, 20,  7, -2}, { 4, 21,  9, -2}, { 2, 20, 12, -2}, { 0, 19, 15, -2}, {-1, 17, 17, -1}}},
+	{0.74, {{ 6, 22,  6, -2}, { 4, 21,  9, -2}, { 2, 20, 12, -2}, { 0, 19, 15, -2}, {-1, 17, 17, -1}}},
+	{0.75, {{ 6, 22,  6, -2}, { 4, 21,  9, -2}, { 1, 21, 12, -2}, { 0, 19, 15, -2}, {-1, 17, 17, -1}}},
+	{0.76, {{ 6, 22,  6, -2}, { 4, 21,  9, -2}, { 1, 21, 12, -2}, { 0, 19, 15, -2}, {-1, 17, 17, -1}}},
+	{0.77, {{ 6, 22,  6, -2}, { 3, 22,  9, -2}, { 1, 22, 12, -3}, { 0, 19, 15, -2}, {-2, 18, 18, -2}}},
+	{0.78, {{ 6, 21,  6, -1}, { 3, 22,  9, -2}, { 1, 22, 12, -3}, { 0, 19, 15, -2}, {-2, 18, 18, -2}}},
+	{0.79, {{ 5, 23,  5, -1}, { 3, 22,  9, -2}, { 0, 23, 12, -3}, {-1, 21, 15, -3}, {-2, 18, 18, -2}}},
+	{0.80, {{ 5, 23,  5, -1}, { 3, 23,  8, -2}, { 0, 23, 12, -3}, {-1, 21, 15, -3}, {-2, 18, 18, -2}}},
+	{0.81, {{ 5, 23,  5, -1}, { 2, 24,  8, -2}, { 0, 23, 12, -3}, {-1, 21, 15, -3}, {-2, 18, 18, -2}}},
+	{0.82, {{ 5, 23,  5, -1}, { 2, 24,  8, -2}, { 0, 23, 12, -3}, {-1, 21, 15, -3}, {-3, 19, 19, -3}}},
+	{0.83, {{ 5, 23,  5, -1}, { 2, 24,  8, -2}, { 0, 23, 11, -2}, {-2, 22, 15, -3}, {-3, 19, 19, -3}}},
+	{0.84, {{ 4, 25,  4, -1}, { 1, 25,  8, -2}, { 0, 23, 11, -2}, {-2, 22, 15, -3}, {-3, 19, 19, -3}}},
+	{0.85, {{ 4, 25,  4, -1}, { 1, 25,  8, -2}, { 0, 23, 11, -2}, {-2, 22, 15, -3}, {-3, 19, 19, -3}}},
+	{0.86, {{ 4, 24,  4,  0}, { 1, 25,  7, -1}, {-1, 24, 11, -2}, {-2, 22, 15, -3}, {-3, 19, 19, -3}}},
+	{0.87, {{ 4, 24,  4,  0}, { 1, 25,  7, -1}, {-1, 24, 11, -2}, {-2, 22, 15, -3}, {-3, 19, 19, -3}}},
+	{0.88, {{ 3, 26,  3,  0}, { 0, 26,  7, -1}, {-1, 24, 11, -2}, {-3, 23, 15, -3}, {-3, 19, 19, -3}}},
+	{0.89, {{ 3, 26,  3,  0}, { 0, 26,  7, -1}, {-1, 24, 11, -2}, {-3, 23, 15, -3}, {-3, 19, 19, -3}}},
+	{0.90, {{ 3, 26,  3,  0}, { 0, 26,  7, -1}, {-2, 25, 11, -2}, {-3, 23, 15, -3}, {-3, 19, 19, -3}}},
+	{0.91, {{ 3, 26,  3,  0}, { 0, 27,  6, -1}, {-2, 25, 11, -2}, {-3, 23, 15, -3}, {-3, 19, 19, -3}}},
+	{0.92, {{ 2, 28,  2,  0}, { 0, 27,  6, -1}, {-2, 25, 11, -2}, {-3, 23, 15, -3}, {-3, 19, 19, -3}}},
+	{0.93, {{ 2, 28,  2,  0}, { 0, 26,  6,  0}, {-2, 25, 10, -1}, {-3, 23, 15, -3}, {-3, 19, 19, -3}}},
+	{0.94, {{ 2, 28,  2,  0}, { 0, 26,  6,  0}, {-2, 25, 10, -1}, {-3, 23, 15, -3}, {-3, 19, 19, -3}}},
+	{0.95, {{ 1, 30,  1,  0}, {-1, 28,  5,  0}, {-3, 26, 10, -1}, {-3, 23, 14, -2}, {-3, 19, 19, -3}}},
+	{0.96, {{ 1, 30,  1,  0}, {-1, 28,  5,  0}, {-3, 26, 10, -1}, {-3, 23, 14, -2}, {-3, 19, 19, -3}}},
+	{0.97, {{ 1, 30,  1,  0}, {-1, 28,  5,  0}, {-3, 26, 10, -1}, {-3, 23, 14, -2}, {-3, 19, 19, -3}}},
+	{0.98, {{ 1, 30,  1,  0}, {-2, 29,  5,  0}, {-3, 27,  9, -1}, {-3, 23, 14, -2}, {-3, 19, 19, -3}}},
+	{0.99, {{ 0, 32,  0,  0}, {-2, 29,  5,  0}, {-3, 27,  9, -1}, {-4, 24, 14, -2}, {-3, 19, 19, -3}}},
+	{1.00, {{ 0, 32,  0,  0}, {-2, 29,  5,  0}, {-3, 27,  9, -1}, {-4, 24, 14, -2}, {-3, 19, 19, -3}}}
+    };
+
+    double ratio;
+    unsigned row;
+    unsigned set;
+
+    /* 1<<0xc is 4096: the increment is divided into 4096 to get the ratio. */
+    ratio = (double)(1<<0xc)/val_OV0_P1_H_INC;
+    /* Clamp to the range the table covers. */
+    if (ratio<.25)
+	ratio=.25;
+    else if (ratio>1)
+	ratio=1;
+
+    /* One table row per 0.01 step, starting at 0.25. */
+    row = (int)((ratio-0.25) * 100);
+
+    /* Pack each of the five coefficient sets into its own word. */
+    for (set = 0; set < 5; set++)
+    {
+	const signed char *tap = ArrayOfSets[row].CoefSet[set];
+
+	besr.four_tap_coeff[set] =  (tap[0] & 0xf) |
+				   ((tap[1] & 0x7f)<<8) |
+				   ((tap[2] & 0x7f)<<16) |
+				   ((tap[3] & 0xf)<<24);
+    }
+}
+
+/* Minimum horizontal scale ratio above which the hard-coded filter
+   coefficients are considered suitable for the best quality. */
+/* FIXME: Should it be 0.9 for Rage128 ??? */
+static const double MinHScaleHard=0.75;
+
 static int radeon_vid_init_video( vidix_playback_t *config )
 {
-    uint32_t i,tmp,src_w,src_h,dest_w,dest_h,pitch,h_inc,step_by,left,leftUV,top;
-    int is_400,is_410,is_420,is_rgb32,is_rgb,best_pitch,mpitch;
+    double V_scale_ratio;
+    uint32_t i,src_w,src_h,dest_w,dest_h,pitch,left,leftUV,top,h_inc;
+    uint32_t val_OV0_P1_H_INC=0,val_OV0_P1_H_STEP_BY=0,val_OV0_P23_H_INC=0,val_OV0_P23_H_STEP_BY=0;
+    uint32_t val_OV0_P1_X_START,val_OV0_P2_X_START;
+    uint32_t val_OV0_P1_MAX_LN_IN_PER_LN_OUT,val_OV0_P23_MAX_LN_IN_PER_LN_OUT;
+    uint32_t CRT_V_INC;
+    uint32_t BytesPerOctWord,LogMemWordsInBytes,MemWordsInBytes,LogTileWidthInMemWords;
+    uint32_t TileWidthInMemWords,TileWidthInBytes,LogTileHeight,TileHeight;
+    uint32_t PageSizeInBytes,OV0LB_Rows;
+    uint32_t SourceWidthInMemWords,SourceUVWidthInMemWords;
+    uint32_t SourceWidthInPixels,SourceUVWidthInPixels;
+    uint32_t RightPixel,RightUVPixel,LeftPixel,LeftUVPixel;
+    int is_400,is_410,is_420,best_pitch,mpitch;
+    int horz_repl_factor,interlace_factor;
+    int BytesPerPixel,BytesPerUVPixel,HorzUVSubSample,VertUVSubSample;
+    int DisallowFourTapVertFiltering,DisallowFourTapUVVertFiltering;
+
     radeon_vid_stop_video();
     left = config->src.x << 16;
     top =  config->src.y << 16;
     src_h = config->src.h;
     src_w = config->src.w;
-    is_400 = is_410 = is_420 = is_rgb32 = is_rgb = 0;
+    is_400 = is_410 = is_420 = 0;
     if(config->fourcc == IMGFMT_YV12 ||
        config->fourcc == IMGFMT_I420 ||
        config->fourcc == IMGFMT_IYUV) is_420 = 1;
     if(config->fourcc == IMGFMT_YVU9 ||
        config->fourcc == IMGFMT_IF09) is_410 = 1;
-    if(config->fourcc == IMGFMT_Y800 ||
-       config->fourcc == IMGFMT_Y8) is_400 = 1;
-    if(config->fourcc == IMGFMT_RGB32 ||
-       config->fourcc == IMGFMT_BGR32) is_rgb32 = 1;
-    if(config->fourcc == IMGFMT_RGB32 ||
-       config->fourcc == IMGFMT_BGR32 ||
-       config->fourcc == IMGFMT_RGB24 ||
-       config->fourcc == IMGFMT_BGR24 ||
-       config->fourcc == IMGFMT_RGB16 ||
-       config->fourcc == IMGFMT_BGR16 ||
-       config->fourcc == IMGFMT_RGB15 ||
-       config->fourcc == IMGFMT_BGR15) is_rgb = 1;
+    if(config->fourcc == IMGFMT_Y800) is_400 = 1;
     best_pitch = radeon_query_pitch(config->fourcc,&config->src.pitch);
     mpitch = best_pitch-1;
+    BytesPerOctWord = 16;
+    LogMemWordsInBytes = 4;
+    MemWordsInBytes = 1<<LogMemWordsInBytes;
+    LogTileWidthInMemWords = 2;
+    TileWidthInMemWords = 1<<LogTileWidthInMemWords;
+    TileWidthInBytes = 1<<(LogTileWidthInMemWords+LogMemWordsInBytes);
+    LogTileHeight = 4;
+    TileHeight = 1<<LogTileHeight;
+    PageSizeInBytes = 64*MemWordsInBytes;
+    OV0LB_Rows = 96;
+    h_inc = 1;
     switch(config->fourcc)
     {
-	/* 4:0:0 */
+	/* 4:0:0*/
 	case IMGFMT_Y800:
-	case IMGFMT_Y8:
-	/* 4:1:0 */
+	/* 4:1:0*/
 	case IMGFMT_YVU9:
 	case IMGFMT_IF09:
 	/* 4:2:0 */
@@ -1586,38 +2690,325 @@
 			  config->dest.pitch.v = best_pitch;
 			  break;
 	/* 4:2:2 */
-        default: /* RGB15, RGB16, YVYU, UYVY, YUY2 */
+	
+	default: /* RGB15, RGB16, YVYU, UYVY, YUY2 */
 			  pitch = ((src_w*2) + mpitch) & ~mpitch;
 			  config->dest.pitch.y =
 			  config->dest.pitch.u =
 			  config->dest.pitch.v = best_pitch;
 			  break;
     }
+    besr.load_prg_start=0;
+    besr.swap_uv=0;
+    switch(config->fourcc)
+    {
+	case IMGFMT_RGB15:
+			   besr.swap_uv=1;
+	case IMGFMT_BGR15: besr.surf_id = SCALER_SOURCE_15BPP>>8;
+			   besr.load_prg_start = 1;
+			   break;
+	case IMGFMT_RGB16:
+			   besr.swap_uv=1;
+	case IMGFMT_BGR16: besr.surf_id = SCALER_SOURCE_16BPP>>8;
+			   besr.load_prg_start = 1;
+			   break;
+	case IMGFMT_RGB32:
+			   besr.swap_uv=1;
+	case IMGFMT_BGR32: besr.surf_id = SCALER_SOURCE_32BPP>>8;
+			   besr.load_prg_start = 1;
+			   break;
+	/* 4:1:0*/
+	case IMGFMT_IF09:
+	case IMGFMT_YVU9:  besr.surf_id = SCALER_SOURCE_YUV9>>8;
+			   break;
+	/* 4:0:0*/
+	case IMGFMT_Y800:
+	/* 4:2:0 */
+	case IMGFMT_IYUV:
+	case IMGFMT_I420:
+	case IMGFMT_YV12:  besr.surf_id = SCALER_SOURCE_YUV12>>8;
+			   break;
+	/* 4:2:2 */
+	case IMGFMT_YVYU:
+	case IMGFMT_UYVY:  besr.surf_id = SCALER_SOURCE_YVYU422>>8;
+			   break;
+	case IMGFMT_YUY2:
+	default:	   besr.surf_id = SCALER_SOURCE_VYUY422>>8;
+			   break;
+    }
+    switch (besr.surf_id)
+    {
+	case 3:
+	case 4:
+	case 11:
+	case 12:    BytesPerPixel = 2;
+		    break;
+	case 6:	    BytesPerPixel = 4;
+		    break;
+	case 9:
+	case 10:
+	case 13:
+	case 14:    BytesPerPixel = 1;
+		    break;
+	default:    BytesPerPixel = 0;/*insert a debug statement here. */
+		    break;
+    }
+    switch (besr.surf_id)
+    {
+	case 3:
+	case 4:	    BytesPerUVPixel = 0;
+		    break;/* In RGB modes, the BytesPerUVPixel is don't care */
+	case 11:
+	case 12:    BytesPerUVPixel = 2;
+		    break;
+	case 6:	    BytesPerUVPixel = 0;
+		    break;	/* In RGB modes, the BytesPerUVPixel is don't care */
+	case 9:
+	case 10:    BytesPerUVPixel = 1;
+		    break;
+	case 13:
+	case 14:    BytesPerUVPixel = 2;
+		    break;
+	default:    BytesPerUVPixel = 0;/* insert a debug statement here. */
+		    break;
+
+    }
+    switch (besr.surf_id)
+    {
+	case 3:
+	case 4:
+	case 6:	    HorzUVSubSample = 1;
+		    break;
+	case 9:	    HorzUVSubSample = 4;
+		    break;
+	case 10:
+	case 11:
+	case 12:
+	case 13:
+	case 14:    HorzUVSubSample = 2;
+		    break;
+	default:    HorzUVSubSample = 0;/* insert debug statement here. */
+		    break;
+    }
+    switch (besr.surf_id)
+    {
+	case 3:
+	case 4:
+	case 6:
+	case 11:
+	case 12:    VertUVSubSample = 1;
+		    break;
+	case 9:	    VertUVSubSample = 4;
+		    break;
+	case 10:
+	case 13:
+	case 14:    VertUVSubSample = 2;
+		    break;
+	default:    VertUVSubSample = 0;/* insert debug statement here. */
+		    break;
+    }
+    DisallowFourTapVertFiltering = 0;	    /* Allow it by default */
+    DisallowFourTapUVVertFiltering = 0;	    /* Allow it by default */
+    LeftPixel = config->src.x;
+    RightPixel = config->src.w-1;
+    if(floor(config->src.x/HorzUVSubSample)<0)	LeftUVPixel = 0;
+    else						LeftUVPixel = (int)floor(config->src.x/HorzUVSubSample);
+    if(ceil((config->src.x+config->src.w)/HorzUVSubSample) > config->src.w/HorzUVSubSample)
+		RightUVPixel = config->src.w/HorzUVSubSample - 1;
+    else	RightUVPixel = (int)ceil((config->src.x+config->src.w)/HorzUVSubSample) - 1;
+    /* Top, Bottom and Right Crops can be out of range. The driver will program the hardware
+     * to create a black border at the top and bottom. This is useful for DVD letterboxing. */
+    SourceWidthInPixels = (int)(config->src.w + 1);
+    SourceUVWidthInPixels = (int)(RightUVPixel - LeftUVPixel + 1);
+
+    SourceWidthInMemWords = (int)(ceil(RightPixel*BytesPerPixel / MemWordsInBytes) -
+			    floor(LeftPixel*BytesPerPixel / MemWordsInBytes) + 1);
+    /* SourceUVWidthInMemWords means Source_U_or_V_or_UV_WidthInMemWords depending on whether the UV is packed together or not. */
+    SourceUVWidthInMemWords = (int)(ceil(RightUVPixel*BytesPerUVPixel /
+			      MemWordsInBytes) - floor(LeftUVPixel*BytesPerUVPixel /
+			      MemWordsInBytes) + 1);
+
+    switch (besr.surf_id)
+    {
+	case 9:
+	case 10:    if ((ceil(SourceWidthInMemWords/2)-1) * 2 > OV0LB_Rows-1)
+		    {
+			RADEON_ASSERT("ceil(SourceWidthInMemWords/2)-1) * 2 > OV0LB_Rows-1\n");
+		    }
+		    else if ((SourceWidthInMemWords-1) * 2 > OV0LB_Rows-1)
+		    {
+			DisallowFourTapVertFiltering = 1;
+		    }
+
+		    if ((ceil(SourceUVWidthInMemWords/2)-1) * 4 + 1 > OV0LB_Rows-1)
+		    {
+			/*CYCACC_ASSERT(0, "Image U plane width spans more octwords than supported by hardware.") */
+		    }
+		    else if ((SourceUVWidthInMemWords-1) * 4 + 1 > OV0LB_Rows-1)
+		    {
+			DisallowFourTapUVVertFiltering = 1;
+		    }
+
+		    if ((ceil(SourceUVWidthInMemWords/2)-1) * 4 + 3 > OV0LB_Rows-1)
+		    {
+			/*CYCACC_ASSERT(0, "Image V plane width spans more octwords than supported by hardware.") */
+		    }
+		    else if ((SourceUVWidthInMemWords-1) * 4 + 3 > OV0LB_Rows-1)
+		    {
+			DisallowFourTapUVVertFiltering = 1;
+		    }
+		    break;
+	case 13:
+	case 14:    if ((ceil(SourceWidthInMemWords/2)-1) * 2 > OV0LB_Rows-1)
+		    {
+			RADEON_ASSERT("ceil(SourceWidthInMemWords/2)-1) * 2 > OV0LB_Rows-1\n");
+		    }
+		    else if ((SourceWidthInMemWords-1) * 2 > OV0LB_Rows-1)
+		    {
+			DisallowFourTapVertFiltering = 1;
+		    }
+
+		    if ((ceil(SourceUVWidthInMemWords/2)-1) * 2 + 1 > OV0LB_Rows-1)
+		    {
+			/*CYCACC_ASSERT(0, "Image UV plane width spans more octwords than supported by hardware.") */
+		    }
+		    else if ((SourceUVWidthInMemWords-1) * 2 + 1 > OV0LB_Rows-1)
+		    {
+			DisallowFourTapUVVertFiltering = 1;
+		    }
+		    break;
+	case 3:
+	case 4:
+	case 6:
+	case 11:
+	case 12:    if ((ceil(SourceWidthInMemWords/2)-1) > OV0LB_Rows-1)
+		    {
+			RADEON_ASSERT("(ceil(SourceWidthInMemWords/2)-1) > OV0LB_Rows-1\n")
+		    }
+		    else if ((SourceWidthInMemWords-1) > OV0LB_Rows-1)
+		    {
+			DisallowFourTapVertFiltering = 1;
+		    }
+		    break;
+	default:    /* insert debug statement here. */
+		    break;
+    }
     dest_w = config->dest.w;
     dest_h = config->dest.h;
     if(radeon_is_dbl_scan()) dest_h *= 2;
     besr.dest_bpp = radeon_vid_get_dbpp();
     besr.fourcc = config->fourcc;
-
-    /* flat panel */
-    if(INREG(FP_VERT_STRETCH)&VERT_STRETCH_ENABLE){
-      besr.v_inc = (src_h * radeon_get_yres() / radeon_get_fp_yres() << 20) / dest_h;
+    if(radeon_is_interlace())	interlace_factor = 2;
+    else			interlace_factor = 1;
+    /* TODO: must be checked in doublescan mode!!! */
+    if((besr.chip_flags&R_INTEGRATED)==R_INTEGRATED)
+    {
+	/* Force the overlay clock on for integrated chips */
+        OUTPLL(VCLK_ECP_CNTL, (INPLL(VCLK_ECP_CNTL) | (1<<18)));
+    }
+    horz_repl_factor = 1 << (uint32_t)((INPLL(VCLK_ECP_CNTL) & 0x300) >> 8);
+    H_scale_ratio = (double)ceil(((double)dest_w+1)/horz_repl_factor)/src_w;
+    V_scale_ratio = (double)(dest_h+1)/src_h;
+    if(H_scale_ratio < 0.5 && V_scale_ratio < 0.5)
+    {
+	val_OV0_P1_MAX_LN_IN_PER_LN_OUT = 3;
+	val_OV0_P23_MAX_LN_IN_PER_LN_OUT = 2;
     }
-    else besr.v_inc = (src_h << 20) / dest_h;
-    if(radeon_is_interlace()) besr.v_inc *= 2;
-    h_inc = (src_w << 12) / dest_w;
-
+    else
+    if(H_scale_ratio < 1 && V_scale_ratio < 1)
+    {
+	val_OV0_P1_MAX_LN_IN_PER_LN_OUT = 2;
+	val_OV0_P23_MAX_LN_IN_PER_LN_OUT = 1;
+    }
+    else
+    {
+	val_OV0_P1_MAX_LN_IN_PER_LN_OUT = 1;
+	val_OV0_P23_MAX_LN_IN_PER_LN_OUT = 1;
+    }
+    /* N.B.: The value is computed in 6.12 fixed-point format, but is then shifted left by 8 bits. */
+    besr.v_inc = (uint16_t)((1./V_scale_ratio)*(1<<12)*interlace_factor+0.5);
+    CRT_V_INC = besr.v_inc/interlace_factor;
+    besr.v_inc <<= 8;
     {
-        unsigned int ecp_div;
-        ecp_div = (INPLL(VCLK_ECP_CNTL) >> 8) & 3;
-        h_inc <<= ecp_div;
-    }
+	int ThereIsTwoTapVerticalFiltering,DoNotUseMostRecentlyFetchedLine;
+	int P1GroupSize = 0;
+	int P23GroupSize;
+	int P1StepSize = 0;
+	int P23StepSize = 0;
+
+	Calc_H_INC_STEP_BY(
+	    besr.surf_id,
+	    H_scale_ratio,
+	    DisallowFourTapVertFiltering,
+	    DisallowFourTapUVVertFiltering,
+	    &val_OV0_P1_H_INC,
+	    &val_OV0_P1_H_STEP_BY,
+	    &val_OV0_P23_H_INC,
+	    &val_OV0_P23_H_STEP_BY,
+	    &P1GroupSize,
+	    &P1StepSize,
+	    &P23StepSize);
+
+	if(H_scale_ratio > MinHScaleHard)
+	{
+	    h_inc = (src_w << 12) / dest_w;
+	    besr.step_by = 0x0101;
+	    switch (besr.surf_id)
+	    {
+		case 3:
+		case 4:
+		case 6:
+			besr.h_inc = (h_inc)|(h_inc<<16);
+			break;
+		case 9:
+			besr.h_inc = h_inc | ((h_inc >> 2) << 16);
+			break;
+		default:
+			besr.h_inc = h_inc | ((h_inc >> 1) << 16);
+			break;
+	    }
+	}
 
+	P23GroupSize = 2;	/* Current value for all modes */
 
-    step_by = 1;
-    while(h_inc >= (2 << 12)) {
-	step_by++;
-	h_inc >>= 1;
+	besr.horz_pick_nearest=0;
+	DoNotUseMostRecentlyFetchedLine=0;
+	ThereIsTwoTapVerticalFiltering = (val_OV0_P1_H_STEP_BY!=0) || (val_OV0_P23_H_STEP_BY!=0);
+	if (ThereIsTwoTapVerticalFiltering && DoNotUseMostRecentlyFetchedLine)
+				besr.vert_pick_nearest = 1;
+	else
+				besr.vert_pick_nearest = 0;
+
+	ComputeXStartEnd(is_400,LeftPixel,LeftUVPixel,MemWordsInBytes,BytesPerPixel,
+		     SourceWidthInPixels,P1StepSize,BytesPerUVPixel,
+		     SourceUVWidthInPixels,P23StepSize,&val_OV0_P1_X_START,&val_OV0_P2_X_START);
+
+	if(H_scale_ratio > MinHScaleHard)
+	{
+	    unsigned tmp;
+	    tmp = (left & 0x0003ffff) + 0x00028000 + (h_inc << 3);
+	    besr.p1_h_accum_init = ((tmp <<  4) & 0x000f8000) |
+				    ((tmp << 12) & 0xf0000000);
+
+	    tmp = (top & 0x0000ffff) + 0x00018000;
+	    besr.p1_v_accum_init = ((tmp << 4) & OV0_P1_V_ACCUM_INIT_MASK)
+				    |(OV0_P1_MAX_LN_IN_PER_LN_OUT & 1);
+	    tmp = ((left >> 1) & 0x0001ffff) + 0x00028000 + (h_inc << 2);
+	    besr.p23_h_accum_init = ((tmp << 4) & 0x000f8000) |
+				    ((tmp << 12) & 0x70000000);
+
+	    tmp = ((top >> 1) & 0x0000ffff) + 0x00018000;
+	    besr.p23_v_accum_init = (is_420||is_410) ? 
+				    ((tmp << 4) & OV0_P23_V_ACCUM_INIT_MASK)
+				    |(OV0_P23_MAX_LN_IN_PER_LN_OUT & 1) : 0;
+	}
+	else
+	    ComputeAccumInit(	val_OV0_P1_X_START,val_OV0_P2_X_START,
+				val_OV0_P1_H_INC,val_OV0_P23_H_INC,
+				val_OV0_P1_H_STEP_BY,val_OV0_P23_H_STEP_BY,
+				CRT_V_INC,P1GroupSize,P23GroupSize,
+				val_OV0_P1_MAX_LN_IN_PER_LN_OUT,
+				val_OV0_P23_MAX_LN_IN_PER_LN_OUT);
     }
 
     /* keep everything in 16.16 */
@@ -1627,7 +3018,7 @@
 	    config->offsets[i] = config->offsets[i-1]+config->frame_size;
     if(is_420 || is_410 || is_400)
     {
-        uint32_t d1line,d2line,d3line;
+	uint32_t d1line,d2line,d3line;
 	d1line = top*pitch;
 	if(is_420)
 	{
@@ -1651,8 +3042,7 @@
 	    d2line += (left >> 17) & ~15;
 	    d3line += (left >> 17) & ~15;
 	}
-	else
-	if(is_410)
+	else /* is_410 */
 	{
 	    d2line += (left >> 18) & ~15;
 	    d3line += (left >> 18) & ~15;
@@ -1678,16 +3068,8 @@
 	    }
 	    else
 	    {
-		if (besr.fourcc == IMGFMT_I420 || besr.fourcc == IMGFMT_IYUV)
-		{
-		    besr.vid_buf_base_adrs_u[i]=((radeon_overlay_off+config->offsets[i]+config->offset.v)&VIF_BUF1_BASE_ADRS_MASK)|VIF_BUF1_PITCH_SEL;
-		    besr.vid_buf_base_adrs_v[i]=((radeon_overlay_off+config->offsets[i]+config->offset.u)&VIF_BUF2_BASE_ADRS_MASK)|VIF_BUF2_PITCH_SEL;
-		}
-		else
-		{
-		    besr.vid_buf_base_adrs_v[i]=((radeon_overlay_off+config->offsets[i]+config->offset.v)&VIF_BUF1_BASE_ADRS_MASK)|VIF_BUF1_PITCH_SEL;
-		    besr.vid_buf_base_adrs_u[i]=((radeon_overlay_off+config->offsets[i]+config->offset.u)&VIF_BUF2_BASE_ADRS_MASK)|VIF_BUF2_PITCH_SEL;
-		}
+		besr.vid_buf_base_adrs_v[i]=((radeon_overlay_off+config->offsets[i]+config->offset.v)&VIF_BUF1_BASE_ADRS_MASK)|VIF_BUF1_PITCH_SEL;
+		besr.vid_buf_base_adrs_u[i]=((radeon_overlay_off+config->offsets[i]+config->offset.u)&VIF_BUF2_BASE_ADRS_MASK)|VIF_BUF2_PITCH_SEL;
 	    }
 	}
 	config->offset.y = ((besr.vid_buf_base_adrs_y[0])&VIF_BUF0_BASE_ADRS_MASK) - radeon_overlay_off;
@@ -1701,6 +3083,13 @@
 	    config->offset.v = ((besr.vid_buf_base_adrs_v[0])&VIF_BUF1_BASE_ADRS_MASK) - radeon_overlay_off;
 	    config->offset.u = ((besr.vid_buf_base_adrs_u[0])&VIF_BUF2_BASE_ADRS_MASK) - radeon_overlay_off;
 	}
+	if(besr.fourcc == IMGFMT_I420 || besr.fourcc == IMGFMT_IYUV)
+	{
+	  uint32_t tmp;
+	  tmp = config->offset.u;
+	  config->offset.u = config->offset.v;
+	  config->offset.v = tmp;
+	}
     }
     else
     {
@@ -1712,58 +3101,31 @@
 	besr.vid_buf_base_adrs_v[i] = radeon_overlay_off + config->offsets[i] + config->offset.y;
       }
     }
-
-    tmp = (left & 0x0003ffff) + 0x00028000 + (h_inc << 3);
-    besr.p1_h_accum_init = ((tmp <<  4) & 0x000f8000) |
-			   ((tmp << 12) & 0xf0000000);
-
-    tmp = ((left >> 1) & 0x0001ffff) + 0x00028000 + (h_inc << 2);
-    besr.p23_h_accum_init = ((tmp <<  4) & 0x000f8000) |
-			    ((tmp << 12) & 0x70000000);
-    tmp = (top & 0x0000ffff) + 0x00018000;
-    besr.p1_v_accum_init = ((tmp << 4) & OV0_P1_V_ACCUM_INIT_MASK)
-			    |(OV0_P1_MAX_LN_IN_PER_LN_OUT & 1);
-
-    tmp = ((top >> 1) & 0x0000ffff) + 0x00018000;
-    besr.p23_v_accum_init = (is_420||is_410) ?
-			    ((tmp << 4) & OV0_P23_V_ACCUM_INIT_MASK)
-			    |(OV0_P23_MAX_LN_IN_PER_LN_OUT & 1) : 0;
-
     leftUV = (left >> (is_410?18:17)) & 15;
     left = (left >> 16) & 15;
-    if(is_rgb && !is_rgb32) h_inc<<=1;
-    if(is_rgb32)
-	besr.h_inc = (h_inc >> 1) | ((h_inc >> 1) << 16);
-    else
-    if(is_410)
-	besr.h_inc = h_inc | ((h_inc >> 2) << 16);
-    else
-	besr.h_inc = h_inc | ((h_inc >> 1) << 16);
-    besr.step_by = step_by | (step_by << 8);
     besr.y_x_start = (config->dest.x+X_ADJUST) | (config->dest.y << 16);
     besr.y_x_end = (config->dest.x + dest_w+X_ADJUST) | ((config->dest.y + dest_h) << 16);
-    besr.p1_blank_lines_at_top = P1_BLNK_LN_AT_TOP_M1_MASK|((src_h-1)<<16);
-    if(is_420 || is_410)
-    {
-	src_h = (src_h + 1) >> (is_410?2:1);
-	besr.p23_blank_lines_at_top = P23_BLNK_LN_AT_TOP_M1_MASK|((src_h-1)<<16);
-    }
-    else besr.p23_blank_lines_at_top = 0;
+    ComputeBorders(config,VertUVSubSample);
     besr.vid_buf_pitch0_value = pitch;
     besr.vid_buf_pitch1_value = is_410 ? pitch>>2 : is_420 ? pitch>>1 : pitch;
-    besr.p1_x_start_end = (src_w+left-1)|(left<<16);
-    if (is_410||is_420) src_w>>=is_410?2:1;
-    if(is_400)
-    {
-	besr.p2_x_start_end = 0;
-	besr.p3_x_start_end = 0;
-    }
+    /* ********************************************************* */
+    /* ** Calculate programmable coefficients as needed		 */
+    /* ********************************************************* */
+
+    /* ToDo_Active: When in pick nearest mode, we need to program the filter tap zero */
+    /* coefficients to 0, 32, 0, 0. Or use hard coded coefficients. */
+    if(H_scale_ratio > MinHScaleHard) besr.filter_cntl |= FILTER_HARDCODED_COEF;
     else
     {
-	besr.p2_x_start_end = (src_w+left-1)|(leftUV<<16);
-	besr.p3_x_start_end = besr.p2_x_start_end;
+	FilterSetup (val_OV0_P1_H_INC);
+	/* ToDo_Active: Must add the smarts into the driver to decide what type of filtering it */
+	/* would like to do. For now, we let the test application decide. */
+	besr.filter_cntl = FILTER_PROGRAMMABLE_COEF;
+	if(DisallowFourTapVertFiltering)
+	    besr.filter_cntl |= FILTER_HARD_SCALE_VERT_Y;
+	if(DisallowFourTapUVVertFiltering)
+	    besr.filter_cntl |= FILTER_HARD_SCALE_VERT_UV;
     }
-
     return 0;
 }
 
@@ -1774,22 +3136,21 @@
   dbpp = radeon_vid_get_dbpp();
   switch(info->fourcc)
   {
+    case IMGFMT_Y800:
+		awidth = (info->src.w + (pitch-1)) & ~(pitch-1);
+		info->frame_size = awidth*info->src.h;
+		break;
+    case IMGFMT_YVU9:
+    case IMGFMT_IF09:
+		awidth = (info->src.w + (pitch-1)) & ~(pitch-1);
+		info->frame_size = awidth*(info->src.h+info->src.h/8);
+		break;
     case IMGFMT_I420:
     case IMGFMT_YV12:
     case IMGFMT_IYUV:
 		awidth = (info->src.w + (pitch-1)) & ~(pitch-1);
 		info->frame_size = awidth*(info->src.h+info->src.h/2);
 		break;
-    case IMGFMT_Y800:
-    case IMGFMT_Y8:
-		awidth = (info->src.w + (pitch-1)) & ~(pitch-1);
-		info->frame_size = awidth*info->src.h;
-		break;
-    case IMGFMT_IF09:
-    case IMGFMT_YVU9:
-		awidth = (info->src.w + (pitch-1)) & ~(pitch-1);
-		info->frame_size = awidth*(info->src.h+info->src.h/8);
-		break;
     case IMGFMT_RGB32:
     case IMGFMT_BGR32:
 		awidth = (info->src.w*4 + (pitch-1)) & ~(pitch-1);
@@ -1801,23 +3162,26 @@
 		info->frame_size = awidth*info->src.h;
 		break;
   }
+  info->frame_size = (info->frame_size+4095)&~4095;
 }
 
 static int radeon_config_playback(vidix_playback_t *info)
 {
   unsigned rgb_size,nfr;
+  uint32_t radeon_video_size;
   if(!is_supported_fourcc(info->fourcc)) return ENOSYS;
   if(info->num_frames>VID_PLAY_MAXFRAMES) info->num_frames=VID_PLAY_MAXFRAMES;
   if(info->num_frames==1) besr.double_buff=0;
-  else                    besr.double_buff=1;
+  else			  besr.double_buff=1;
   radeon_compute_framesize(info);
     
   rgb_size = radeon_get_xres()*radeon_get_yres()*((radeon_vid_get_dbpp()+7)/8);
   nfr = info->num_frames;
+  radeon_video_size = radeon_ram_size;
   for(;nfr>0; nfr--)
   {
-      radeon_overlay_off = radeon_ram_size - info->frame_size*nfr;
-#ifdef HAVE_X11
+      radeon_overlay_off = radeon_video_size - info->frame_size*nfr;
+#if !defined (RAGE128) && defined(HAVE_X11)
       radeon_overlay_off -= firegl_shift;
 #endif
       radeon_overlay_off &= 0xffff0000;
@@ -1828,8 +3192,8 @@
    nfr = info->num_frames;
    for(;nfr>0; nfr--)
    {
-      radeon_overlay_off = radeon_ram_size - info->frame_size*nfr;
-#ifdef HAVE_X11
+      radeon_overlay_off = radeon_video_size - info->frame_size*nfr;
+#if !defined (RAGE128) && defined(HAVE_X11)
       radeon_overlay_off -= firegl_shift;
 #endif
       radeon_overlay_off &= 0xffff0000;
@@ -1844,13 +3208,22 @@
   return 0;
 }
 
-static int radeon_playback_on( void )
+/*
+ * Turn overlay playback on by calling radeon_vid_display_video().
+ * On RAGE128 builds it additionally switches between exclusive and
+ * non-exclusive mode depending on the overlay size. Always returns 0.
+ */
+static int radeon_playback_on(void)
 {
+#ifdef RAGE128
+  unsigned dw,dh;
+#endif
   radeon_vid_display_video();
+#ifdef RAGE128
+  /* besr.y_x_start/y_x_end hold x in the low 16 bits and y in the high 16 bits;
+     the differences give the destination width and height. */
+  dh = (besr.y_x_end >> 16) - (besr.y_x_start >> 16);
+  dw = (besr.y_x_end & 0xFFFF) - (besr.y_x_start & 0xFFFF);
+  /* NOTE(review): exclusive mode is chosen when EITHER dimension matches the
+     screen resolution (||), not both - confirm this is intended. */
+  if(dw == radeon_get_xres() || dh == radeon_get_yres()) radeon_vid_exclusive();
+  else radeon_vid_non_exclusive();
+#endif
   return 0;
 }
 
-static int radeon_playback_off( void )
+static int radeon_playback_off(void)
 {
   radeon_vid_stop_video();
   return 0;
@@ -1885,7 +3258,7 @@
     OUTREG(OV0_VID_BUF5_BASE_ADRS,	off[5]);
     OUTREG(OV0_REG_LOAD_CNTL,		0);
     if(besr.vid_nbufs == 2) radeon_wait_vsync();
-    if(__verbose > 1) radeon_vid_dump_regs();
+    if(__verbose > VERBOSE_LEVEL) radeon_vid_dump_regs();
     return 0;
 }
 
@@ -1898,7 +3271,7 @@
  ,
  0, 0, 0, 0, 0, 0, 0, 0 };
 
-static int radeon_get_eq( vidix_video_eq_t * eq)
+static int radeon_get_eq(vidix_video_eq_t * eq)
 {
   memcpy(eq,&equal,sizeof(vidix_video_eq_t));
   return 0;
@@ -1907,13 +3280,13 @@
 #ifndef RAGE128
 #define RTFSaturation(a)   (1.0 + ((a)*1.0)/1000.0)
 #define RTFBrightness(a)   (((a)*1.0)/2000.0)
-#define RTFIntensity(a)    (((a)*1.0)/2000.0)
-#define RTFContrast(a)   (1.0 + ((a)*1.0)/1000.0)
+#define RTFIntensity(a)	   (((a)*1.0)/2000.0)
+#define RTFContrast(a)	 (1.0 + ((a)*1.0)/1000.0)
 #define RTFHue(a)   (((a)*3.1416)/1000.0)
 #define RTFCheckParam(a) {if((a)<-1000) (a)=-1000; if((a)>1000) (a)=1000;}
 #endif
 
-static int radeon_set_eq( const vidix_video_eq_t * eq)
+static int radeon_set_eq(const vidix_video_eq_t * eq)
 {
 #ifdef RAGE128
   int br,sat;
@@ -1923,7 +3296,7 @@
     if(eq->cap & VEQ_CAP_BRIGHTNESS) equal.brightness = eq->brightness;
     if(eq->cap & VEQ_CAP_CONTRAST)   equal.contrast   = eq->contrast;
     if(eq->cap & VEQ_CAP_SATURATION) equal.saturation = eq->saturation;
-    if(eq->cap & VEQ_CAP_HUE)        equal.hue        = eq->hue;
+    if(eq->cap & VEQ_CAP_HUE)	     equal.hue	      = eq->hue;
     if(eq->cap & VEQ_CAP_RGB_INTENSITY)
     {
       equal.red_intensity   = eq->red_intensity;
@@ -1934,7 +3307,7 @@
 #ifdef RAGE128
     br = equal.brightness * 64 / 1000;
     if(br < -64) br = -64; if(br > 63) br = 63;
-    sat = (equal.saturation + 1000) * 16 / 1000;
+    sat = (equal.saturation*31 + 31000) / 2000;
     if(sat < 0) sat = 0; if(sat > 31) sat = 31;
     OUTREG(OV0_COLOUR_CNTL, (br & 0x7f) | (sat << 8) | (sat << 16));
 #else
@@ -1958,7 +3331,7 @@
   return 0;
 }
 
-static int radeon_playback_set_deint (const vidix_deinterlace_t * info)
+static int radeon_playback_set_deint(const vidix_deinterlace_t * info)
 {
   unsigned sflg;
   switch(info->flags)
@@ -1996,7 +3369,7 @@
   return 0;  
 }
 
-static int radeon_playback_get_deint (vidix_deinterlace_t * info)
+static int radeon_playback_get_deint(vidix_deinterlace_t * info)
 {
   if(!besr.deinterlace_on) info->flags = CFG_NON_INTERLACED;
   else
@@ -2011,8 +3384,12 @@
 /* Graphic keys */
 static vidix_grkey_t radeon_grkey;
 
-static void set_gr_key( void )
+static int set_gr_key( void )
 {
+    int result = 0;
+
+    besr.merge_cntl = 0xff000000 | /* overlay alpha */
+		      0x00ff0000;  /* graphic alpha */
     if(radeon_grkey.ckey.op == CKEY_TRUE)
     {
 	int dbpp=radeon_vid_get_dbpp();
@@ -2021,8 +3398,8 @@
 	switch(dbpp)
 	{
 	case 15:
-#ifdef RADEON
-		if(RadeonFamily > 100)
+#ifndef RAGE128
+		if((besr.chip_flags&R_100)!=R_100)
 			besr.graphics_key_clr=
 				  ((radeon_grkey.ckey.blue &0xF8))
 				| ((radeon_grkey.ckey.green&0xF8)<<8)
@@ -2035,9 +3412,9 @@
 			| ((radeon_grkey.ckey.red  &0xF8)<<7);
 		break;
 	case 16:
-#ifdef RADEON
+#ifndef RAGE128
 		/* This test may be too general/specific */
-		if(RadeonFamily > 100)
+		if((besr.chip_flags&R_100)!=R_100)
 			besr.graphics_key_clr=
 				  ((radeon_grkey.ckey.blue &0xF8))
 				| ((radeon_grkey.ckey.green&0xFC)<<8)
@@ -2050,11 +3427,6 @@
 			| ((radeon_grkey.ckey.red  &0xF8)<<8);
 		break;
 	case 24:
-		besr.graphics_key_clr=
-			  ((radeon_grkey.ckey.blue &0xFF))
-			| ((radeon_grkey.ckey.green&0xFF)<<8)
-			| ((radeon_grkey.ckey.red  &0xFF)<<16);
-		break;
 	case 32:
 		besr.graphics_key_clr=
 			  ((radeon_grkey.ckey.blue &0xFF))
@@ -2074,6 +3446,30 @@
 	besr.ckey_cntl = VIDEO_KEY_FN_TRUE|CMP_MIX_AND|GRAPHIC_KEY_FN_EQ;
 #endif
     }
+    else if(radeon_grkey.ckey.op == CKEY_ALPHA)
+    {
+	int dbpp=radeon_vid_get_dbpp();
+	besr.ckey_on=1;
+
+	switch(dbpp)
+	{
+	case 32:
+		besr.ckey_on=1;
+		besr.graphics_key_msk=0;
+		besr.graphics_key_clr=0;
+		besr.ckey_cntl = VIDEO_KEY_FN_TRUE|GRAPHIC_KEY_FN_TRUE|CMP_MIX_AND;
+		besr.merge_cntl = 0xff000000 | /* overlay alpha */
+				  0x00ff0000 | /* graphic alpha */
+				  0x00000001;  /* DISP_ALPHA_MODE_PER_PIXEL */
+		break;
+	default:
+		besr.ckey_on=0;
+		besr.graphics_key_msk=0;
+		besr.graphics_key_clr=0;
+		besr.ckey_cntl = VIDEO_KEY_FN_TRUE|GRAPHIC_KEY_FN_TRUE|CMP_MIX_AND;
+		result = 1;
+	}
+    }
     else
     {
 	besr.ckey_on=0;
@@ -2085,6 +3481,8 @@
     OUTREG(OV0_GRAPHICS_KEY_MSK, besr.graphics_key_msk);
     OUTREG(OV0_GRAPHICS_KEY_CLR, besr.graphics_key_clr);
     OUTREG(OV0_KEY_CNTL,besr.ckey_cntl);
+    OUTREG(DISP_MERGE_CNTL, besr.merge_cntl);
+    return result;
 }
 
 static int radeon_get_gkey(vidix_grkey_t *grkey)
@@ -2096,8 +3494,7 @@
 static int radeon_set_gkey(const vidix_grkey_t *grkey)
 {
     memcpy(&radeon_grkey, grkey, sizeof(vidix_grkey_t));
-    set_gr_key();
-    return(0);
+    return (set_gr_key());
 }
 
 #ifdef RAGE128