changeset 1037:c31e94fefd2a trunk

[svn] - upstream updates regarding handling of SPC700 instructions and runtime length issues.
author nenolod
date Tue, 15 May 2007 13:18:35 -0700
parents 47db8268cb7b
children 1710790c8e85
files ChangeLog src/console/Snes_Spc.cxx src/console/Snes_Spc.h src/console/Spc_Cpu.cxx src/console/Spc_Cpu.h src/console/Spc_Dsp.cxx src/console/Spc_Dsp.h src/console/Spc_Emu.cxx src/console/Spc_Emu.h
diffstat 9 files changed, 3103 insertions(+), 2267 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sat May 12 17:06:13 2007 -0700
+++ b/ChangeLog	Tue May 15 13:18:35 2007 -0700
@@ -1,3 +1,12 @@
+2007-05-13 00:06:13 +0000  Cristi Magherusan <majeru@atheme.org>
+  revision [2236]
+  smarter polling and lots of other changes
+  
+  trunk/src/lastfm/lastfm.c |  250 +++++++++++++++++++++++++++-------------------
+  trunk/src/lastfm/lastfm.h |    5 
+  2 files changed, 156 insertions(+), 99 deletions(-)
+
+
 2007-05-12 22:03:26 +0000  Ben Tucker <bnt@interchange.ubc.ca>
   revision [2234]
   Changed alarm and projectm plugins so they use auddrct now.
--- a/src/console/Snes_Spc.cxx	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Snes_Spc.cxx	Tue May 15 13:18:35 2007 -0700
@@ -1,10 +1,12 @@
-// Game_Music_Emu 0.5.2. http://www.slack.net/~ant/
+// SPC emulation support: init, sample buffering, reset, SPC loading
+
+// snes_spc 0.9.0. http://www.slack.net/~ant/
 
 #include "Snes_Spc.h"
 
 #include <string.h>
 
-/* Copyright (C) 2004-2006 Shay Green. This module is free software; you
+/* Copyright (C) 2004-2007 Shay Green. This module is free software; you
 can redistribute it and/or modify it under the terms of the GNU Lesser
 General Public License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version. This
@@ -17,473 +19,362 @@
 
 #include "blargg_source.h"
 
-// always in the future (CPU time can go over 0, but not by this much)
-int const timer_disabled_time = 127;
+#define RAM         (m.ram.ram)
+#define REGS        (m.smp_regs [0])
+#define REGS_IN     (m.smp_regs [1])
+
+// (n ? n : 256)
+#define IF_0_THEN_256( n ) ((uint8_t) ((n) - 1) + 1)
+
+
+//// Init
 
-Snes_Spc::Snes_Spc() : dsp( mem.ram ), cpu( this, mem.ram )
+blargg_err_t Snes_Spc::init()
 {
-	set_tempo( 1.0 );
+	memset( &m, 0, sizeof m );
+	dsp.init( RAM );
+	
+	m.tempo = tempo_unit;
+	
+	// Most SPC music doesn't need ROM, and almost all of the rest relies only
+	// on these two bytes
+	m.rom [0x3E] = 0xFF;
+	m.rom [0x3F] = 0xC0;
 	
-	// Put STOP instruction around memory to catch PC underflow/overflow.
-	memset( mem.padding1, 0xFF, sizeof mem.padding1 );
-	memset( mem.padding2, 0xFF, sizeof mem.padding2 );
+	static unsigned char const cycle_table [128] =
+	{//   01   23   45   67   89   AB   CD   EF
+	    0x28,0x47,0x34,0x36,0x26,0x54,0x54,0x68, // 0
+	    0x48,0x47,0x45,0x56,0x55,0x65,0x22,0x46, // 1
+	    0x28,0x47,0x34,0x36,0x26,0x54,0x54,0x74, // 2
+	    0x48,0x47,0x45,0x56,0x55,0x65,0x22,0x38, // 3
+	    0x28,0x47,0x34,0x36,0x26,0x44,0x54,0x66, // 4
+	    0x48,0x47,0x45,0x56,0x55,0x45,0x22,0x43, // 5
+	    0x28,0x47,0x34,0x36,0x26,0x44,0x54,0x75, // 6
+	    0x48,0x47,0x45,0x56,0x55,0x55,0x22,0x36, // 7
+	    0x28,0x47,0x34,0x36,0x26,0x54,0x52,0x45, // 8
+	    0x48,0x47,0x45,0x56,0x55,0x55,0x22,0xC5, // 9
+	    0x38,0x47,0x34,0x36,0x26,0x44,0x52,0x44, // A
+	    0x48,0x47,0x45,0x56,0x55,0x55,0x22,0x34, // B
+	    0x38,0x47,0x45,0x47,0x25,0x64,0x52,0x49, // C
+	    0x48,0x47,0x56,0x67,0x45,0x55,0x22,0x83, // D
+	    0x28,0x47,0x34,0x36,0x24,0x53,0x43,0x40, // E
+	    0x48,0x47,0x45,0x56,0x34,0x54,0x22,0x60, // F
+	};
 	
-	// A few tracks read from the last four bytes of IPL ROM
-	boot_rom [sizeof boot_rom - 2] = 0xC0;
-	boot_rom [sizeof boot_rom - 1] = 0xFF;
-	memset( boot_rom, 0, sizeof boot_rom - 2 );
+	// unpack cycle table
+	for ( int i = 0; i < 128; i++ )
+	{
+		int n = cycle_table [i];
+		m.cycle_table [i * 2 + 0] = n >> 4;
+		m.cycle_table [i * 2 + 1] = n & 0x0F;
+	}
+	
+	#if SPC_LESS_ACCURATE
+		memcpy( reg_times, reg_times_, sizeof reg_times );
+	#endif
+	
+	reset();
+	return 0;
 }
 
-void Snes_Spc::set_tempo( double t )
+void Snes_Spc::init_rom( uint8_t const in [rom_size] )
 {
-	int unit = (int) (16.0 / t + 0.5);
-	
-	timer [0].divisor = unit * 8; // 8 kHz
-	timer [1].divisor = unit * 8; // 8 kHz
-	timer [2].divisor = unit;     // 64 kHz
+	memcpy( m.rom, in, sizeof m.rom );
 }
 
-// Load
+void Snes_Spc::set_tempo( int t )
+{
+	m.tempo = t;
+	int const timer2_shift = 4; // 64 kHz
+	int const other_shift  = 3; //  8 kHz
+	
+	#if SPC_DISABLE_TEMPO
+		m.timers [2].prescaler = timer2_shift;
+		m.timers [1].prescaler = timer2_shift + other_shift;
+		m.timers [0].prescaler = timer2_shift + other_shift;
+	#else
+		if ( !t )
+			t = 1;
+		int const timer2_rate  = 1 << timer2_shift;
+		int rate = (timer2_rate * tempo_unit + (t >> 1)) / t;
+		if ( rate < timer2_rate / 4 )
+			rate = timer2_rate / 4; // max 4x tempo
+		m.timers [2].prescaler = rate;
+		m.timers [1].prescaler = rate << other_shift;
+		m.timers [0].prescaler = rate << other_shift;
+	#endif
+}
 
-void Snes_Spc::set_ipl_rom( void const* in )
+// Timer registers have been loaded. Applies these to the timers. Does not
+// reset timer prescalers or dividers.
+void Snes_Spc::timers_loaded()
 {
-	memcpy( boot_rom, in, sizeof boot_rom );
+	int i;
+	for ( i = 0; i < timer_count; i++ )
+	{
+		Timer* t = &m.timers [i];
+		t->period  = IF_0_THEN_256( REGS [r_t0target + i] );
+		t->enabled = REGS [r_control] >> i & 1;
+		t->counter = REGS_IN [r_t0out + i] & 0x0F;
+	}
+	
+	set_tempo( m.tempo );
 }
 
-blargg_err_t Snes_Spc::load_spc( const void* data, long size )
+// Loads registers from unified 16-byte format
+void Snes_Spc::load_regs( uint8_t const in [reg_count] )
+{
+	memcpy( REGS, in, reg_count );
+	memcpy( REGS_IN, REGS, reg_count );
+	
+	// These always read back as 0
+	REGS_IN [r_test    ] = 0;
+	REGS_IN [r_control ] = 0;
+	REGS_IN [r_t0target] = 0;
+	REGS_IN [r_t1target] = 0;
+	REGS_IN [r_t2target] = 0;
+}
+
+// RAM was just loaded from SPC, with $F0-$FF containing SMP registers
+// and timer counts. Copies these to proper registers.
+void Snes_Spc::ram_loaded()
+{
+	m.rom_enabled = 0;
+	load_regs( &RAM [0xF0] );
+	
+	// Put STOP instruction around memory to catch PC underflow/overflow
+	memset( m.ram.padding1, cpu_pad_fill, sizeof m.ram.padding1 );
+	memset( m.ram.padding2, cpu_pad_fill, sizeof m.ram.padding2 );
+}
+
+// Registers were just loaded. Applies these new values.
+void Snes_Spc::regs_loaded()
+{
+	enable_rom( REGS [r_control] & 0x80 );
+	timers_loaded();
+}
+
+void Snes_Spc::reset_time_regs()
 {
-	struct spc_file_t {
-		char    signature [27];
-		char    unused [10];
-		uint8_t pc [2];
-		uint8_t a;
-		uint8_t x;
-		uint8_t y;
-		uint8_t status;
-		uint8_t sp;
-		char    unused2 [212];
-		uint8_t ram [0x10000];
-		uint8_t dsp [128];
-		uint8_t ipl_rom [128];
-	};
-	assert( offsetof (spc_file_t,ipl_rom) == spc_file_size );
+	m.cpu_error     = 0;
+	m.echo_accessed = 0;
+	m.spc_time      = 0;
+	m.dsp_time      = 0;
+	#if SPC_LESS_ACCURATE
+		m.dsp_time = clocks_per_sample + 1;
+	#endif
+	
+	for ( int i = 0; i < timer_count; i++ )
+	{
+		Timer* t = &m.timers [i];
+		t->next_time = 1;
+		t->divider   = 0;
+	}
+	
+	regs_loaded();
+	
+	m.extra_clocks = 0;
+	reset_buf();
+}
+
+void Snes_Spc::reset_common( int timer_counter_init )
+{
+	int i;
+	for ( i = 0; i < timer_count; i++ )
+		REGS_IN [r_t0out + i] = timer_counter_init;
+	
+	// Run IPL ROM
+	memset( &m.cpu_regs, 0, sizeof m.cpu_regs );
+	m.cpu_regs.pc = rom_addr;
 	
-	const spc_file_t* spc = (spc_file_t const*) data;
+	REGS [r_test   ] = 0x0A;
+	REGS [r_control] = 0xB0; // ROM enabled, clear ports
+	for ( i = 0; i < port_count; i++ )
+		REGS_IN [r_cpuio0 + i] = 0;
 	
-	if ( size < spc_file_size )
+	reset_time_regs();
+}
+
+void Snes_Spc::soft_reset()
+{
+	reset_common( 0 );
+	dsp.soft_reset();
+}
+
+void Snes_Spc::reset()
+{
+	memset( RAM, 0xFF, 0x10000 );
+	ram_loaded();
+	reset_common( 0x0F );
+	dsp.reset();
+}
+
+char const Snes_Spc::signature [signature_size + 1] =
+		"SNES-SPC700 Sound File Data v0.30\x1A\x1A";
+
+blargg_err_t Snes_Spc::load_spc( void const* data, long size )
+{
+	spc_file_t const* const spc = (spc_file_t const*) data;
+	
+	// be sure compiler didn't insert any padding into spc_file_t
+	assert( sizeof (spc_file_t) == spc_min_file_size + 0x80 );
+	
+	// Check signature and file size
+	if ( size < signature_size || memcmp( spc, signature, 27 ) )
 		return "Not an SPC file";
 	
-	if ( strncmp( spc->signature, "SNES-SPC700 Sound File Data", 27 ) != 0 )
-		return "Not an SPC file";
+	if ( size < spc_min_file_size )
+		return "Corrupt SPC file";
+	
+	// CPU registers
+	m.cpu_regs.pc  = spc->pch * 0x100 + spc->pcl;
+	m.cpu_regs.a   = spc->a;
+	m.cpu_regs.x   = spc->x;
+	m.cpu_regs.y   = spc->y;
+	m.cpu_regs.psw = spc->psw;
+	m.cpu_regs.sp  = spc->sp;
 	
-	registers_t regs;
-	regs.pc     = spc->pc [1] * 0x100 + spc->pc [0];
-	regs.a      = spc->a;
-	regs.x      = spc->x;
-	regs.y      = spc->y;
-	regs.status = spc->status;
-	regs.sp     = spc->sp;
+	// RAM and registers
+	memcpy( RAM, spc->ram, 0x10000 );
+	ram_loaded();
 	
-	if ( (unsigned long) size >= sizeof *spc )
-		set_ipl_rom( spc->ipl_rom );
+	// DSP registers
+	dsp.load( spc->dsp );
 	
-	const char* error = load_state( regs, spc->ram, spc->dsp );
+	reset_time_regs();
 	
-	echo_accessed = false;
-	
-	return error;
+	return 0;
 }
 
 void Snes_Spc::clear_echo()
 {
-	if ( !(dsp.read( 0x6C ) & 0x20) )
-	{
-		unsigned addr = 0x100 * dsp.read( 0x6D );
-		size_t   size = 0x800 * dsp.read( 0x7D );
-		memset( mem.ram + addr, 0xFF, min( size, sizeof mem.ram - addr ) );
-	}
-}
-
-// Handle other file formats (emulator save states) in user code, not here.
-
-blargg_err_t Snes_Spc::load_state( const registers_t& cpu_state, const void* new_ram,
-		const void* dsp_state )
-{
-	// cpu
-	cpu.r = cpu_state;
-	
-	// Allow DSP to generate one sample before code starts
-	// (Tengai Makyo Zero, Tenjin's Table Toss first notes are lost since it
-	// clears KON 31 cycles from starting execution. It works on the SNES
-	// since the SPC player adds a few extra cycles delay after restoring
-	// KON from the DSP registers at the end of an SPC file).
-	extra_cycles = 32; 
-	
-	// ram
-	memcpy( mem.ram, new_ram, sizeof mem.ram );
-	memcpy( extra_ram, mem.ram + rom_addr, sizeof extra_ram );
-	
-	// boot rom (have to force enable_rom() to update it)
-	rom_enabled = !(mem.ram [0xF1] & 0x80);
-	enable_rom( !rom_enabled );
-	
-	// dsp
-	dsp.reset();
-	int i;
-	for ( i = 0; i < Spc_Dsp::register_count; i++ )
-		dsp.write( i, ((uint8_t const*) dsp_state) [i] );
-	
-	// timers
-	for ( i = 0; i < timer_count; i++ )
-	{
-		Timer& t = timer [i];
-		
-		t.next_tick = 0;
-		t.enabled = (mem.ram [0xF1] >> i) & 1;
-		if ( !t.enabled )
-			t.next_tick = timer_disabled_time;
-		t.count = 0;
-		t.counter = mem.ram [0xFD + i] & 15;
-		
-		int p = mem.ram [0xFA + i];
-		t.period = p ? p : 0x100;
-	}
-	
-	// Handle registers which already give 0 when read by setting RAM and not changing it.
-	// Put STOP instruction in registers which can be read, to catch attempted CPU execution.
-	mem.ram [0xF0] = 0;
-	mem.ram [0xF1] = 0;
-	mem.ram [0xF3] = 0xFF;
-	mem.ram [0xFA] = 0;
-	mem.ram [0xFB] = 0;
-	mem.ram [0xFC] = 0;
-	mem.ram [0xFD] = 0xFF;
-	mem.ram [0xFE] = 0xFF;
-	mem.ram [0xFF] = 0xFF;
-	
-	return 0; // success
-}
-
-// Hardware
-
-// Current time starts negative and ends at 0
-inline spc_time_t Snes_Spc::time() const
-{
-	return -cpu.remain();
-}
-
-// Keep track of next time to run and avoid a function call if it hasn't been reached.
-
-// Timers
-
-void Snes_Spc::Timer::run_until_( spc_time_t time )
-{
-	if ( !enabled )
-		dprintf( "next_tick: %ld, time: %ld", (long) next_tick, (long) time );
-	assert( enabled ); // when disabled, next_tick should always be in the future
-	
-	int elapsed = ((time - next_tick) / divisor) + 1;
-	next_tick += elapsed * divisor;
-	
-	elapsed += count;
-	if ( elapsed >= period ) // avoid unnecessary division
+	if ( !(dsp.read( Spc_Dsp::r_flg ) & 0x20) )
 	{
-		int n = elapsed / period;
-		elapsed -= n * period;
-		counter = (counter + n) & 15;
-	}
-	count = elapsed;
-}
-
-// DSP
-
-const int clocks_per_sample = 32; // 1.024 MHz CPU clock / 32000 samples per second
-
-void Snes_Spc::run_dsp_( spc_time_t time )
-{
-	int count = ((time - next_dsp) >> 5) + 1; // divide by clocks_per_sample
-	sample_t* buf = sample_buf;
-	if ( buf ) {
-		sample_buf = buf + count * 2; // stereo
-		assert( sample_buf <= buf_end );
-	}
-	next_dsp += count * clocks_per_sample;
-	dsp.run( count, buf );
-}
-
-inline void Snes_Spc::run_dsp( spc_time_t time )
-{
-	if ( time >= next_dsp )
-		run_dsp_( time );
-}
-
-// Debug-only check for read/write within echo buffer, since this might result in
-// inaccurate emulation due to the DSP not being caught up to the present.
-inline void Snes_Spc::check_for_echo_access( spc_addr_t addr )
-{
-	if ( !echo_accessed && !(dsp.read( 0x6C ) & 0x20) )
-	{
-		// ** If echo accesses are found that require running the DSP, cache
-		// the start and end address on DSP writes to speed up checking.
-		
-		unsigned start = 0x100 * dsp.read( 0x6D );
-		unsigned end = start + 0x800 * dsp.read( 0x7D );
-		if ( start <= addr && addr < end ) {
-			echo_accessed = true;
-			dprintf( "Read/write at $%04X within echo buffer\n", (unsigned) addr );
-		}
+		int addr = 0x100 * dsp.read( Spc_Dsp::r_esa );
+		int end  = addr + 0x800 * (dsp.read( Spc_Dsp::r_edl ) & 0x0F);
+		if ( end > 0x10000 )
+			end = 0x10000;
+		memset( &RAM [addr], 0xFF, end - addr );
 	}
 }
 
-// Read
-
-int Snes_Spc::read( spc_addr_t addr )
-{
-	int result = mem.ram [addr];
-	
-	if ( (rom_addr <= addr && addr < 0xFFFC || addr >= 0xFFFE) && rom_enabled )
-		dprintf( "Read from ROM: %04X -> %02X\n", addr, result );
-	
-	if ( unsigned (addr - 0xF0) < 0x10 )
-	{
-		assert( 0xF0 <= addr && addr <= 0xFF );
-		
-		// counters
-		int i = addr - 0xFD;
-		if ( i >= 0 )
-		{
-			Timer& t = timer [i];
-			t.run_until( time() );
-			int old = t.counter;
-			t.counter = 0;
-			return old;
-		}
-		
-		// dsp
-		if ( addr == 0xF3 )
-		{
-			run_dsp( time() );
-			if ( mem.ram [0xF2] >= Spc_Dsp::register_count )
-				dprintf( "DSP read from $%02X\n", (int) mem.ram [0xF2] );
-			return dsp.read( mem.ram [0xF2] & 0x7F );
-		}
-		
-		if ( addr == 0xF0 || addr == 0xF1 || addr == 0xF8 ||
-				addr == 0xF9 || addr == 0xFA )
-			dprintf( "Read from register $%02X\n", (int) addr );
-		
-		// Registers which always read as 0 are handled by setting mem.ram [reg] to 0
-		// at startup then never changing that value.
-		
-		check(( check_for_echo_access( addr ), true ));
-	}
-	
-	return result;
-}
-
 
-// Write
+//// Sample output
 
-void Snes_Spc::enable_rom( bool enable )
+void Snes_Spc::reset_buf()
+{
+	// Start with half extra buffer of silence
+	sample_t* out = m.extra_buf;
+	while ( out < &m.extra_buf [extra_size / 2] )
+		*out++ = 0;
+	
+	m.extra_pos = out;
+	m.buf_begin = 0;
+	
+	dsp.set_output( 0, 0 );
+}
+
+void Snes_Spc::set_output( sample_t* out, int size )
 {
-	if ( rom_enabled != enable )
+	require( (size & 1) == 0 ); // size must be even
+	
+	m.extra_clocks &= clocks_per_sample - 1;
+	if ( out )
 	{
-		rom_enabled = enable;
-		memcpy( mem.ram + rom_addr, (enable ? boot_rom : extra_ram), rom_size );
-		// TODO: ROM can still get overwritten when DSP writes to echo buffer
+		sample_t const* out_end = out + size;
+		m.buf_begin = out;
+		m.buf_end   = out_end;
+		
+		// Copy extra to output
+		sample_t const* in = m.extra_buf;
+		while ( in < m.extra_pos && out < out_end )
+			*out++ = *in++;
+		
+		// Handle output being full already
+		if ( out >= out_end )
+		{
+			// Have DSP write to remaining extra space
+			out     = dsp.extra();
+			out_end = &dsp.extra() [extra_size];
+			
+			// Copy any remaining extra samples as if DSP wrote them
+			while ( in < m.extra_pos )
+				*out++ = *in++;
+			assert( out <= out_end );
+		}
+		
+		dsp.set_output( out, out_end - out );
+	}
+	else
+	{
+		reset_buf();
 	}
 }
 
-void Snes_Spc::write( spc_addr_t addr, int data )
+void Snes_Spc::save_extra()
 {
-	// first page is very common
-	if ( addr < 0xF0 ) {
-		mem.ram [addr] = (uint8_t) data;
-	}
-	else switch ( addr )
+	// Get end pointers
+	sample_t const* main_end = m.buf_end;     // end of data written to buf
+	sample_t const* dsp_end  = dsp.out_pos(); // end of data written to dsp.extra()
+	if ( m.buf_begin <= dsp_end && dsp_end <= main_end )
 	{
-		// RAM
-		default:
-			check(( check_for_echo_access( addr ), true ));
-			if ( addr < rom_addr ) {
-				mem.ram [addr] = (uint8_t) data;
-			}
-			else {
-				extra_ram [addr - rom_addr] = (uint8_t) data;
-				if ( !rom_enabled )
-					mem.ram [addr] = (uint8_t) data;
-			}
-			break;
-		
-		// DSP
-		//case 0xF2: // mapped to RAM
-		case 0xF3: {
-			run_dsp( time() );
-			int reg = mem.ram [0xF2];
-			if ( next_dsp > 0 ) {
-				// skip mode
-				
-				// key press
-				if ( reg == 0x4C )
-					keys_pressed |= data & ~dsp.read( 0x5C );
-				
-				// key release
-				if ( reg == 0x5C ) {
-					keys_released |= data;
-					keys_pressed &= ~data;
-				}
-			}
-			if ( reg < Spc_Dsp::register_count ) {
-				dsp.write( reg, data );
-			}
-			else {
-				dprintf( "DSP write to $%02X\n", (int) reg );
-			}
-			break;
-		}
-		
-		case 0xF0: // Test register
-			dprintf( "Wrote $%02X to $F0\n", (int) data );
-			break;
-		
-		// Config
-		case 0xF1:
-		{
-			// timers
-			for ( int i = 0; i < timer_count; i++ )
-			{
-				Timer& t = timer [i];
-				if ( !(data & (1 << i)) ) {
-					t.enabled = 0;
-					t.next_tick = timer_disabled_time;
-				}
-				else if ( !t.enabled ) {
-					// just enabled
-					t.enabled = 1;
-					t.counter = 0;
-					t.count = 0;
-					t.next_tick = time();
-				}
-			}
-			
-			// port clears
-			if ( data & 0x10 ) {
-				mem.ram [0xF4] = 0;
-				mem.ram [0xF5] = 0;
-			}
-			if ( data & 0x20 ) {
-				mem.ram [0xF6] = 0;
-				mem.ram [0xF7] = 0;
-			}
-			
-			enable_rom( data & 0x80 );
-			
-			break;
-		}
-		
-		// Ports
-		case 0xF4:
-		case 0xF5:
-		case 0xF6:
-		case 0xF7:
-			// to do: handle output ports
-			break;
-		
-		//case 0xF8: // verified on SNES that these are read/write (RAM)
-		//case 0xF9:
-		
-		// Timers
-		case 0xFA:
-		case 0xFB:
-		case 0xFC: {
-			Timer& t = timer [addr - 0xFA];
-			if ( (t.period & 0xFF) != data ) {
-				t.run_until( time() );
-				t.period = data ? data : 0x100;
-			}
-			break;
-		}
-		
-		// Counters (cleared on write)
-		case 0xFD:
-		case 0xFE:
-		case 0xFF:
-			dprintf( "Wrote to counter $%02X\n", (int) addr );
-			timer [addr - 0xFD].counter = 0;
-			break;
+		main_end = dsp_end;
+		dsp_end  = dsp.extra(); // nothing in DSP's extra
 	}
+	
+	// Copy any extra samples at these ends into extra_buf
+	sample_t* out = m.extra_buf;
+	sample_t const* in;
+	for ( in = m.buf_begin + sample_count(); in < main_end; in++ )
+		*out++ = *in;
+	for ( in = dsp.extra(); in < dsp_end ; in++ )
+		*out++ = *in;
+	
+	m.extra_pos = out;
+	assert( out <= &m.extra_buf [extra_size] );
 }
 
-// Play
-
-blargg_err_t Snes_Spc::skip( long count )
+blargg_err_t Snes_Spc::play( int count, sample_t* out )
 {
-	if ( count > 4 * 32000L )
+	require( (count & 1) == 0 ); // must be even
+	if ( count )
 	{
-		// don't run DSP for long durations (2-3 times faster)
-		
-		const long sync_count = 32000L * 2;
-		
-		// keep track of any keys pressed/released (and not subsequently released)
-		keys_pressed = 0;
-		keys_released = 0;
-		// sentinel tells play to ignore DSP
-		RETURN_ERR( play( count - sync_count, skip_sentinel ) );
-		
-		// press/release keys now
-		dsp.write( 0x5C, keys_released & ~keys_pressed );
-		dsp.write( 0x4C, keys_pressed );
-		
-		clear_echo();
-		
-		// play the last few seconds normally to help synchronize DSP
-		count = sync_count;
+		set_output( out, count );
+		end_frame( count * (clocks_per_sample / 2) );
 	}
 	
-	return play( count );
+	const char* err = m.cpu_error;
+	m.cpu_error = 0;
+	return err;
 }
 
-blargg_err_t Snes_Spc::play( long count, sample_t* out )
+blargg_err_t Snes_Spc::skip( int count )
 {
-	require( count % 2 == 0 ); // output is always in pairs of samples
-	
-	// CPU time() runs from -duration to 0
-	spc_time_t duration = (count / 2) * clocks_per_sample;
-	
-	// DSP output is made on-the-fly when the CPU reads/writes DSP registers
-	sample_buf = out;
-	buf_end = out + (out && out != skip_sentinel ? count : 0);
-	next_dsp = (out == skip_sentinel) ? clocks_per_sample : -duration + clocks_per_sample;
-	
-	// Localize timer next_tick times and run them to the present to prevent a running
-	// but ignored timer's next_tick from getting too far behind and overflowing.
-	for ( int i = 0; i < timer_count; i++ )
+	#if SPC_LESS_ACCURATE
+	if ( count > 2 * sample_rate * 2 )
 	{
-		Timer& t = timer [i];
-		if ( t.enabled )
-		{
-			t.next_tick -= duration;
-			t.run_until( -duration );
-		}
+		set_output( 0, 0 );
+		
+		// Skip a multiple of 4 samples
+		time_t end = count;
+		count = (count & 3) + 1 * sample_rate * 2;
+		end = (end - count) * (clocks_per_sample / 2);
+		
+		m.skipped_kon  = 0;
+		m.skipped_koff = 0;
+		
+		// Preserve DSP and timer synchronization
+		// TODO: verify that this really preserves it
+		int old_dsp_time = m.dsp_time + m.spc_time;
+		m.dsp_time = end - m.spc_time + skipping_time;
+		end_frame( end );
+		m.dsp_time = m.dsp_time - skipping_time + old_dsp_time;
+		
+		dsp.write( Spc_Dsp::r_koff, m.skipped_koff & ~m.skipped_kon );
+		dsp.write( Spc_Dsp::r_kon , m.skipped_kon );
+		clear_echo();
 	}
+	#endif
 	
-	// Run CPU for duration, reduced by any extra cycles from previous run
-	int elapsed = cpu.run( duration - extra_cycles );
-	if ( elapsed > 0 )
-	{
-		dprintf( "Unhandled instruction $%02X, pc = $%04X\n",
-				(int) cpu.read( cpu.r.pc ), (unsigned) cpu.r.pc );
-		return "Emulation error (illegal/unsupported instruction)";
-	}
-	extra_cycles = -elapsed;
-	
-	// Catch DSP up to present.
-	run_dsp( 0 );
-	if ( out ) {
-		assert( next_dsp == clocks_per_sample );
-		assert( out == skip_sentinel || sample_buf - out == count );
-	}
-	buf_end = 0;
-	
-	return 0;
+	return play( count, 0 );
 }
--- a/src/console/Snes_Spc.h	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Snes_Spc.h	Tue May 15 13:18:35 2007 -0700
@@ -1,121 +1,284 @@
-// Super Nintendo (SNES) SPC-700 APU Emulator
+// SNES SPC-700 APU emulator
 
-// Game_Music_Emu 0.5.2
+// snes_spc 0.9.0
 #ifndef SNES_SPC_H
 #define SNES_SPC_H
 
-#include "blargg_common.h"
-#include "Spc_Cpu.h"
 #include "Spc_Dsp.h"
+#include "blargg_endian.h"
 
-class Snes_Spc {
+struct Snes_Spc {
 public:
+	typedef BOOST::uint8_t uint8_t;
+	
+	// Must be called once before using
+	blargg_err_t init();
 	
-	// Load copy of SPC data into emulator. Clear echo buffer if 'clear_echo' is true.
-	enum { spc_file_size = 0x10180 };
-	blargg_err_t load_spc( const void* spc, long spc_size );
+	// Sample pairs generated per second
+	enum { sample_rate = 32000 };
+	
+// Emulator use
 	
-	// Generate 'count' samples and optionally write to 'buf'. Count must be even.
-	// Sample output is 16-bit 32kHz, signed stereo pairs with the left channel first.
+	// Sets IPL ROM data. Library does not include ROM data. Most SPC music files
+	// don't need ROM, but a full emulator must provide this.
+	enum { rom_size = 0x40 };
+	void init_rom( uint8_t const rom [rom_size] );
+
+	// Sets destination for output samples
 	typedef short sample_t;
-	blargg_err_t play( long count, sample_t* buf = NULL );
-	
-// Optional functionality
+	void set_output( sample_t* out, int out_size );
+
+	// Number of samples written to output since last set
+	int sample_count() const;
+
+	// Resets SPC to power-on state. This resets your output buffer, so you must
+	// call set_output() after this.
+	void reset();
+
+	// Emulates pressing reset switch on SNES. This resets your output buffer, so
+	// you must call set_output() after this.
+	void soft_reset();
+
+	// 1024000 SPC clocks per second, sample pair every 32 clocks
+	typedef int time_t;
+	enum { clock_rate = 1024000 };
+	enum { clocks_per_sample = 32 };
 	
-	// Load copy of state into emulator.
-	typedef Spc_Cpu::registers_t registers_t;
-	blargg_err_t load_state( const registers_t& cpu_state, const void* ram_64k,
-		const void* dsp_regs_128 );
+	// Emulated port read/write at specified time
+	enum { port_count = 4 };
+	int  read_port ( time_t, int port );
+	void write_port( time_t, int port, int data );
+
+	// Runs SPC to end_time and starts a new time frame at 0
+	void end_frame( time_t end_time );
 	
-	// Clear echo buffer, useful because many tracks have junk in the buffer.
-	void clear_echo();
+// Sound control
 	
-	// Mute voice n if bit n (1 << n) of mask is set
-	enum { voice_count = Spc_Dsp::voice_count };
+	// Mutes voices corresponding to non-zero bits in mask (issues repeated KOFF events).
+	// Reduces emulation accuracy.
+	enum { voice_count = 8 };
 	void mute_voices( int mask );
 	
-	// Skip forward by the specified number of samples (64000 samples = 1 second)
-	blargg_err_t skip( long count );
-	
-	// Set gain, where 1.0 is normal. When greater than 1.0, output is clamped the
-	// 16-bit sample range.
-	void set_gain( double );
-	
-	// If true, prevent channels and global volumes from being phase-negated
+	// If true, prevents channels and global volumes from being phase-negated.
+	// Only supported by fast DSP.
 	void disable_surround( bool disable = true );
 	
-	// Set 128 bytes to use for IPL boot ROM. Makes copy. Default is zero filled,
-	// to avoid including copyrighted code from the SPC-700.
-	void set_ipl_rom( const void* );
+	// Sets tempo, where tempo_unit = normal, tempo_unit / 2 = half speed, etc.
+	enum { tempo_unit = 0x100 };
+	void set_tempo( int );
+	
+	enum { gain_unit = Spc_Dsp::gain_unit };
+	void set_gain( int gain );
 	
-	void set_tempo( double );
+// SPC music files
+
+	// Loads SPC data into emulator
+	enum { spc_min_file_size = 0x10180 };
+	enum { spc_file_size     = 0x10200 };
+	blargg_err_t load_spc( void const* in, long size );
+	
+	// Clears echo region. Useful after loading an SPC as many have garbage in echo.
+	void clear_echo();
+
+	// Plays for count samples and writes samples to out. Discards samples if out
+	// is NULL. Count must be a multiple of 2 since output is stereo.
+	blargg_err_t play( int count, sample_t* out );
+	
+	// Skips count samples. Several times faster than play() when using fast DSP.
+	blargg_err_t skip( int count );
 	
+// State save/load (only available with accurate DSP)
+
+#if !SPC_NO_COPY_STATE_FUNCS
+	// Saves/loads state
+	enum { state_size = 67 * 1024L }; // maximum space needed when saving
+	typedef Spc_Dsp::copy_func_t copy_func_t;
+	void copy_state( unsigned char** io, copy_func_t );
+	
+	// Writes minimal header to spc_out
+	static void init_header( void* spc_out );
+
+	// Saves emulator state as SPC file data. Writes spc_file_size bytes to spc_out.
+	// Does not set up SPC header; use init_header() for that.
+	void save_spc( void* spc_out );
+
+	// Returns true if new key-on events occurred since last check. Useful for
+	// trimming silence while saving an SPC.
+	bool check_kon();
+#endif
+
 public:
-	Snes_Spc();
-	typedef BOOST::uint8_t uint8_t;
-private:
-	// timers
+	BLARGG_DISABLE_NOTHROW
+	
+	typedef BOOST::uint16_t uint16_t;
+	
+	// Time relative to m.spc_time. Speeds up code a bit by eliminating need to
+	// constantly add m.spc_time to time from CPU. CPU uses time that ends at
+	// 0 to eliminate reloading end time every instruction. It pays off.
+	typedef int rel_time_t;
+	
 	struct Timer
 	{
-		spc_time_t next_tick;
+		rel_time_t next_time; // time of next event
+		int prescaler;
 		int period;
-		int count;
-		int divisor;
+		int divider;
 		int enabled;
 		int counter;
-		
-		void run_until_( spc_time_t );
-		void run_until( spc_time_t time )
-		{
-			if ( time >= next_tick )
-				run_until_( time );
-		}
 	};
+	enum { reg_count = 0x10 };
 	enum { timer_count = 3 };
-	Timer timer [timer_count];
-
-	// hardware
-	int extra_cycles;
-	spc_time_t time() const;
-	int  read( spc_addr_t );
-	void write( spc_addr_t, int );
-	friend class Spc_Cpu;
+	enum { extra_size = Spc_Dsp::extra_size };
 	
-	// dsp
-	sample_t* sample_buf;
-	sample_t* buf_end; // to do: remove this once possible bug resolved
-	spc_time_t next_dsp;
+	enum { signature_size = 35 };
+	
+private:
 	Spc_Dsp dsp;
-	int keys_pressed;
-	int keys_released;
-	sample_t skip_sentinel [1]; // special value for play() passed by skip()
-	void run_dsp( spc_time_t );
-	void run_dsp_( spc_time_t );
-	bool echo_accessed;
-	void check_for_echo_access( spc_addr_t );
+	
+	#if SPC_LESS_ACCURATE
+		static signed char const reg_times_ [256];
+		signed char reg_times [256];
+	#endif
 	
-	// boot rom
-	enum { rom_size = 64 };
+	struct state_t
+	{
+		Timer timers [timer_count];
+		
+		uint8_t smp_regs [2] [reg_count];
+		
+		struct
+		{
+			int pc;
+			int a;
+			int x;
+			int y;
+			int psw;
+			int sp;
+		} cpu_regs;
+		
+		rel_time_t  dsp_time;
+		time_t      spc_time;
+		bool        echo_accessed;
+		
+		int         tempo;
+		int         skipped_kon;
+		int         skipped_koff;
+		const char* cpu_error;
+		
+		int         extra_clocks;
+		sample_t*   buf_begin;
+		sample_t const* buf_end;
+		sample_t*   extra_pos;
+		sample_t    extra_buf [extra_size];
+		
+		int         rom_enabled;
+		uint8_t     rom    [rom_size];
+		uint8_t     hi_ram [rom_size];
+		
+		unsigned char cycle_table [256];
+		
+		struct
+		{
+			// padding to neutralize address overflow
+			union {
+				uint8_t padding1 [0x100];
+				uint16_t align; // makes compiler align data for 16-bit access
+			} padding1 [1];
+			uint8_t ram      [0x10000];
+			uint8_t padding2 [0x100];
+		} ram;
+	};
+	state_t m;
+	
 	enum { rom_addr = 0xFFC0 };
-	bool rom_enabled;
-	void enable_rom( bool );
+	
+	enum { skipping_time = 127 };
+	
+	// Value that padding should be filled with
+	enum { cpu_pad_fill = 0xFF };
+	
+	enum {
+        r_test     = 0x0, r_control  = 0x1,
+        r_dspaddr  = 0x2, r_dspdata  = 0x3,
+        r_cpuio0   = 0x4, r_cpuio1   = 0x5,
+        r_cpuio2   = 0x6, r_cpuio3   = 0x7,
+        r_f8       = 0x8, r_f9       = 0x9,
+        r_t0target = 0xA, r_t1target = 0xB, r_t2target = 0xC,
+        r_t0out    = 0xD, r_t1out    = 0xE, r_t2out    = 0xF
+	};
+	
+	void timers_loaded();
+	void enable_rom( int enable );
+	void reset_buf();
+	void save_extra();
+	void load_regs( uint8_t const in [reg_count] );
+	void ram_loaded();
+	void regs_loaded();
+	void reset_time_regs();
+	void reset_common( int timer_counter_init );
 	
-	// CPU and RAM (at end because it's large)
-	Spc_Cpu cpu;
-	uint8_t extra_ram [rom_size];
-	struct {
-		// padding to catch jumps before beginning or past end
-		uint8_t padding1 [0x100];
+	Timer* run_timer_      ( Timer* t, rel_time_t );
+	Timer* run_timer       ( Timer* t, rel_time_t );
+	int dsp_read           ( rel_time_t );
+	void dsp_write         ( int data, rel_time_t );
+	void cpu_write_smp_reg_( int data, rel_time_t, int addr );
+	void cpu_write_smp_reg ( int data, rel_time_t, int addr );
+	void cpu_write_high    ( int data, int i, rel_time_t );
+	void cpu_write         ( int data, int addr, rel_time_t );
+	int cpu_read_smp_reg   ( int i, rel_time_t );
+	int cpu_read           ( int addr, rel_time_t );
+	unsigned CPU_mem_bit   ( uint8_t const* pc, rel_time_t );
+	
+	bool check_echo_access ( int addr );
+	uint8_t* run_until_( time_t end_time );
+	
+	struct spc_file_t
+	{
+		char    signature [signature_size];
+		uint8_t has_id666;
+		uint8_t version;
+		uint8_t pcl, pch;
+		uint8_t a;
+		uint8_t x;
+		uint8_t y;
+		uint8_t psw;
+		uint8_t sp;
+		char    text [212];
 		uint8_t ram [0x10000];
-		uint8_t padding2 [0x100];
-	} mem;
-	uint8_t boot_rom [rom_size];
+		uint8_t dsp [128];
+		uint8_t unused [0x40];
+		uint8_t ipl_rom [0x40];
+	};
+
+	static char const signature [signature_size + 1];
+	
+	void save_regs( uint8_t out [reg_count] );
 };
 
-inline void Snes_Spc::disable_surround( bool disable ) { dsp.disable_surround( disable ); }
+#include <assert.h>
+
+inline int Snes_Spc::sample_count() const { return (m.extra_clocks >> 5) * 2; }
+
+inline int Snes_Spc::read_port( time_t t, int port )
+{
+	assert( (unsigned) port < port_count );
+	return run_until_( t ) [port];
+}
+
+inline void Snes_Spc::write_port( time_t t, int port, int data )
+{
+	assert( (unsigned) port < port_count );
+	run_until_( t ) [0x10 + port] = data;
+}
+
+inline void Snes_Spc::set_gain( int gain ) { dsp.set_gain( gain ); }
 
 inline void Snes_Spc::mute_voices( int mask ) { dsp.mute_voices( mask ); }
+	
+inline void Snes_Spc::disable_surround( bool disable ) { dsp.disable_surround( disable ); }
 
-inline void Snes_Spc::set_gain( double v ) { dsp.set_gain( v ); }
+#if !SPC_NO_COPY_STATE_FUNCS
+inline bool Snes_Spc::check_kon() { return dsp.check_kon(); }
+#endif
 
 #endif
--- a/src/console/Spc_Cpu.cxx	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Spc_Cpu.cxx	Tue May 15 13:18:35 2007 -0700
@@ -1,11 +1,12 @@
-// Game_Music_Emu 0.5.2. http://www.slack.net/~ant/
+// Core SPC emulation: CPU, timers, SMP registers, memory
 
-#include "Spc_Cpu.h"
+// snes_spc 0.9.0. http://www.slack.net/~ant/
 
-#include "blargg_endian.h"
 #include "Snes_Spc.h"
 
-/* Copyright (C) 2004-2006 Shay Green. This module is free software; you
+#include <string.h>
+
+/* Copyright (C) 2004-2007 Shay Green. This module is free software; you
 can redistribute it and/or modify it under the terms of the GNU Lesser
 General Public License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version. This
@@ -18,1045 +19,546 @@
 
 #include "blargg_source.h"
 
-// Several instructions are commented out (or not even implemented). These aren't
-// used by the SPC files tested.
+#define RAM         (m.ram.ram)
+#define REGS        (m.smp_regs [0]) // register values as last written by the CPU (see cpu_write)
+#define REGS_IN     (m.smp_regs [1]) // register values handed back on CPU reads (see cpu_read_smp_reg)
+
+// (n ? n : 256) for n in 0..255; in general maps n modulo 256 into the range 1..256
+#define IF_0_THEN_256( n ) ((uint8_t) ((n) - 1) + 1)
+
+// Note: SPC_MORE_ACCURACY exists mainly so I can run my validation tests, which
+// do crazy echo buffer accesses.
+#ifndef SPC_MORE_ACCURACY
+	#define SPC_MORE_ACCURACY 0
+#endif
+
+#ifdef BLARGG_ENABLE_OPTIMIZER
+	#include BLARGG_ENABLE_OPTIMIZER
+#endif
+
+
+//// Timers
 
-// Optimize performance for the most common instructions, and size for the rest:
-//
-// 15%  0xF0    BEQ rel
-//  8%  0xE4    MOV A,dp
-//  4%  0xF5    MOV A,abs+X
-//  4%  0xD0    BNE rel
-//  4%  0x6F    RET
-//  4%  0x3F    CALL addr
-//  4%  0xF4    MOV A,dp+X
-//  3%  0xC4    MOV dp,A
-//  2%  0xEB    MOV Y,dp
-//  2%  0x3D    INC X
-//  2%  0xF6    MOV A,abs+Y
-// (1% and below not shown)
+#if SPC_DISABLE_TEMPO // prescaler is used as a shift count
+	#define TIMER_DIV( t, n ) ((n) >> t->prescaler)
+	#define TIMER_MUL( t, n ) ((n) << t->prescaler)
+#else // prescaler is used as a plain divisor and multiplier
+	#define TIMER_DIV( t, n ) ((n) / t->prescaler)
+	#define TIMER_MUL( t, n ) ((n) * t->prescaler)
+#endif // NOTE(review): t is not parenthesized in these macros, so pass only a simple pointer expression
 
-Spc_Cpu::Spc_Cpu( Snes_Spc* e, uint8_t* ram_in ) : ram( ram_in ), emu( *e )
+Snes_Spc::Timer* Snes_Spc::run_timer_( Timer* t, rel_time_t time ) // catches timer t up to `time` and returns t; run_timer only calls this when time >= t->next_time
 {
-	remain_ = 0;
-	assert( INT_MAX >= 0x7FFFFFFF ); // requires 32-bit int
-	blargg_verify_byte_order();
+	int elapsed = TIMER_DIV( t, time - t->next_time ) + 1; // whole prescaler ticks that are due, counting the one at next_time
+	t->next_time += TIMER_MUL( t, elapsed ); // moves next_time past `time` whenever time >= the old next_time
+	
+	if ( t->enabled )
+	{
+		int remain = IF_0_THEN_256( t->period - t->divider ); // ticks until the divider next reaches the period, in 1..256
+		int divider = t->divider + elapsed;
+		int over = elapsed - remain; // ticks left after the first period match; negative if none happened
+		if ( over >= 0 )
+		{
+			int n = over / t->period; // further complete periods after the first match
+			t->counter = (t->counter + 1 + n) & 0x0F; // counter is kept to 4 bits
+			divider = over - n * t->period;
+		}
+		t->divider = (uint8_t) divider;
+	}
+	return t;
 }
 
-#define READ( addr )            (emu.read( addr ))
-#define WRITE( addr, value )    (emu.write( addr, value ))
-
-#define READ_DP( addr )         READ( (addr) + dp )
-#define WRITE_DP( addr, value ) WRITE( (addr) + dp, value )
-
-#define READ_PROG( addr )       (ram [addr])
-#define READ_PROG16( addr )     GET_LE16( &READ_PROG( addr ) )
-
-int Spc_Cpu::read( spc_addr_t addr )
+inline Snes_Spc::Timer* Snes_Spc::run_timer( Timer* t, rel_time_t time ) // inline fast path: does nothing unless a tick is due, always returns t
 {
-	return READ( addr );
-}
-
-void Spc_Cpu::write( spc_addr_t addr, int data )
-{
-	WRITE( addr, data );
+	if ( time >= t->next_time ) // at least one prescaler tick is pending
+		t = run_timer_( t, time );
+	return t;
 }
 
-// Cycle table derived from text copy of SPC-700 manual (using regular expressions)
-static unsigned char const cycle_table [0x100] = {
-//  0 1 2 3 4 5 6 7 8 9 A B C D E F
-	2,8,4,5,3,4,3,6,2,6,5,4,5,4,6,8, // 0
-	2,8,4,5,4,5,5,6,5,5,6,5,2,2,4,6, // 1
-	2,8,4,5,3,4,3,6,2,6,5,4,5,4,5,4, // 2
-	2,8,4,5,4,5,5,6,5,5,6,5,2,2,3,8, // 3
-	2,8,4,5,3,4,3,6,2,6,4,4,5,4,6,6, // 4
-	2,8,4,5,4,5,5,6,5,5,4,5,2,2,4,3, // 5
-	2,8,4,5,3,4,3,6,2,6,4,4,5,4,5,5, // 6
-	2,8,4,5,4,5,5,6,5,5,5,5,2,2,3,6, // 7
-	2,8,4,5,3,4,3,6,2,6,5,4,5,2,4,5, // 8
-	2,8,4,5,4,5,5,6,5,5,5,5,2,2,12,5,// 9
-	3,8,4,5,3,4,3,6,2,6,4,4,5,2,4,4, // A
-	2,8,4,5,4,5,5,6,5,5,5,5,2,2,3,4, // B
-	3,8,4,5,4,5,4,7,2,5,6,4,5,2,4,9, // C
-	2,8,4,5,5,6,6,7,4,5,4,5,2,2,6,3, // D
-	2,8,4,5,3,4,3,6,2,4,5,3,4,3,4,3, // E
-	2,8,4,5,4,5,5,6,3,4,5,4,2,2,4,3  // F
-};
+
+//// ROM
 
-// The C,mem instructions are hardly used, so a non-inline function is used for
-// the common access code.
-unsigned Spc_Cpu::mem_bit( spc_addr_t pc )
+void Snes_Spc::enable_rom( int enable ) // copies m.rom into RAM at rom_addr when enabling, or copies the saved m.hi_ram bytes back when disabling
 {
-	unsigned addr = READ_PROG16( pc );
-	unsigned t = READ( addr & 0x1FFF ) >> (addr >> 13);
-	return (t << 8) & 0x100;
+	if ( m.rom_enabled != enable ) // NOTE(review): compared as raw ints, so callers must always pass the same nonzero value (cpu_write_smp_reg_ passes data & 0x80)
+	{
+		m.rom_enabled = enable;
+		if ( enable )
+			memcpy( m.hi_ram, &RAM [rom_addr], sizeof m.hi_ram ); // save the RAM bytes that the ROM copy is about to replace
+		memcpy( &RAM [rom_addr], (enable ? m.rom : m.hi_ram), rom_size );
+		// TODO: ROM can still get overwritten when DSP writes to echo buffer
+	}
 }
 
-spc_time_t Spc_Cpu::run( spc_time_t cycle_count )
-{
-	remain_ = cycle_count;
-	
-	uint8_t* const ram = this->ram; // cache
-	
-	// Stack pointer is kept one greater than usual SPC stack pointer to allow
-	// common pre-decrement and post-increment memory instructions that some
-	// processors have. Address wrap-around isn't supported.
-	#define PUSH( v )       (*--sp = uint8_t (v))
-	#define PUSH16( v )     (sp -= 2, SET_LE16( sp, v ))
-	#define POP()           (*sp++)
-	#define SET_SP( v )     (sp = ram + 0x101 + (v))
-	#define GET_SP()        (sp - 0x101 - ram)
 
-	uint8_t* sp;
-	SET_SP( r.sp );
-	
-	// registers
-	unsigned pc = (unsigned) r.pc;
-	int a = r.a;
-	int x = r.x;
-	int y = r.y;
-	
-	// status flags
-	
-	const int st_n = 0x80;
-	const int st_v = 0x40;
-	const int st_p = 0x20;
-	const int st_b = 0x10;
-	const int st_h = 0x08;
-	const int st_i = 0x04;
-	const int st_z = 0x02;
-	const int st_c = 0x01;
-	
-	#define IS_NEG (nz & 0x880)
-	
-	#define CALC_STATUS( out ) do {\
-		out = status & ~(st_n | st_z | st_c);\
-		out |= (c >> 8) & st_c;\
-		out |= (dp >> 3) & st_p;\
-		if ( IS_NEG ) out |= st_n;\
-		if ( !(nz & 0xFF) ) out |= st_z;\
-	} while ( 0 )       
+//// DSP
 
-	#define SET_STATUS( in ) do {\
-		status = in & ~(st_n | st_z | st_c | st_p);\
-		c = in << 8;\
-		nz = (in << 4) & 0x800;\
-		nz |= ~in & st_z;\
-		dp = (in << 3) & 0x100;\
-	} while ( 0 )
-	
-	int status;
-	int c;  // store C as 'c' & 0x100.
-	int nz; // Z set if (nz & 0xFF) == 0, N set if (nz & 0x880) != 0
-	unsigned dp; // direct page base
-	{
-		int temp = r.status;
-		SET_STATUS( temp );
-	}
-
-	goto loop;
-	
-	unsigned data; // first operand of instruction and temporary across function calls
+#if SPC_LESS_ACCURATE // this branch runs the DSP in whole-sample steps using per-register time offsets
+	int const max_reg_time = 29; // equals the largest entry in reg_times_
 	
-	// Common endings for instructions
-cbranch_taken_loop: // compare and branch
-	pc += (BOOST::int8_t) READ_PROG( pc );
-	remain_ -= 2;
-inc_pc_loop: // end of instruction with an operand
-	pc++;
-loop:
-	
-	check( (unsigned) pc < 0x10000 );
-	check( (unsigned) GET_SP() < 0x100 );
-	
-	check( (unsigned) a < 0x100 );
-	check( (unsigned) x < 0x100 );
-	check( (unsigned) y < 0x100 );
-	
-	unsigned opcode = READ_PROG( pc );
-	pc++;
-	// to do: if pc is at end of memory, this will get wrong byte
-	data = READ_PROG( pc );
-	
-	if ( remain_ <= 0 )
-		goto stop;
-	
-	remain_ -= cycle_table [opcode];
-	
-	// Use 'data' for temporaries whose lifetime crosses read/write calls, otherwise
-	// use a local temporary.
-	switch ( opcode )
+	signed char const Snes_Spc::reg_times_ [256] = // entries 128..255 are all 29; presumably the source of the reg_times values that dsp_read and dsp_write pass to RUN_DSP -- TODO confirm
 	{
+		 -1,  0,-11,-10,-15,-11, -2, -2,  4,  3, 14, 14, 26, 26, 14, 22,
+		  2,  3,  0,  1,-12,  0,  1,  1,  7,  6, 14, 14, 27, 14, 14, 23,
+		  5,  6,  3,  4, -1,  3,  4,  4, 10,  9, 14, 14, 26, -5, 14, 23,
+		  8,  9,  6,  7,  2,  6,  7,  7, 13, 12, 14, 14, 27, -4, 14, 24,
+		 11, 12,  9, 10,  5,  9, 10, 10, 16, 15, 14, 14, -2, -4, 14, 24,
+		 14, 15, 12, 13,  8, 12, 13, 13, 19, 18, 14, 14, -2,-36, 14, 24,
+		 17, 18, 15, 16, 11, 15, 16, 16, 22, 21, 14, 14, 28, -3, 14, 25,
+		 20, 21, 18, 19, 14, 18, 19, 19, 25, 24, 14, 14, 14, 29, 14, 25,
+		 
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+		 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+	};
 	
-	#define BRANCH( cond ) {\
-		pc++;\
-		int offset = (BOOST::int8_t) data;\
-		if ( cond ) {\
-			pc += offset;\
-			remain_ -= 2;\
-		}\
-		goto loop;\
-	}
-	
-// Most-Common
-
-	case 0xF0: // BEQ (most common)
-		BRANCH( !(uint8_t) nz )
-	
-	case 0xD0: // BNE
-		BRANCH( (uint8_t) nz )
-	
-	case 0x3F: // CALL
-		PUSH16( pc + 2 );
-		pc = READ_PROG16( pc );
-		goto loop;
-	
-	case 0x6F: // RET
-		pc = POP();
-		pc += POP() * 0x100;
-		goto loop;
-
-#define CASE( n )   case n:
+	#define RUN_DSP( time, offset ) \
+		int count = (time) - (offset) - m.dsp_time;/* declared in the caller scope, so this form can be used once per scope */\
+		if ( count >= 0 ) /* not wrapped in braces: dsp_write attaches an else-if to this if */\
+		{\
+			int clock_count = (count & ~(clocks_per_sample - 1)) + clocks_per_sample;/* count rounded down to a multiple of clocks_per_sample (assumes a power of two -- confirm), plus one more sample */\
+			m.dsp_time += clock_count;\
+			dsp.run( clock_count );\
+		}
+#else // SPC_LESS_ACCURATE is off: run the DSP exactly up to `time`; the offset argument is ignored
+	#define RUN_DSP( time, offset ) \
+		{\
+			int count = (time) - m.dsp_time;\
+			if ( !SPC_MORE_ACCURACY || count ) /* always taken by default; with SPC_MORE_ACCURACY a zero count is skipped */\
+			{\
+				assert( count > 0 );\
+				m.dsp_time = (time);\
+				dsp.run( count );\
+			}\
+		}
+#endif
 
-// Define common address modes based on opcode for immediate mode. Execution
-// ends with data set to the address of the operand.
-#define ADDR_MODES( op )\
-	CASE( op - 0x02 ) /* (X) */\
-		data = x + dp;\
-		pc--;\
-		goto end_##op;\
-	CASE( op + 0x0F ) /* (dp)+Y */\
-		data = READ_PROG16( data + dp ) + y;\
-		goto end_##op;\
-	CASE( op - 0x01 ) /* (dp+X) */\
-		data = READ_PROG16( uint8_t (data + x) + dp );\
-		goto end_##op;\
-	CASE( op + 0x0E ) /* abs+Y */\
-		data += y;\
-		goto abs_##op;\
-	CASE( op + 0x0D ) /* abs+X */\
-		data += x;\
-	CASE( op - 0x03 ) /* abs */\
-	abs_##op:\
-		pc++;\
-		data += 0x100 * READ_PROG( pc );\
-		goto end_##op;\
-	CASE( op + 0x0C ) /* dp+X */\
-		data = uint8_t (data + x);\
-	CASE( op - 0x04 ) /* dp */\
-		data += dp;\
-	end_##op:
-
-// 1. 8-bit Data Transmission Commands. Group I
-
-	ADDR_MODES( 0xE8 ) // MOV A,addr
-	// case 0xE4: // MOV a,dp (most common)
-	mov_a_addr:
-		a = nz = READ( data );
-		goto inc_pc_loop;
-	case 0xBF: // MOV A,(X)+
-		data = x + dp;
-		x = uint8_t (x + 1);
-		pc--;
-		goto mov_a_addr;
+int Snes_Spc::dsp_read( rel_time_t time ) // runs the DSP via RUN_DSP, then reads the DSP register selected by the low 7 bits of REGS [r_dspaddr]
+{
+	RUN_DSP( time, reg_times [REGS [r_dspaddr] & 0x7F] ); // the offset argument is only used by the SPC_LESS_ACCURATE form of RUN_DSP
+	
+	int result = dsp.read( REGS [r_dspaddr] & 0x7F );
 	
-	case 0xE8: // MOV A,imm
-		a = data;
-		nz = data;
-		goto inc_pc_loop;
+	#ifdef SPC_DSP_READ_HOOK // optional user-supplied hook, only compiled when the macro is defined
+		SPC_DSP_READ_HOOK( spc_time + time, (REGS [r_dspaddr] & 0x7F), result ); // NOTE(review): bare spc_time here, while the write hook in dsp_write uses m.spc_time -- confirm which is intended
+	#endif
 	
-	case 0xF9: // MOV X,dp+Y
-		data = uint8_t (data + y);
-	case 0xF8: // MOV X,dp
-		data += dp;
-		goto mov_x_addr;
-	case 0xE9: // MOV X,abs
-		data = READ_PROG16( pc );
-		pc++;
-	mov_x_addr:
-		data = READ( data );
-	case 0xCD: // MOV X,imm
-		x = data;
-		nz = data;
-		goto inc_pc_loop;
-	
-	case 0xFB: // MOV Y,dp+X
-		data = uint8_t (data + x);
-	case 0xEB: // MOV Y,dp
-		data += dp;
-		goto mov_y_addr;
-	case 0xEC: // MOV Y,abs
-		data = READ_PROG16( pc );
-		pc++;
-	mov_y_addr:
-		data = READ( data );
-	case 0x8D: // MOV Y,imm
-		y = data;
-		nz = data;
-		goto inc_pc_loop;
+	return result;
+}
 
-// 2. 8-BIT DATA TRANSMISSION COMMANDS, GROUP 2
-
-	ADDR_MODES( 0xC8 ) // MOV addr,A
-		WRITE( data, a );
-		goto inc_pc_loop;
-	
-	{
-		int temp;
-	case 0xCC: // MOV abs,Y
-		temp = y;
-		goto mov_abs_temp;
-	case 0xC9: // MOV abs,X
-		temp = x;
-	mov_abs_temp:
-		WRITE( READ_PROG16( pc ), temp );
-		pc += 2;
-		goto loop;
-	}
-	
-	case 0xD9: // MOV dp+Y,X
-		data = uint8_t (data + y);
-	case 0xD8: // MOV dp,X
-		WRITE( data + dp, x );
-		goto inc_pc_loop;
-	
-	case 0xDB: // MOV dp+X,Y
-		data = uint8_t (data + x);
-	case 0xCB: // MOV dp,Y
-		WRITE( data + dp, y );
-		goto inc_pc_loop;
-
-	case 0xFA: // MOV dp,dp
-		data = READ( data + dp );
-	case 0x8F: // MOV dp,#imm
-		pc++;
-		WRITE_DP( READ_PROG( pc ), data );
-		goto inc_pc_loop;
-	
-// 3. 8-BIT DATA TRANSMISSIN COMMANDS, GROUP 3.
-	
-	case 0x7D: // MOV A,X
-		a = x;
-		nz = x;
-		goto loop;
-	
-	case 0xDD: // MOV A,Y
-		a = y;
-		nz = y;
-		goto loop;
+inline void Snes_Spc::dsp_write( int data, rel_time_t time ) // runs the DSP via RUN_DSP, then writes data to the DSP register selected by REGS [r_dspaddr]
+{
+	RUN_DSP( time, reg_times [REGS [r_dspaddr]] ) // no semicolon: in SPC_LESS_ACCURATE builds the else below attaches to the if inside RUN_DSP
+	#if SPC_LESS_ACCURATE
+		else if ( m.dsp_time == skipping_time ) // presumably DSP emulation is being skipped, so only key state is tracked -- TODO confirm
+		{
+			int r = REGS [r_dspaddr];
+			if ( r == Spc_Dsp::r_kon )
+				m.skipped_kon |= data & ~dsp.read( Spc_Dsp::r_koff ); // remember key-ons that the current r_koff value does not mask
+			
+			if ( r == Spc_Dsp::r_koff )
+			{
+				m.skipped_koff |= data;
+				m.skipped_kon &= ~data; // a key-off cancels any remembered key-on for the same bits
+			}
+		}
+	#endif
 	
-	case 0x5D: // MOV X,A
-		x = a;
-		nz = a;
-		goto loop;
-	
-	case 0xFD: // MOV Y,A
-		y = a;
-		nz = a;
-		goto loop;
-	
-	case 0x9D: // MOV X,SP
-		x = nz = GET_SP();
-		goto loop;
-	
-	case 0xBD: // MOV SP,X
-		SET_SP( x );
-		goto loop;
-	
-	//case 0xC6: // MOV (X),A (handled by MOV addr,A in group 2)
-	
-	case 0xAF: // MOV (X)+,A
-		WRITE_DP( x, a );
-		x++;
-		goto loop;
-	
-// 5. 8-BIT LOGIC OPERATION COMMANDS
-	
-#define LOGICAL_OP( op, func )\
-	ADDR_MODES( op ) /* addr */\
-		data = READ( data );\
-	case op: /* imm */\
-		nz = a func##= data;\
-		goto inc_pc_loop;\
-	{   unsigned addr;\
-	case op + 0x11: /* X,Y */\
-		data = READ_DP( y );\
-		addr = x + dp;\
-		pc--;\
-		goto addr_##op;\
-	case op + 0x01: /* dp,dp */\
-		data = READ_DP( data );\
-	case op + 0x10: /*dp,imm*/\
-		pc++;\
-		addr = READ_PROG( pc ) + dp;\
-	addr_##op:\
-		nz = data func READ( addr );\
-		WRITE( addr, nz );\
-		goto inc_pc_loop;\
-	}
-	
-	LOGICAL_OP( 0x28, & ); // AND
+	#ifdef SPC_DSP_WRITE_HOOK // optional user-supplied hook, only compiled when the macro is defined
+		SPC_DSP_WRITE_HOOK( m.spc_time + time, REGS [r_dspaddr], (uint8_t) data );
+	#endif
 	
-	LOGICAL_OP( 0x08, | ); // OR
-	
-	LOGICAL_OP( 0x48, ^ ); // EOR
-	
-// 4. 8-BIT ARITHMETIC OPERATION COMMANDS
+	if ( REGS [r_dspaddr] <= 0x7F ) // only register addresses 0..0x7F are forwarded to the DSP
+		dsp.write( REGS [r_dspaddr], data );
+	else if ( !SPC_MORE_ACCURACY ) // otherwise the write is dropped, with a log message in default builds
+		dprintf( "SPC wrote to DSP register > $7F\n" );
+}
+
+
+//// Memory access extras
 
-	ADDR_MODES( 0x68 ) // CMP addr
-		data = READ( data );
-	case 0x68: // CMP imm
-		nz = a - data;
-		c = ~nz;
-		nz &= 0xFF;
-		goto inc_pc_loop;
-	
-	case 0x79: // CMP (X),(Y)
-		data = READ_DP( x );
-		nz = data - READ_DP( y );
-		c = ~nz;
-		nz &= 0xFF;
-		goto loop;
-	
-	case 0x69: // CMP (dp),(dp)
-		data = READ_DP( data );
-	case 0x78: // CMP dp,imm
-		pc++;
-		nz = READ_DP( READ_PROG( pc ) ) - data;
-		c = ~nz;
-		nz &= 0xFF;
-		goto inc_pc_loop;
-	
-	case 0x3E: // CMP X,dp
-		data += dp;
-		goto cmp_x_addr;
-	case 0x1E: // CMP X,abs
-		data = READ_PROG16( pc );
-		pc++;
-	cmp_x_addr:
-		data = READ( data );
-	case 0xC8: // CMP X,imm
-		nz = x - data;
-		c = ~nz;
-		nz &= 0xFF;
-		goto inc_pc_loop;
+#if SPC_MORE_ACCURACY // accurate mode: run the DSP before a memory access whenever time has reached m.dsp_time; addr is unused
+	#define MEM_ACCESS( time, addr ) \
+	{\
+		if ( time >= m.dsp_time )\
+		{\
+			RUN_DSP( time, max_reg_time );/* max_reg_time is only declared under SPC_LESS_ACCURATE; this still compiles otherwise because that RUN_DSP never expands its offset argument */\
+		}\
+	}
+#elif !defined (NDEBUG)
+	// Debug-only check for read/write within echo buffer, since this might result in
+	// inaccurate emulation due to the DSP not being caught up to the present.
 	
-	case 0x7E: // CMP Y,dp
-		data += dp;
-		goto cmp_y_addr;
-	case 0x5E: // CMP Y,abs
-		data = READ_PROG16( pc );
-		pc++;
-	cmp_y_addr:
-		data = READ( data );
-	case 0xAD: // CMP Y,imm
-		nz = y - data;
-		c = ~nz;
-		nz &= 0xFF;
-		goto inc_pc_loop;
-	
+	bool Snes_Spc::check_echo_access( int addr ) // returns true only for the first addr that falls inside the echo buffer range; later calls return false because m.echo_accessed stays set
 	{
-		int addr;
-	case 0xB9: // SBC (x),(y)
-	case 0x99: // ADC (x),(y)
-		pc--; // compensate for inc later
-		data = READ_DP( x );
-		addr = y + dp;
-		goto adc_addr;
-	case 0xA9: // SBC dp,dp
-	case 0x89: // ADC dp,dp
-		data = READ_DP( data );
-	case 0xB8: // SBC dp,imm
-	case 0x98: // ADC dp,imm
-		pc++;
-		addr = READ_PROG( pc ) + dp;
-	adc_addr:
-		nz = READ( addr );
-		goto adc_data;
-		
-// catch ADC and SBC together, then decode later based on operand
-#undef CASE
-#define CASE( n ) case n: case (n) + 0x20:
-	ADDR_MODES( 0x88 ) // ADC/SBC addr
-		data = READ( data );
-	case 0xA8: // SBC imm
-	case 0x88: // ADC imm
-		addr = -1; // A
-		nz = a;
-	adc_data: {
-		if ( opcode & 0x20 )
-			data ^= 0xFF; // SBC
-		int carry = (c >> 8) & 1;
-		int ov = (nz ^ 0x80) + carry + (BOOST::int8_t) data; // sign-extend
-		int hc = (nz & 15) + carry;
-		c = nz += data + carry;
-		hc = (nz & 15) - hc;
-		status = (status & ~(st_v | st_h)) | ((ov >> 2) & st_v) | ((hc >> 1) & st_h);
-		if ( addr < 0 ) {
-			a = (uint8_t) nz;
-			goto inc_pc_loop;
+		if ( !(dsp.read( Spc_Dsp::r_flg ) & 0x20) ) // bit 0x20 of r_flg clear; presumably this means echo buffer writes are enabled -- TODO confirm
+		{
+			int start = 0x100 * dsp.read( Spc_Dsp::r_esa ); // r_esa gives the start address in 0x100-byte units
+			int size  = 0x800 * (dsp.read( Spc_Dsp::r_edl ) & 0x0F); // low 4 bits of r_edl give the size in 0x800-byte units
+			int end   = start + (size ? size : 4); // a size of 0 is treated as 4 bytes
+			if ( start <= addr && addr < end )
+			{
+				if ( !m.echo_accessed )
+				{
+					m.echo_accessed = 1; // latch so that only the first access is reported
+					return true;
+				}
+			}
 		}
-		WRITE( addr, (uint8_t) nz );
-		goto inc_pc_loop;
-	}
-	
+		return false;
 	}
 	
-// 6. ADDITION & SUBTRACTION COMMANDS
-
-#define INC_DEC_REG( reg, n )\
-		nz = reg + n;\
-		reg = (uint8_t) nz;\
-		goto loop;
+	#define MEM_ACCESS( time, addr ) check( !check_echo_access( (uint16_t) addr ) ); // debug builds: addr is truncated to 16 bits and the result is passed to check(); time is unused
+#else // release builds without SPC_MORE_ACCURACY: MEM_ACCESS expands to nothing
+	#define MEM_ACCESS( time, addr )
+#endif
 
-	case 0xBC: INC_DEC_REG( a, 1 )  // INC A
-	case 0x3D: INC_DEC_REG( x, 1 )  // INC X
-	case 0xFC: INC_DEC_REG( y, 1 )  // INC Y
-	
-	case 0x9C: INC_DEC_REG( a, -1 ) // DEC A
-	case 0x1D: INC_DEC_REG( x, -1 ) // DEC X
-	case 0xDC: INC_DEC_REG( y, -1 ) // DEC Y
+
+//// CPU write
 
-	case 0x9B: // DEC dp+X
-	case 0xBB: // INC dp+X
-		data = uint8_t (data + x);
-	case 0x8B: // DEC dp
-	case 0xAB: // INC dp
-		data += dp;
-		goto inc_abs;
-	case 0x8C: // DEC abs
-	case 0xAC: // INC abs
-		data = READ_PROG16( pc );
-		pc++;
-	inc_abs:
-		nz = ((opcode >> 4) & 2) - 1;
-		nz += READ( data );
-		WRITE( data, (uint8_t) nz );
-		goto inc_pc_loop;
-	
-// 7. SHIFT, ROTATION COMMANDS
-
-	case 0x5C: // LSR A
-		c = 0;
-	case 0x7C:{// ROR A
-		nz = ((c >> 1) & 0x80) | (a >> 1);
-		c = a << 8;
-		a = nz;
-		goto loop;
-	}
-	
-	case 0x1C: // ASL A
-		c = 0;
-	case 0x3C:{// ROL A
-		int temp = (c >> 8) & 1;
-		c = a << 1;
-		nz = c | temp;
-		a = (uint8_t) nz;
-		goto loop;
-	}
+#if SPC_MORE_ACCURACY
+static unsigned char const glitch_probs [3] [256] = // rows 0, 1, 2 are used for a new timer period of 3, 5, 9; the column index is the old period & 0xFF (see cpu_write_smp_reg_)
+{
+	0xC3,0x92,0x5B,0x1C,0xD1,0x92,0x5B,0x1C,0xDB,0x9C,0x72,0x18,0xCD,0x5C,0x38,0x0B,
+	0xE1,0x9C,0x74,0x17,0xCF,0x75,0x45,0x0C,0xCF,0x6E,0x4A,0x0D,0xA3,0x3A,0x1D,0x08,
+	0xDB,0xA0,0x82,0x19,0xD9,0x73,0x3C,0x0E,0xCB,0x76,0x52,0x0B,0xA5,0x46,0x1D,0x09,
+	0xDA,0x74,0x55,0x0F,0xA2,0x3F,0x21,0x05,0x9A,0x40,0x20,0x07,0x63,0x1E,0x10,0x01,
+	0xDF,0xA9,0x85,0x1D,0xD3,0x84,0x4B,0x0E,0xCF,0x6F,0x49,0x0F,0xB3,0x48,0x1E,0x05,
+	0xD8,0x77,0x52,0x12,0xB7,0x49,0x23,0x06,0xAA,0x45,0x28,0x07,0x7D,0x28,0x0F,0x07,
+	0xCC,0x7B,0x4A,0x0E,0xB2,0x4F,0x24,0x07,0xAD,0x43,0x2C,0x06,0x86,0x29,0x11,0x07,
+	0xAE,0x48,0x1F,0x0A,0x76,0x21,0x19,0x05,0x76,0x21,0x14,0x05,0x44,0x11,0x0B,0x01,
+	0xE7,0xAD,0x96,0x23,0xDC,0x86,0x59,0x0E,0xDC,0x7C,0x5F,0x15,0xBB,0x53,0x2E,0x09,
+	0xD6,0x7C,0x4A,0x16,0xBB,0x4A,0x25,0x08,0xB3,0x4F,0x28,0x0B,0x8E,0x23,0x15,0x08,
+	0xCF,0x7F,0x57,0x11,0xB5,0x4A,0x23,0x0A,0xAA,0x42,0x28,0x05,0x7D,0x22,0x12,0x03,
+	0xA6,0x49,0x28,0x09,0x82,0x2B,0x0D,0x04,0x7A,0x20,0x0F,0x04,0x3D,0x0F,0x09,0x03,
+	0xD1,0x7C,0x4C,0x0F,0xAF,0x4E,0x21,0x09,0xA8,0x46,0x2A,0x07,0x85,0x1F,0x0E,0x07,
+	0xA6,0x3F,0x26,0x07,0x7C,0x24,0x14,0x07,0x78,0x22,0x16,0x04,0x46,0x12,0x0A,0x02,
+	0xA6,0x41,0x2C,0x0A,0x7E,0x28,0x11,0x05,0x73,0x1B,0x14,0x05,0x3D,0x11,0x0A,0x02,
+	0x70,0x22,0x17,0x05,0x48,0x13,0x08,0x03,0x3C,0x07,0x0D,0x07,0x26,0x07,0x06,0x01,
 	
-	case 0x0B: // ASL dp
-		c = 0;
-		data += dp;
-		goto rol_mem;
-	case 0x1B: // ASL dp+X
-		c = 0;
-	case 0x3B: // ROL dp+X
-		data = uint8_t (data + x);
-	case 0x2B: // ROL dp
-		data += dp;
-		goto rol_mem;
-	case 0x0C: // ASL abs
-		c = 0;
-	case 0x2C: // ROL abs
-		data = READ_PROG16( pc );
-		pc++;
-	rol_mem:
-		nz = (c >> 8) & 1;
-		nz |= (c = READ( data ) << 1);
-		WRITE( data, (uint8_t) nz );
-		goto inc_pc_loop;
-	
-	case 0x4B: // LSR dp
-		c = 0;
-		data += dp;
-		goto ror_mem;
-	case 0x5B: // LSR dp+X
-		c = 0;
-	case 0x7B: // ROR dp+X
-		data = uint8_t (data + x);
-	case 0x6B: // ROR dp
-		data += dp;
-		goto ror_mem;
-	case 0x4C: // LSR abs
-		c = 0;
-	case 0x6C: // ROR abs
-		data = READ_PROG16( pc );
-		pc++;
-	ror_mem: {
-		int temp = READ( data );
-		nz = ((c >> 1) & 0x80) | (temp >> 1);
-		c = temp << 8;
-		WRITE( data, nz );
-		goto inc_pc_loop;
-	}
-
-	case 0x9F: // XCN
-		nz = a = (a >> 4) | uint8_t (a << 4);
-		goto loop;
-
-// 8. 16-BIT TRANSMISION COMMANDS
-
-	case 0xBA: // MOVW YA,dp
-		a = READ_DP( data );
-		nz = (a & 0x7F) | (a >> 1);
-		y = READ_DP( uint8_t (data + 1) );
-		nz |= y;
-		goto inc_pc_loop;
+	0xE0,0x9F,0xDA,0x7C,0x4F,0x18,0x28,0x0D,0xE9,0x9F,0xDA,0x7C,0x4F,0x18,0x1F,0x07,
+	0xE6,0x97,0xD8,0x72,0x64,0x13,0x26,0x09,0xDC,0x67,0xA9,0x38,0x21,0x07,0x15,0x06,
+	0xE9,0x91,0xD2,0x6B,0x63,0x14,0x2B,0x0E,0xD6,0x61,0xB7,0x41,0x2B,0x0E,0x10,0x09,
+	0xCF,0x59,0xB0,0x2F,0x35,0x08,0x0F,0x07,0xB6,0x30,0x7A,0x21,0x17,0x07,0x09,0x03,
+	0xE7,0xA3,0xE5,0x6B,0x65,0x1F,0x34,0x09,0xD8,0x6B,0xBE,0x45,0x27,0x07,0x10,0x07,
+	0xDA,0x54,0xB1,0x39,0x2E,0x0E,0x17,0x08,0xA9,0x3C,0x86,0x22,0x16,0x06,0x07,0x03,
+	0xD4,0x51,0xBC,0x3D,0x38,0x0A,0x13,0x06,0xB2,0x37,0x79,0x1C,0x17,0x05,0x0E,0x06,
+	0xA7,0x31,0x74,0x1C,0x11,0x06,0x0C,0x02,0x6D,0x1A,0x38,0x10,0x0B,0x05,0x06,0x03,
+	0xEB,0x9A,0xE1,0x7A,0x6F,0x13,0x34,0x0E,0xE6,0x75,0xC5,0x45,0x3E,0x0B,0x1A,0x05,
+	0xD8,0x63,0xC1,0x40,0x3C,0x1B,0x19,0x06,0xB3,0x42,0x83,0x29,0x18,0x0A,0x08,0x04,
+	0xD4,0x58,0xBA,0x43,0x3F,0x0A,0x1F,0x09,0xB1,0x33,0x8A,0x1F,0x1F,0x06,0x0D,0x05,
+	0xAF,0x3C,0x7A,0x1F,0x16,0x08,0x0A,0x01,0x72,0x1B,0x52,0x0D,0x0B,0x09,0x06,0x01,
+	0xCF,0x63,0xB7,0x47,0x40,0x10,0x14,0x06,0xC0,0x41,0x96,0x20,0x1C,0x09,0x10,0x05,
+	0xA6,0x35,0x82,0x1A,0x20,0x0C,0x0E,0x04,0x80,0x1F,0x53,0x0F,0x0B,0x02,0x06,0x01,
+	0xA6,0x31,0x81,0x1B,0x1D,0x01,0x08,0x08,0x7B,0x20,0x4D,0x19,0x0E,0x05,0x07,0x03,
+	0x6B,0x17,0x49,0x07,0x0E,0x03,0x0A,0x05,0x37,0x0B,0x1F,0x06,0x04,0x02,0x07,0x01,
 	
-	case 0xDA: // MOVW dp,YA
-		WRITE_DP( data, a );
-		WRITE_DP( uint8_t (data + 1), y );
-		goto inc_pc_loop;
-	
-// 9. 16-BIT OPERATION COMMANDS
+	0xF0,0xD6,0xED,0xAD,0xEC,0xB1,0xEB,0x79,0xAC,0x22,0x47,0x1E,0x6E,0x1B,0x32,0x0A,
+	0xF0,0xD6,0xEA,0xA4,0xED,0xC4,0xDE,0x82,0x98,0x1F,0x50,0x13,0x52,0x15,0x2A,0x0A,
+	0xF1,0xD1,0xEB,0xA2,0xEB,0xB7,0xD8,0x69,0xA2,0x1F,0x5B,0x18,0x55,0x18,0x2C,0x0A,
+	0xED,0xB5,0xDE,0x7E,0xE6,0x85,0xD3,0x59,0x59,0x0F,0x2C,0x09,0x24,0x07,0x15,0x09,
+	0xF1,0xD6,0xEA,0xA0,0xEC,0xBB,0xDA,0x77,0xA9,0x23,0x58,0x14,0x5D,0x12,0x2F,0x09,
+	0xF1,0xC1,0xE3,0x86,0xE4,0x87,0xD2,0x4E,0x68,0x15,0x26,0x0B,0x27,0x09,0x15,0x02,
+	0xEE,0xA6,0xE0,0x5C,0xE0,0x77,0xC3,0x41,0x67,0x1B,0x3C,0x07,0x2A,0x06,0x19,0x07,
+	0xE4,0x75,0xC6,0x43,0xCC,0x50,0x95,0x23,0x35,0x09,0x14,0x04,0x15,0x05,0x0B,0x04,
+	0xEE,0xD6,0xED,0xAD,0xEC,0xB1,0xEB,0x79,0xAC,0x22,0x56,0x14,0x5A,0x12,0x26,0x0A,
+	0xEE,0xBB,0xE7,0x7E,0xE9,0x8D,0xCB,0x49,0x67,0x11,0x34,0x07,0x2B,0x0B,0x14,0x07,
+	0xED,0xA7,0xE5,0x76,0xE3,0x7E,0xC4,0x4B,0x77,0x14,0x34,0x08,0x27,0x07,0x14,0x04,
+	0xE7,0x8B,0xD2,0x4C,0xCA,0x56,0x9E,0x31,0x36,0x0C,0x11,0x07,0x14,0x04,0x0A,0x02,
+	0xF0,0x9B,0xEA,0x6F,0xE5,0x81,0xC4,0x43,0x74,0x10,0x30,0x0B,0x2D,0x08,0x1B,0x06,
+	0xE6,0x83,0xCA,0x48,0xD9,0x56,0xA7,0x23,0x3B,0x09,0x12,0x09,0x15,0x07,0x0A,0x03,
+	0xE5,0x5F,0xCB,0x3C,0xCF,0x48,0x91,0x22,0x31,0x0A,0x17,0x08,0x15,0x04,0x0D,0x02,
+	0xD1,0x43,0x91,0x20,0xA9,0x2D,0x54,0x12,0x17,0x07,0x09,0x02,0x0C,0x04,0x05,0x03,
+};
+#endif
+
+// divided into multiple functions to keep rarely-used functionality separate
+// so often-used functionality can be optimized better by compiler
+
+// If write isn't preceded by read, data has this added to it
+int const no_read_before_write = 0x2000; // cpu_write_smp_reg_ compares data against half of this value when a timer output register is written
 
-	case 0x3A: // INCW dp
-	case 0x1A:{// DECW dp
-		data += dp;
-		
-		// low byte
-		int temp = READ( data );
-		temp += ((opcode >> 4) & 2) - 1; // +1 for INCW, -1 for DECW
-		nz = ((temp >> 1) | temp) & 0x7F;
-		WRITE( data, (uint8_t) temp );
-		
-		// high byte
-		data = uint8_t (data + 1) + dp;
-		temp >>= 8;
-		temp = uint8_t (temp + READ( data ));
-		nz |= temp;
-		WRITE( data, temp );
-		
-		goto inc_pc_loop;
-	}
-		
-	case 0x9A: // SUBW YA,dp
-	case 0x7A: // ADDW YA,dp
+void Snes_Spc::cpu_write_smp_reg_( int data, rel_time_t time, int addr ) // handles writes to every SMP register that cpu_write_smp_reg does not route to dsp_write; addr is the register index
+{
+	switch ( addr )
 	{
-		// read 16-bit addend
-		int temp = READ_DP( data );
-		int sign = READ_DP( uint8_t (data + 1) );
-		temp += 0x100 * sign;
-		status &= ~(st_v | st_h);
-		
-		// to do: fix half-carry for SUBW (it's probably wrong)
-		
-		// for SUBW, negate and truncate to 16 bits
-		if ( opcode & 0x80 ) {
-			temp = (temp ^ 0xFFFF) + 1;
-			sign = temp >> 8;
+	case r_t0target:
+	case r_t1target:
+	case r_t2target: {
+		Timer* t = &m.timers [addr - r_t0target];
+		int period = IF_0_THEN_256( data ); // a written target of 0 means a period of 256
+		if ( t->period != period )
+		{
+			t = run_timer( t, time ); // bring the timer up to date before its period changes
+			#if SPC_MORE_ACCURACY
+				// Insane behavior when target is written just after counter is
+				// clocked and counter matches new period and new period isn't 1, 2, 4, or 8
+				if ( t->divider == (period & 0xFF) &&
+						t->next_time == time + TIMER_MUL( t, 1 ) &&
+						((period - 1) | ~0x0F) & period ) // last term is zero only when period is 1, 2, 4 or 8
+				{
+					//dprintf( "SPC pathological timer target write\n" );
+					
+					// If the period is 3, 5, or 9, there's a probability this behavior won't occur,
+					// based on the previous period
+					int prob = 0xFF;
+					int old_period = t->period & 0xFF;
+					if ( period == 3 ) prob = glitch_probs [0] [old_period];
+					if ( period == 5 ) prob = glitch_probs [1] [old_period];
+					if ( period == 9 ) prob = glitch_probs [2] [old_period];
+					
+					// The glitch suppresses incrementing of one of the counter bits, based on
+					// the lowest set bit in the new period
+					int b = 1;
+					while ( !(period & b) ) // terminates because period is at least 1
+						b <<= 1;
+					
+					if ( (rand() >> 4 & 0xFF) <= prob ) // NOTE(review): rand() makes this path non-deterministic and needs <stdlib.h>; no direct include is visible in this file -- confirm
+						t->divider = (t->divider - b) & 0xFF;
+				}
+			#endif
+			t->period = period;
 		}
-		
-		// add low byte (A)
-		temp += a;
-		a = (uint8_t) temp;
-		nz = (temp | (temp >> 1)) & 0x7F;
-		
-		// add high byte (Y)
-		temp >>= 8;
-		c = y + temp;
-		nz = (nz | c) & 0xFF;
-		
-		// half-carry (temporary avoids CodeWarrior optimizer bug)
-		unsigned hc = (c & 15) - (y & 15);
-		status |= (hc >> 4) & st_h;
-		
-		// overflow if sign of YA changed when previous sign and addend sign were same
-		status |= (((c ^ y) & ~(y ^ sign)) >> 1) & st_v;
-		
-		y = (uint8_t) c;
-		
-		goto inc_pc_loop;
-	}
-	
-	case 0x5A: { // CMPW YA,dp
-		int temp = a - READ_DP( data );
-		nz = ((temp >> 1) | temp) & 0x7F;
-		temp = y + (temp >> 8);
-		temp -= READ_DP( uint8_t (data + 1) );
-		nz |= temp;
-		c = ~temp;
-		nz &= 0xFF;
-		goto inc_pc_loop;
-	}
-	
-// 10. MULTIPLICATION & DIVISON COMMANDS
-
-	case 0xCF: { // MUL YA
-		unsigned temp = y * a;
-		a = (uint8_t) temp;
-		nz = ((temp >> 1) | temp) & 0x7F;
-		y = temp >> 8;
-		nz |= y;
-		goto loop;
-	}
-	
-	case 0x9E: // DIV YA,X
-	{
-		// behavior based on SPC CPU tests
-		
-		status &= ~(st_h | st_v);
-		
-		if ( (y & 15) >= (x & 15) )
-			status |= st_h;
-		
-		if ( y >= x )
-			status |= st_v;
-		
-		unsigned ya = y * 0x100 + a;
-		if ( y < x * 2 )
-		{
-			a = ya / x;
-			y = ya - a * x;
-		}
-		else
-		{
-			a = 255 - (ya - x * 0x200) / (256 - x);
-			y = x   + (ya - x * 0x200) % (256 - x);
-		}
-		
-		nz = (uint8_t) a;
-		a = (uint8_t) a;
-		
-		goto loop;
+		break;
 	}
 	
-// 11. DECIMAL COMPENSATION COMMANDS
-	
-	// seem unused
-	// case 0xDF: // DAA
-	// case 0xBE: // DAS
-	
-// 12. BRANCHING COMMANDS
-
-	case 0x2F: // BRA rel
-		pc += (BOOST::int8_t) data;
-		goto inc_pc_loop;
-	
-	case 0x30: // BMI
-		BRANCH( IS_NEG )
-	
-	case 0x10: // BPL
-		BRANCH( !IS_NEG )
-	
-	case 0xB0: // BCS
-		BRANCH( c & 0x100 )
-	
-	case 0x90: // BCC
-		BRANCH( !(c & 0x100) )
-	
-	case 0x70: // BVS
-		BRANCH( status & st_v )
-	
-	case 0x50: // BVC
-		BRANCH( !(status & st_v) )
+	case r_t0out:
+	case r_t1out:
+	case r_t2out:
+		if ( !SPC_MORE_ACCURACY )
+			dprintf( "SPC wrote to counter %d\n", (int) addr - r_t0out );
+		
+		if ( data < no_read_before_write  / 2 ) // presumably true only when the write followed a read, since the constant is otherwise added to data -- TODO confirm against the caller
+			run_timer( &m.timers [addr - r_t0out], time - 1 )->counter = 0; // timer is run to time - 1, then its counter is cleared
+		break;
 	
-	case 0x03: // BBS dp.bit,rel
-	case 0x23:
-	case 0x43:
-	case 0x63:
-	case 0x83:
-	case 0xA3:
-	case 0xC3:
-	case 0xE3:
-		pc++;
-		if ( (READ_DP( data ) >> (opcode >> 5)) & 1 )
-			goto cbranch_taken_loop;
-		goto inc_pc_loop;
+	// Registers that act like RAM
+	case 0x8:
+	case 0x9:
+		REGS_IN [addr] = (uint8_t) data; // mirrored into REGS_IN so that a later read returns the written value
+		break;
 	
-	case 0x13: // BBC dp.bit,rel
-	case 0x33:
-	case 0x53:
-	case 0x73:
-	case 0x93:
-	case 0xB3:
-	case 0xD3:
-	case 0xF3:
-		pc++;
-		if ( !((READ_DP( data ) >> (opcode >> 5)) & 1) )
-			goto cbranch_taken_loop;
-		goto inc_pc_loop;
-	
-	case 0xDE: // CBNE dp+X,rel
-		data = uint8_t (data + x);
-		// fall through
-	case 0x2E: // CBNE dp,rel
-		pc++;
-		if ( READ_DP( data ) != a )
-			goto cbranch_taken_loop;
-		goto inc_pc_loop;
-	
-	case 0xFE: // DBNZ Y,rel
-		y = uint8_t (y - 1);
-		BRANCH( y )
+	case r_test:
+		if ( (uint8_t) data != 0x0A ) // only logs; the value 0x0A is accepted silently and nothing is emulated either way
+			dprintf( "SPC wrote to test register\n" );
+		break;
 	
-	case 0x6E: { // DBNZ dp,rel
-		pc++;
-		unsigned temp = READ_DP( data ) - 1;
-		WRITE_DP( (uint8_t) data, (uint8_t) temp );
-		if ( temp )
-			goto cbranch_taken_loop;
-		goto inc_pc_loop;
+	case r_control:
+		// port clears
+		if ( data & 0x10 ) // bit 4 clears input ports 0 and 1
+		{
+			REGS_IN [r_cpuio0] = 0;
+			REGS_IN [r_cpuio1] = 0;
+		}
+		if ( data & 0x20 ) // bit 5 clears input ports 2 and 3
+		{
+			REGS_IN [r_cpuio2] = 0;
+			REGS_IN [r_cpuio3] = 0;
+		}
+		
+		// timers
+		{
+			for ( int i = 0; i < timer_count; i++ )
+			{
+				Timer* t = &m.timers [i];
+				int enabled = data >> i & 1; // bit i of the control byte enables timer i
+				if ( t->enabled != enabled )
+				{
+					t = run_timer( t, time ); // bring the timer up to date before its enable state changes
+					t->enabled = enabled;
+					if ( enabled ) // switching a timer on restarts its divider and counter from zero
+					{
+						t->divider = 0;
+						t->counter = 0;
+					}
+				}
+			}
+		}
+		enable_rom( data & 0x80 ); // bit 7 selects the ROM mapping; passed on as 0 or 0x80
+		break;
 	}
-	
-	case 0x1F: // JMP (abs+X)
-		pc = READ_PROG16( pc ) + x;
-		// fall through
-	case 0x5F: // JMP abs
-		pc = READ_PROG16( pc );
-		goto loop;
-	
-// 13. SUB-ROUTINE CALL RETURN COMMANDS
-	
-	case 0x0F:{// BRK
-		check( false ); // untested
-		PUSH16( pc + 1 );
-		pc = READ_PROG16( 0xFFDE ); // vector address verified
-		int temp;
-		CALC_STATUS( temp );
-		PUSH( temp );
-		status = (status | st_b) & ~st_i;
-		goto loop;
+}
+
+void Snes_Spc::cpu_write_smp_reg( int data, rel_time_t time, int addr ) // routes a register write: r_dspdata goes to dsp_write, everything else to cpu_write_smp_reg_
+{
+	if ( addr == r_dspdata ) // 99%
+		dsp_write( data, time );
+	else
+		cpu_write_smp_reg_( data, time, addr );
+}
+
+void Snes_Spc::cpu_write_high( int data, int i, rel_time_t time ) // fixes up a write that cpu_write already stored at or above rom_addr; i is the offset from rom_addr
+{
+	if ( i < rom_size ) // write landed inside the ROM-sized window at rom_addr
+	{
+		m.hi_ram [i] = (uint8_t) data; // always keep the shadow copy of the underlying RAM current
+		if ( m.rom_enabled )
+			RAM [i + rom_addr] = m.rom [i]; // restore overwritten ROM
 	}
-	
-	case 0x4F: // PCALL offset
-		pc++;
-		PUSH16( pc );
-		pc = 0xFF00 + data;
-		goto loop;
+	else // write landed past the window, in the area that normally holds cpu_pad_fill
+	{
+		assert( RAM [i + rom_addr] == (uint8_t) data ); // cpu_write has already stored the byte there
+		RAM [i + rom_addr] = cpu_pad_fill; // restore overwritten padding
+		cpu_write( data, i + rom_addr - 0x10000, time ); // redo the write at the address wrapped around by 0x10000
+	}
+}
+
+int const bits_in_int = CHAR_BIT * sizeof (int); // used by the register mask test in cpu_write; NOTE(review): CHAR_BIT comes from <limits.h>, which is not included directly in the visible part of this file -- confirm
+
+void Snes_Spc::cpu_write( int data, int addr, rel_time_t time ) // stores data in RAM first, then applies register or high-memory side effects for addresses from 0xF0 upward
+{
+	MEM_ACCESS( time, addr )
 	
-	case 0x01: // TCALL n
-	case 0x11:
-	case 0x21:
-	case 0x31:
-	case 0x41:
-	case 0x51:
-	case 0x61:
-	case 0x71:
-	case 0x81:
-	case 0x91:
-	case 0xA1:
-	case 0xB1:
-	case 0xC1:
-	case 0xD1:
-	case 0xE1:
-	case 0xF1:
-		PUSH16( pc );
-		pc = READ_PROG16( 0xFFDE - (opcode >> 3) );
-		goto loop;
+	// RAM
+	RAM [addr] = (uint8_t) data; // unconditional; cpu_write_high later undoes this where the byte must not stick
+	int reg = addr - 0xF0;
+	if ( reg >= 0 ) // 64%
+	{
+		// $F0-$FF
+		if ( reg < reg_count ) // 87%
+		{
+			REGS [reg] = (uint8_t) data;
+			
+			// Ports
+			#ifdef SPC_PORT_WRITE_HOOK
+				if ( (unsigned) (reg - r_cpuio0) < port_count )
+					SPC_PORT_WRITE_HOOK( m.spc_time + time, (reg - r_cpuio0),
+							(uint8_t) data, &REGS [r_cpuio0] );
+			#endif
+			
+			// Registers other than $F2 and $F4-$F7
+			//if ( reg != 2 && reg != 4 && reg != 5 && reg != 6 && reg != 7 )
+			// TODO: this is a bit on the fragile side
+			if ( ((~0x2F00 << (bits_in_int - 16)) << reg) < 0 ) // 36%; low 16 bits of ~0x2F00 are 0xD0FF, so after both shifts the sign bit is set exactly for reg 0, 1, 3 and 8..15 (relies on left-shifting a negative int)
+				cpu_write_smp_reg( data, time, reg );
+		}
+		// High mem/address wrap-around
+		else
+		{
+			reg -= rom_addr - 0xF0; // reg is now addr - rom_addr
+			if ( reg >= 0 ) // 1% in IPL ROM area or address wrapped around
+				cpu_write_high( data, reg, time );
+		}
+	}
+}
+
+
+//// CPU read
+
+inline int Snes_Spc::cpu_read_smp_reg( int reg, rel_time_t time ) // read register index reg (address - 0xF0 in the callers visible here)
+{
+	int result = REGS_IN [reg]; // default: value from the REGS_IN array
+	reg -= r_dspaddr; // 0 => DSP address register, 1 => DSP data register
+	// DSP addr and data
+	if ( (unsigned) reg <= 1 ) // 4% 0xF2 and 0xF3; the unsigned compare also rejects negative values
+	{
+		result = REGS [r_dspaddr]; // DSP address reads back the last value written to it
+		if ( (unsigned) reg == 1 )
+			result = dsp_read( time ); // 0xF3: fetch the value through the DSP
+	}
+	return result;
+}
+
+int Snes_Spc::cpu_read( int addr, rel_time_t time ) // CPU load of one byte, honoring timers, registers and addresses past 0xFFFF
+{
+	MEM_ACCESS( time, addr ) // macro defined outside this view
 	
-// 14. STACK OPERATION COMMANDS
-
+	// RAM: plain memory value, used unless one of the special ranges below applies
+	int result = RAM [addr];
+	int reg = addr - 0xF0;
+	if ( reg >= 0 ) // 40%
 	{
-		int temp;
-	case 0x7F: // RET1
-		temp = POP();
-		pc = POP();
-		pc |= POP() << 8;
-		goto set_status;
-	case 0x8E: // POP PSW
-		temp = POP();
-	set_status:
-		SET_STATUS( temp );
-		goto loop;
+		reg -= 0x10; // reg == addr - 0x100: negative for $F0-$FF, >= 0xFF00 once addr reaches 0x10000
+		if ( (unsigned) reg >= 0xFF00 ) // 21%: one unsigned compare catches both of those ranges
+		{
+			reg += 0x10 - r_t0out; // reg == addr - 0xF0 - r_t0out, i.e. relative to the first timer output register
+			
+			// Timers: bring the timer up to date, return its counter, and clear it on read
+			if ( (unsigned) reg < timer_count ) // 90%
+			{
+				Timer* t = &m.timers [reg];
+				if ( time >= t->next_time )
+					t = run_timer_( t, time );
+				result = t->counter;
+				t->counter = 0;
+			}
+			// Other registers (those below the timer outputs)
+			else if ( reg < 0 ) // 10%
+			{
+				result = cpu_read_smp_reg( reg + r_t0out, time ); // convert back to a register index
+			}
+			else // 1%: address is 0x10000 or above; retry at addr - 0x10000
+			{
+				assert( reg + (r_t0out + 0xF0 - 0x10000) < 0x100 );
+				result = cpu_read( reg + (r_t0out + 0xF0 - 0x10000), time );
+			}
+		}
 	}
 	
-	case 0x0D: { // PUSH PSW
-		int temp;
-		CALC_STATUS( temp );
-		PUSH( temp );
-		goto loop;
-	}
+	return result;
+}
+
+
+//// Run
 
-	case 0x2D: // PUSH A
-		PUSH( a );
-		goto loop;
-	
-	case 0x4D: // PUSH X
-		PUSH( x );
-		goto loop;
-	
-	case 0x6D: // PUSH Y
-		PUSH( y );
-		goto loop;
-	
-	case 0xAE: // POP A
-		a = POP();
-		goto loop;
-	
-	case 0xCE: // POP X
-		x = POP();
-		goto loop;
-	
-	case 0xEE: // POP Y
-		y = POP();
-		goto loop;
-	
-// 15. BIT OPERATION COMMANDS
+// Prefix and suffix for CPU emulator function
+#define SPC_CPU_RUN_FUNC \
+BOOST::uint8_t* Snes_Spc::run_until_( time_t end_time )\
+{\
+	rel_time_t rel_time = m.spc_time - end_time;\
+	assert( rel_time <= 0 );\
+	m.spc_time = end_time;\
+	m.dsp_time += rel_time;\
+	m.timers [0].next_time += rel_time;\
+	m.timers [1].next_time += rel_time;\
+	m.timers [2].next_time += rel_time;
 
-	case 0x02: // SET1
-	case 0x22:
-	case 0x42:
-	case 0x62:
-	case 0x82:
-	case 0xA2:
-	case 0xC2:
-	case 0xE2:
-	case 0x12: // CLR1
-	case 0x32:
-	case 0x52:
-	case 0x72:
-	case 0x92:
-	case 0xB2:
-	case 0xD2:
-	case 0xF2: {
-		data += dp;
-		int bit = 1 << (opcode >> 5);
-		int mask = ~bit;
-		if ( opcode & 0x10 )
-			bit = 0;
-		WRITE( data, (READ( data ) & mask) | bit );
-		goto inc_pc_loop;
-	}
-		
-	case 0x0E: // TSET1 abs
-	case 0x4E:{// TCLR1 abs
-		data = READ_PROG16( pc );
-		pc += 2;
-		unsigned temp = READ( data );
-		nz = temp & a;
-		temp &= ~a;
-		if ( !(opcode & 0x40) )
-			temp |= a;
-		WRITE( data, temp );
-		goto loop;
+#define SPC_CPU_RUN_FUNC_END \
+	m.spc_time += rel_time;\
+	m.dsp_time -= rel_time;\
+	m.timers [0].next_time -= rel_time;\
+	m.timers [1].next_time -= rel_time;\
+	m.timers [2].next_time -= rel_time;\
+	assert( m.spc_time <= end_time );\
+	return &REGS [r_cpuio0];\
+}
+
+int const cpu_lag_max = 12 - 1; // DIV YA,X takes 12 clocks
+
+void Snes_Spc::end_frame( time_t end_time )
+{
+	// Catch CPU up to as close to end as possible. If final instruction
+	// would exceed end, does NOT execute it and leaves m.spc_time < end.
+	if ( end_time > m.spc_time )
+		run_until_( end_time );
+	
+	m.spc_time     -= end_time;
+	m.extra_clocks += end_time;
+	
+	// Greatest number of clocks that emulation can stop early due to
+	// not being able to execute current instruction without going over
+	// allowed time.
+	assert( -cpu_lag_max <= m.spc_time && m.spc_time <= 0 );
+	
+	// Catch timers up to CPU
+	for ( int i = 0; i < timer_count; i++ )
+		run_timer( &m.timers [i], 0 );
+	
+	// Catch DSP up to CPU
+	if ( m.dsp_time < 0 )
+	{
+		RUN_DSP( 0, max_reg_time );
 	}
 	
-	case 0x4A: // AND1 C,mem.bit
-		c &= mem_bit( pc );
-		pc += 2;
-		goto loop;
-	
-	case 0x6A: // AND1 C,/mem.bit
-		check( false ); // untested
-		c &= ~mem_bit( pc );
-		pc += 2;
-		goto loop;
-	
-	case 0x0A: // OR1 C,mem.bit
-		check( false ); // untested
-		c |= mem_bit( pc );
-		pc += 2;
-		goto loop;
-	
-	case 0x2A: // OR1 C,/mem.bit
-		check( false ); // untested
-		c |= ~mem_bit( pc );
-		pc += 2;
-		goto loop;
-	
-	case 0x8A: // EOR1 C,mem.bit
-		c ^= mem_bit( pc );
-		pc += 2;
-		goto loop;
-	
-	case 0xEA: { // NOT1 mem.bit
-		data = READ_PROG16( pc );
-		pc += 2;
-		unsigned temp = READ( data & 0x1FFF );
-		temp ^= 1 << (data >> 13);
-		WRITE( data & 0x1FFF, temp );
-		goto loop;
-	}
-	
-	case 0xCA: { // MOV1 mem.bit,C
-		data = READ_PROG16( pc );
-		pc += 2;
-		unsigned temp = READ( data & 0x1FFF );
-		unsigned bit = data >> 13;
-		temp = (temp & ~(1 << bit)) | (((c >> 8) & 1) << bit);
-		WRITE( data & 0x1FFF, temp );
-		goto loop;
-	}
-	
-	case 0xAA: // MOV1 C,mem.bit
-		c = mem_bit( pc );
-		pc += 2;
-		goto loop;
-	
-// 16. PROGRAM STATUS FLAG OPERATION COMMANDS
+	// Save any extra samples beyond what should be generated
+	if ( m.buf_begin )
+		save_extra();
+}
 
-	case 0x60: // CLRC
-		c = 0;
-		goto loop;
-		
-	case 0x80: // SETC
-		c = ~0;
-		goto loop;
-	
-	case 0xED: // NOTC
-		c ^= 0x100;
-		goto loop;
-		
-	case 0xE0: // CLRV
-		status &= ~(st_v | st_h);
-		goto loop;
-	
-	case 0x20: // CLRP
-		dp = 0;
-		goto loop;
-	
-	case 0x40: // SETP
-		dp = 0x100;
-		goto loop;
-	
-	case 0xA0: // EI
-		check( false ); // untested
-		status |= st_i;
-		goto loop;
-	
-	case 0xC0: // DI
-		check( false ); // untested
-		status &= ~st_i;
-		goto loop;
-	
-// 17. OTHER COMMANDS
-
-	case 0x00: // NOP
-		goto loop;
-	
-	//case 0xEF: // SLEEP
-	//case 0xFF: // STOP
-	
-	} // switch
-	
-	// unhandled instructions fall out of switch so emulator can catch them
-	
-stop:
-	pc--;
-	
-	{
-		int temp;
-		CALC_STATUS( temp );
-		r.status = (uint8_t) temp;
-	}
-	
-	r.pc = pc;
-	r.sp = (uint8_t) GET_SP();
-	r.a  = (uint8_t) a;
-	r.x  = (uint8_t) x;
-	r.y  = (uint8_t) y;
-	
-	return remain_;
-}
+// Inclusion here allows static memory access functions and better optimization
+#include "Spc_Cpu.h"
--- a/src/console/Spc_Cpu.h	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Spc_Cpu.h	Tue May 15 13:18:35 2007 -0700
@@ -1,57 +1,1220 @@
-// Super Nintendo (SNES) SPC-700 CPU emulator
+// snes_spc 0.9.0. http://www.slack.net/~ant/
+
+/* Copyright (C) 2004-2007 Shay Green. This module is free software; you
+can redistribute it and/or modify it under the terms of the GNU Lesser
+General Public License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version. This
+module is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+details. You should have received a copy of the GNU Lesser General Public
+License along with this module; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
 
-// Game_Music_Emu 0.5.2
-#ifndef SPC_CPU_H
-#define SPC_CPU_H
+//// Memory access
+
+#if SPC_MORE_ACCURACY
+	#define SUSPICIOUS_OPCODE( name ) ((void) 0)
+#else
+	#define SUSPICIOUS_OPCODE( name ) dprintf( "SPC: suspicious opcode: " name "\n" )
+#endif
+
+#define CPU_READ( time, offset, addr )\
+	cpu_read( addr, time + offset )
+
+#define CPU_WRITE( time, offset, addr, data )\
+	cpu_write( data, addr, time + offset )
 
-#include "blargg_common.h"
+#if SPC_MORE_ACCURACY
+	#define CPU_READ_TIMER( time, offset, addr, out )\
+		{ out = CPU_READ( time, offset, addr ); }
 
-typedef unsigned spc_addr_t;
-typedef blargg_long spc_time_t;
+#else
+	// timers are by far the most common thing read from dp
+	#define CPU_READ_TIMER( time, offset, addr_, out )\
+	{\
+		rel_time_t adj_time = time + offset;\
+		int dp_addr = addr_;\
+		int ti = dp_addr - (r_t0out + 0xF0);\
+		if ( (unsigned) ti < timer_count )\
+		{\
+			Timer* t = &m.timers [ti];\
+			if ( adj_time >= t->next_time )\
+				t = run_timer_( t, adj_time );\
+			out = t->counter;\
+			t->counter = 0;\
+		}\
+		else\
+		{\
+			out = ram [dp_addr];\
+			int i = dp_addr - 0xF0;\
+			if ( (unsigned) i < 0x10 )\
+				out = cpu_read_smp_reg( i, adj_time );\
+		}\
+	}
+#endif
 
-class Snes_Spc;
+#define TIME_ADJ( n )   (n)
 
-class Spc_Cpu {
-	typedef BOOST::uint8_t uint8_t;
-	uint8_t* const ram;
-public:
-	// Keeps pointer to 64K RAM
-	Spc_Cpu( Snes_Spc* spc, uint8_t* ram );
-	
-	// SPC-700 registers. *Not* kept updated during a call to run().
-	struct registers_t {
-		long pc; // more than 16 bits to allow overflow detection
-		uint8_t a;
-		uint8_t x;
-		uint8_t y;
-		uint8_t status;
-		uint8_t sp;
-	} r;
-	
-	// Run CPU for at least 'count' cycles. Return the number of cycles remaining
-	// when emulation stopped (negative if extra cycles were emulated). Emulation
-	// stops when there are no more remaining cycles or an unhandled instruction
-	// is encountered (STOP, SLEEP, and any others not yet implemented). In the
-	// latter case, the return value is greater than zero.
-	spc_time_t run( spc_time_t count );
-	
-	// Number of clock cycles remaining for current run() call
-	spc_time_t remain() const;
-	
-	// Access memory as the emulated CPU does
-	int  read ( spc_addr_t );
-	void write( spc_addr_t, int );
-	
-private:
-	// noncopyable
-	Spc_Cpu( const Spc_Cpu& );
-	Spc_Cpu& operator = ( const Spc_Cpu& );
-	unsigned mem_bit( spc_addr_t );
-	
-	spc_time_t remain_;
-	Snes_Spc& emu;
-};
+#define READ_TIMER( time, addr, out )       CPU_READ_TIMER( rel_time, TIME_ADJ(time), (addr), out )
+#define READ(  time, addr )                 CPU_READ ( rel_time, TIME_ADJ(time), (addr) )
+#define WRITE( time, addr, data )           CPU_WRITE( rel_time, TIME_ADJ(time), (addr), (data) )
+
+#define DP_ADDR( addr )                     (dp + (addr))
+
+#define READ_DP_TIMER(  time, addr, out )   CPU_READ_TIMER( rel_time, TIME_ADJ(time), DP_ADDR( addr ), out )
+#define READ_DP(  time, addr )              READ ( time, DP_ADDR( addr ) )
+#define WRITE_DP( time, addr, data )        WRITE( time, DP_ADDR( addr ), data )
+
+#define READ_PROG16( addr )                 GET_LE16( ram + (addr) )
+
+#define SET_PC( n )     (pc = ram + (n))
+#define GET_PC()        (pc - ram)
+#define READ_PC( pc )   (*(pc))
+#define READ_PC16( pc ) GET_LE16( pc )
+
+// TODO: remove non-wrapping versions?
+#define SPC_NO_SP_WRAPAROUND 0
+
+#define SET_SP( v )     (sp = ram + 0x101 + (v))
+#define GET_SP()        (sp - 0x101 - ram)
+
+#if SPC_NO_SP_WRAPAROUND
+#define PUSH16( v )     (sp -= 2, SET_LE16( sp, v ))
+#define PUSH( v )       (void) (*--sp = (uint8_t) (v))
+#define POP( out )      (void) ((out) = *sp++)
 
-inline spc_time_t Spc_Cpu::remain() const { return remain_; }
+#else
+#define PUSH16( data )\
+{\
+	int addr = (sp -= 2) - ram;\
+	if ( addr > 0x100 )\
+	{\
+		SET_LE16( sp, data );\
+	}\
+	else\
+	{\
+		ram [(uint8_t) addr + 0x100] = (uint8_t) data;\
+		sp [1] = (uint8_t) (data >> 8);\
+		sp += 0x100;\
+	}\
+}
+
+#define PUSH( data )\
+{\
+	*--sp = (uint8_t) (data);\
+	if ( sp - ram == 0x100 )\
+		sp += 0x100;\
+}
+
+#define POP( out )\
+{\
+	out = *sp++;\
+	if ( sp - ram == 0x201 )\
+	{\
+		out = sp [-0x101];\
+		sp -= 0x100;\
+	}\
+}
 
 #endif
+
+#define MEM_BIT( rel ) CPU_mem_bit( pc, rel_time + rel )
+
+unsigned Snes_Spc::CPU_mem_bit( uint8_t const* pc, rel_time_t rel_time ) // fetch the bit named by a 16-bit mem.bit operand at pc
+{
+	unsigned addr = READ_PC16( pc ); // operand: low 13 bits = byte address, high 3 bits = bit number
+	unsigned t = READ( 0, addr & 0x1FFF ) >> (addr >> 13); // shift the selected bit down to bit 0
+	return t << 8 & 0x100; // return it at bit 8, the position the carry tests (c & 0x100) use
+}
+
+//// Status flag handling
+
+// Hex value in name to clarify code and bit shifting.
+// Flag stored in indicated variable during emulation
+int const n80 = 0x80; // nz
+int const v40 = 0x40; // psw
+int const p20 = 0x20; // dp
+int const b10 = 0x10; // psw
+int const h08 = 0x08; // psw
+int const i04 = 0x04; // psw
+int const z02 = 0x02; // nz
+int const c01 = 0x01; // c
+
+int const nz_neg_mask = 0x880; // either bit set indicates N flag set
+
+#define GET_PSW( out )\
+{\
+	out = psw & ~(n80 | p20 | z02 | c01);\
+	out |= c  >> 8 & c01;\
+	out |= dp >> 3 & p20;\
+	out |= ((nz >> 4) | nz) & n80;\
+	if ( !(uint8_t) nz ) out |= z02;\
+}
+
+#define SET_PSW( in )\
+{\
+	psw = in;\
+	c   = in << 8;\
+	dp  = in << 3 & 0x100;\
+	nz  = (in << 4 & 0x800) | (~in & z02);\
+}
+
+SPC_CPU_RUN_FUNC
+{
+	uint8_t* const ram = RAM;
+	int a = m.cpu_regs.a;
+	int x = m.cpu_regs.x;
+	int y = m.cpu_regs.y;
+	uint8_t const* pc;
+	uint8_t* sp;
+	int psw;
+	int c;
+	int nz;
+	int dp;
+	
+	SET_PC( m.cpu_regs.pc );
+	SET_SP( m.cpu_regs.sp );
+	SET_PSW( m.cpu_regs.psw );
+	
+	goto loop;
+	
+	
+	// Main loop
+	
+cbranch_taken_loop:
+	pc += *(BOOST::int8_t const*) pc;
+inc_pc_loop:
+	pc++;
+loop:
+{
+	unsigned opcode;
+	unsigned data;
+	
+	check( (unsigned) a < 0x100 );
+	check( (unsigned) x < 0x100 );
+	check( (unsigned) y < 0x100 );
+	
+	opcode = *pc;
+	if ( (rel_time += m.cycle_table [opcode]) > 0 )
+		goto out_of_time;
+	
+	#ifdef SPC_CPU_OPCODE_HOOK
+		SPC_CPU_OPCODE_HOOK( GET_PC(), opcode );
+	#endif
+	/*
+	//SUB_CASE_COUNTER( 1 );
+	#define PROFILE_TIMER_LOOP( op, addr, len )\
+	if ( opcode == op )\
+	{\
+		int cond = (unsigned) ((addr) - 0xFD) < 3 &&\
+				pc [len] == 0xF0 && pc [len+1] == 0xFE - len;\
+		SUB_CASE_COUNTER( op && cond );\
+	}
+	
+	PROFILE_TIMER_LOOP( 0xEC, GET_LE16( pc + 1 ), 3 );
+	PROFILE_TIMER_LOOP( 0xEB, pc [1], 2 );
+	PROFILE_TIMER_LOOP( 0xE4, pc [1], 2 );
+	*/
+	
+	// TODO: if PC is at end of memory, this will get wrong operand (very obscure)
+	data = *++pc;
+	switch ( opcode )
+	{
+	
+// Common instructions
+
+#define BRANCH( cond )\
+{\
+	pc++;\
+	pc += (BOOST::int8_t) data;\
+	if ( cond )\
+		goto loop;\
+	pc -= (BOOST::int8_t) data;\
+	rel_time -= 2;\
+	goto loop;\
+}
+
+	case 0xF0: // BEQ
+		BRANCH( !(uint8_t) nz ) // 89% taken
+	
+	case 0xD0: // BNE
+		BRANCH( (uint8_t) nz )
+	
+	case 0x3F:{// CALL
+		int old_addr = GET_PC() + 2;
+		SET_PC( READ_PC16( pc ) );
+		PUSH16( old_addr );
+		goto loop;
+	}
+	
+	case 0x6F:// RET
+		#if SPC_NO_SP_WRAPAROUND
+		{
+			SET_PC( GET_LE16( sp ) );
+			sp += 2;
+		}
+		#else
+		{
+			int addr = sp - ram;
+			SET_PC( GET_LE16( sp ) );
+			sp += 2;
+			if ( addr < 0x1FF )
+				goto loop;
+			
+			SET_PC( sp [-0x101] * 0x100 + ram [(uint8_t) addr + 0x100] );
+			sp -= 0x100;
+		}
+		#endif
+		goto loop;
+	
+	case 0xE4: // MOV a,dp
+		++pc;
+		// 80% from timer
+		READ_DP_TIMER( 0, data, a = nz );
+		goto loop;
+	
+	case 0xFA:{// MOV dp,dp: read the source dp byte, then share the store path below
+		int temp;
+		READ_DP_TIMER( -2, data, temp );
+		data = temp + no_read_before_write ;
+	}
+	// fall through
+	case 0x8F:{// MOV dp,#imm: store data at dp + second operand byte
+		int temp = READ_PC( pc + 1 ); // destination offset within the direct page
+		pc += 2;
+		
+		#if !SPC_MORE_ACCURACY
+		{
+			int i = dp + temp;
+			ram [i] = (uint8_t) data;
+			i -= 0xF0;
+			if ( (unsigned) i < 0x10 ) // 76%: destination is a register at $F0-$FF
+			{
+				REGS [i] = (uint8_t) data;
+				
+				// Registers other than $F2 and $F4-$F7: bit (15 - i) of 0xD0FF is set for exactly those
+				//if ( i != 2 && i != 4 && i != 5 && i != 6 && i != 7 )
+				if ( (0xD0FFu << i) & 0x8000u ) // 12%; unsigned, so no shift of a negative int
+					cpu_write_smp_reg( data, rel_time, i );
+			}
+		}
+		#else
+			WRITE_DP( 0, temp, data );
+		#endif
+		goto loop;
+	}
+	
+	case 0xC4: // MOV dp,a
+		++pc;
+		#if !SPC_MORE_ACCURACY
+		{
+			int i = dp + data;
+			ram [i] = (uint8_t) a;
+			i -= 0xF0;
+			if ( (unsigned) i < 0x10 ) // 39%
+			{
+				unsigned sel = i - 2;
+				REGS [i] = (uint8_t) a;
+				
+				if ( sel == 1 ) // 51% $F3
+					dsp_write( a, rel_time );
+				else if ( sel > 1 ) // 1% not $F2 or $F3
+					cpu_write_smp_reg_( a, rel_time, i );
+			}
+		}
+		#else
+			WRITE_DP( 0, data, a );
+		#endif
+		goto loop;
+	
+#define CASE( n )   case n:
+
+// Define common address modes based on opcode for immediate mode. Execution
+// ends with data set to the address of the operand.
+#define ADDR_MODES_( op )\
+	CASE( op - 0x02 ) /* (X) */\
+		data = x + dp;\
+		pc--;\
+		goto end_##op;\
+	CASE( op + 0x0F ) /* (dp)+Y */\
+		data = READ_PROG16( data + dp ) + y;\
+		goto end_##op;\
+	CASE( op - 0x01 ) /* (dp+X) */\
+		data = READ_PROG16( ((uint8_t) (data + x)) + dp );\
+		goto end_##op;\
+	CASE( op + 0x0E ) /* abs+Y */\
+		data += y;\
+		goto abs_##op;\
+	CASE( op + 0x0D ) /* abs+X */\
+		data += x;\
+	CASE( op - 0x03 ) /* abs */\
+	abs_##op:\
+		data += 0x100 * READ_PC( ++pc );\
+		goto end_##op;\
+	CASE( op + 0x0C ) /* dp+X */\
+		data = (uint8_t) (data + x);
+
+#define ADDR_MODES_NO_DP( op )\
+	ADDR_MODES_( op )\
+		data += dp;\
+	end_##op:
+
+#define ADDR_MODES( op )\
+	ADDR_MODES_( op )\
+	CASE( op - 0x04 ) /* dp */\
+		data += dp;\
+	end_##op:
+
+// 1. 8-bit Data Transmission Commands. Group I
+
+	ADDR_MODES_NO_DP( 0xE8 ) // MOV A,addr
+		a = nz = READ( 0, data );
+		goto inc_pc_loop;
+	
+	case 0xBF:{// MOV A,(X)+
+		int temp = x + dp;
+		x = (uint8_t) (x + 1);
+		a = nz = READ( -1, temp );
+		goto loop;
+	}
+	
+	case 0xE8: // MOV A,imm
+		a  = data;
+		nz = data;
+		goto inc_pc_loop;
+	
+	case 0xF9: // MOV X,dp+Y
+		data = (uint8_t) (data + y);
+	case 0xF8: // MOV X,dp
+		READ_DP_TIMER( 0, data, x = nz );
+		goto inc_pc_loop;
+	
+	case 0xE9: // MOV X,abs
+		data = READ_PC16( pc );
+		++pc;
+		data = READ( 0, data );
+	case 0xCD: // MOV X,imm
+		x  = data;
+		nz = data;
+		goto inc_pc_loop;
+	
+	case 0xFB: // MOV Y,dp+X
+		data = (uint8_t) (data + x);
+	case 0xEB: // MOV Y,dp
+		// 70% from timer
+		pc++;
+		READ_DP_TIMER( 0, data, y = nz );
+		goto loop;
+	
+	case 0xEC:{// MOV Y,abs
+		int temp = READ_PC16( pc );
+		pc += 2;
+		READ_TIMER( 0, temp, y = nz );
+		//y = nz = READ( 0, temp );
+		goto loop;
+	}
+	
+	case 0x8D: // MOV Y,imm
+		y  = data;
+		nz = data;
+		goto inc_pc_loop;
+	
+// 2. 8-BIT DATA TRANSMISSION COMMANDS, GROUP 2
+
+	ADDR_MODES_NO_DP( 0xC8 ) // MOV addr,A
+		WRITE( 0, data, a );
+		goto inc_pc_loop;
+	
+	{
+		int temp;
+	case 0xCC: // MOV abs,Y
+		temp = y;
+		goto mov_abs_temp;
+	case 0xC9: // MOV abs,X
+		temp = x;
+	mov_abs_temp:
+		WRITE( 0, READ_PC16( pc ), temp );
+		pc += 2;
+		goto loop;
+	}
+	
+	case 0xD9: // MOV dp+Y,X
+		data = (uint8_t) (data + y);
+	case 0xD8: // MOV dp,X
+		WRITE( 0, data + dp, x );
+		goto inc_pc_loop;
+	
+	case 0xDB: // MOV dp+X,Y
+		data = (uint8_t) (data + x);
+	case 0xCB: // MOV dp,Y
+		WRITE( 0, data + dp, y );
+		goto inc_pc_loop;
+
+// 3. 8-BIT DATA TRANSMISSION COMMANDS, GROUP 3.
+	
+	case 0x7D: // MOV A,X
+		a  = x;
+		nz = x;
+		goto loop;
+	
+	case 0xDD: // MOV A,Y
+		a  = y;
+		nz = y;
+		goto loop;
+	
+	case 0x5D: // MOV X,A
+		x  = a;
+		nz = a;
+		goto loop;
+	
+	case 0xFD: // MOV Y,A
+		y  = a;
+		nz = a;
+		goto loop;
+	
+	case 0x9D: // MOV X,SP
+		x = nz = GET_SP();
+		goto loop;
+	
+	case 0xBD: // MOV SP,X
+		SET_SP( x );
+		goto loop;
+	
+	//case 0xC6: // MOV (X),A (handled by MOV addr,A in group 2)
+	
+	case 0xAF: // MOV (X)+,A: store A at dp + X, then post-increment X
+		WRITE_DP( 0, x, a + no_read_before_write  );
+		x = (uint8_t) (x + 1); // X is 8 bits: wrap 0xFF -> 0x00 as MOV A,(X)+ does, keeping x < 0x100
+		goto loop;
+	
+// 5. 8-BIT LOGIC OPERATION COMMANDS
+	
+#define LOGICAL_OP( op, func )\
+	ADDR_MODES( op ) /* addr */\
+		data = READ( 0, data );\
+	case op: /* imm */\
+		nz = a func##= data;\
+		goto inc_pc_loop;\
+	{   unsigned addr;\
+	case op + 0x11: /* X,Y */\
+		data = READ_DP( -2, y );\
+		addr = x + dp;\
+		goto addr_##op;\
+	case op + 0x01: /* dp,dp */\
+		data = READ_DP( -3, data );\
+	case op + 0x10:{/*dp,imm*/\
+		uint8_t const* addr2 = pc + 1;\
+		pc += 2;\
+		addr = READ_PC( addr2 ) + dp;\
+	}\
+	addr_##op:\
+		nz = data func READ( -1, addr );\
+		WRITE( 0, addr, nz );\
+		goto loop;\
+	}
+	
+	LOGICAL_OP( 0x28, & ); // AND
+	
+	LOGICAL_OP( 0x08, | ); // OR
+	
+	LOGICAL_OP( 0x48, ^ ); // EOR
+	
+// 4. 8-BIT ARITHMETIC OPERATION COMMANDS
+
+	ADDR_MODES( 0x68 ) // CMP addr
+		data = READ( 0, data );
+	case 0x68: // CMP imm
+		nz = a - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	case 0x79: // CMP (X),(Y)
+		data = READ_DP( -2, y );
+		nz = READ_DP( -1, x ) - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto loop;
+	
+	case 0x69: // CMP dp,dp
+		data = READ_DP( -3, data );
+	case 0x78: // CMP dp,imm
+		nz = READ_DP( -1, READ_PC( ++pc ) ) - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	case 0x3E: // CMP X,dp
+		data += dp;
+		goto cmp_x_addr;
+	case 0x1E: // CMP X,abs
+		data = READ_PC16( pc );
+		pc++;
+	cmp_x_addr:
+		data = READ( 0, data );
+	case 0xC8: // CMP X,imm
+		nz = x - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	case 0x7E: // CMP Y,dp
+		data += dp;
+		goto cmp_y_addr;
+	case 0x5E: // CMP Y,abs
+		data = READ_PC16( pc );
+		pc++;
+	cmp_y_addr:
+		data = READ( 0, data );
+	case 0xAD: // CMP Y,imm
+		nz = y - data;
+		c = ~nz;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	
+	{
+		int addr;
+	case 0xB9: // SBC (x),(y)
+	case 0x99: // ADC (x),(y)
+		pc--; // compensate for inc later
+		data = READ_DP( -2, y );
+		addr = x + dp;
+		goto adc_addr;
+	case 0xA9: // SBC dp,dp
+	case 0x89: // ADC dp,dp
+		data = READ_DP( -3, data );
+	case 0xB8: // SBC dp,imm
+	case 0x98: // ADC dp,imm
+		addr = READ_PC( ++pc ) + dp;
+	adc_addr:
+		nz = READ( -1, addr );
+		goto adc_data;
+		
+// catch ADC and SBC together, then decode later based on operand
+#undef CASE
+#define CASE( n ) case n: case (n) + 0x20:
+	ADDR_MODES( 0x88 ) // ADC/SBC addr
+		data = READ( 0, data );
+	case 0xA8: // SBC imm
+	case 0x88: // ADC imm
+		addr = -1; // A
+		nz = a;
+	adc_data: {
+		int flags;
+		if ( opcode >= 0xA0 ) // SBC
+			data ^= 0xFF;
+		
+		flags = data ^ nz;
+		nz += data + (c >> 8 & 1);
+		flags ^= nz;
+		
+		psw = (psw & ~(v40 | h08)) |
+				(flags >> 1 & h08) |
+				((flags + 0x80) >> 2 & v40);
+		c = nz;
+		if ( addr < 0 )
+		{
+			a = (uint8_t) nz;
+			goto inc_pc_loop;
+		}
+		WRITE( 0, addr, /*(uint8_t)*/ nz );
+		goto inc_pc_loop;
+	}
+	
+	}
+	
+// 6. ADDITION & SUBTRACTION COMMANDS
+
+#define INC_DEC_REG( reg, op )\
+		nz  = reg op;\
+		reg = (uint8_t) nz;\
+		goto loop;
+
+	case 0xBC: INC_DEC_REG( a, + 1 ) // INC A
+	case 0x3D: INC_DEC_REG( x, + 1 ) // INC X
+	case 0xFC: INC_DEC_REG( y, + 1 ) // INC Y
+	
+	case 0x9C: INC_DEC_REG( a, - 1 ) // DEC A
+	case 0x1D: INC_DEC_REG( x, - 1 ) // DEC X
+	case 0xDC: INC_DEC_REG( y, - 1 ) // DEC Y
+
+	case 0x9B: // DEC dp+X
+	case 0xBB: // INC dp+X
+		data = (uint8_t) (data + x);
+	case 0x8B: // DEC dp
+	case 0xAB: // INC dp
+		data += dp;
+		goto inc_abs;
+	case 0x8C: // DEC abs
+	case 0xAC: // INC abs
+		data = READ_PC16( pc );
+		pc++;
+	inc_abs:
+		nz = (opcode >> 4 & 2) - 1;
+		nz += READ( -1, data );
+		WRITE( 0, data, /*(uint8_t)*/ nz );
+		goto inc_pc_loop;
+	
+// 7. SHIFT, ROTATION COMMANDS
+
+	case 0x5C: // LSR A
+		c = 0;
+	case 0x7C:{// ROR A
+		nz = (c >> 1 & 0x80) | (a >> 1);
+		c = a << 8;
+		a = nz;
+		goto loop;
+	}
+	
+	case 0x1C: // ASL A
+		c = 0;
+	case 0x3C:{// ROL A
+		int temp = c >> 8 & 1;
+		c = a << 1;
+		nz = c | temp;
+		a = (uint8_t) nz;
+		goto loop;
+	}
+	
+	case 0x0B: // ASL dp
+		c = 0;
+		data += dp;
+		goto rol_mem;
+	case 0x1B: // ASL dp+X
+		c = 0;
+	case 0x3B: // ROL dp+X
+		data = (uint8_t) (data + x);
+	case 0x2B: // ROL dp
+		data += dp;
+		goto rol_mem;
+	case 0x0C: // ASL abs
+		c = 0;
+	case 0x2C: // ROL abs
+		data = READ_PC16( pc );
+		pc++;
+	rol_mem:
+		nz = c >> 8 & 1;
+		nz |= (c = READ( -1, data ) << 1);
+		WRITE( 0, data, /*(uint8_t)*/ nz );
+		goto inc_pc_loop;
+	
+	case 0x4B: // LSR dp
+		c = 0;
+		data += dp;
+		goto ror_mem;
+	case 0x5B: // LSR dp+X
+		c = 0;
+	case 0x7B: // ROR dp+X
+		data = (uint8_t) (data + x);
+	case 0x6B: // ROR dp
+		data += dp;
+		goto ror_mem;
+	case 0x4C: // LSR abs
+		c = 0;
+	case 0x6C: // ROR abs
+		data = READ_PC16( pc );
+		pc++;
+	ror_mem: {
+		int temp = READ( -1, data );
+		nz = (c >> 1 & 0x80) | (temp >> 1);
+		c = temp << 8;
+		WRITE( 0, data, nz );
+		goto inc_pc_loop;
+	}
+
+	case 0x9F: // XCN
+		nz = a = (a >> 4) | (uint8_t) (a << 4);
+		goto loop;
+
+// 8. 16-BIT TRANSMISSION COMMANDS
+
+	case 0xBA: // MOVW YA,dp
+		a = READ_DP( -2, data );
+		nz = (a & 0x7F) | (a >> 1);
+		y = READ_DP( 0, (uint8_t) (data + 1) );
+		nz |= y;
+		goto inc_pc_loop;
+	
+	case 0xDA: // MOVW dp,YA
+		WRITE_DP( -1, data, a );
+		WRITE_DP( 0, (uint8_t) (data + 1), y + no_read_before_write  );
+		goto inc_pc_loop;
+	
+// 9. 16-BIT OPERATION COMMANDS
+
+	case 0x3A: // INCW dp
+	case 0x1A:{// DECW dp
+		int temp;
+		// low byte
+		data += dp;
+		temp = READ( -3, data );
+		temp += (opcode >> 4 & 2) - 1; // +1 for INCW, -1 for DECW
+		nz = ((temp >> 1) | temp) & 0x7F;
+		WRITE( -2, data, /*(uint8_t)*/ temp );
+		
+		// high byte
+		data = (uint8_t) (data + 1) + dp;
+		temp = (uint8_t) ((temp >> 8) + READ( -1, data ));
+		nz |= temp;
+		WRITE( 0, data, temp );
+		
+		goto inc_pc_loop;
+	}
+		
+	case 0x7A: // ADDW YA,dp
+	case 0x9A:{// SUBW YA,dp
+		int lo = READ_DP( -2, data );
+		int hi = READ_DP( 0, (uint8_t) (data + 1) );
+		int result;
+		int flags;
+		
+		if ( opcode == 0x9A ) // SUBW
+		{
+			lo = (lo ^ 0xFF) + 1;
+			hi ^= 0xFF;
+		}
+		
+		lo += a;
+		result = y + hi + (lo >> 8);
+		flags = hi ^ y ^ result;
+		
+		psw = (psw & ~(v40 | h08)) |
+				(flags >> 1 & h08) |
+				((flags + 0x80) >> 2 & v40);
+		c = result;
+		a = (uint8_t) lo;
+		result = (uint8_t) result;
+		y = result;
+		nz = (((lo >> 1) | lo) & 0x7F) | result;
+		
+		goto inc_pc_loop;
+	}
+	
+	case 0x5A: { // CMPW YA,dp
+		int temp = a - READ_DP( -1, data );
+		nz = ((temp >> 1) | temp) & 0x7F;
+		temp = y + (temp >> 8);
+		temp -= READ_DP( 0, (uint8_t) (data + 1) );
+		nz |= temp;
+		c  = ~temp;
+		nz &= 0xFF;
+		goto inc_pc_loop;
+	}
+	
+// 10. MULTIPLICATION & DIVISION COMMANDS
+
+	case 0xCF: { // MUL YA
+		unsigned temp = y * a;
+		a = (uint8_t) temp;
+		nz = ((temp >> 1) | temp) & 0x7F;
+		y = temp >> 8;
+		nz |= y;
+		goto loop;
+	}
+	
+	case 0x9E: // DIV YA,X
+	{
+		unsigned ya = y * 0x100 + a;
+		
+		psw &= ~(h08 | v40);
+		
+		if ( y >= x )
+			psw |= v40;
+		
+		if ( (y & 15) >= (x & 15) )
+			psw |= h08;
+		
+		if ( y < x * 2 )
+		{
+			a = ya / x;
+			y = ya - a * x;
+		}
+		else
+		{
+			a = 255 - (ya - x * 0x200) / (256 - x);
+			y = x   + (ya - x * 0x200) % (256 - x);
+		}
+		
+		nz = (uint8_t) a;
+		a = (uint8_t) a;
+		
+		goto loop;
+	}
+	
+// 11. DECIMAL COMPENSATION COMMANDS
+	
+	case 0xDF: // DAA
+		SUSPICIOUS_OPCODE( "DAA" );
+		if ( a > 0x99 || c & 0x100 )
+		{
+			a += 0x60;
+			c = 0x100;
+		}
+		
+		if ( (a & 0x0F) > 9 || psw & h08 )
+			a += 0x06;
+		
+		nz = a;
+		a = (uint8_t) a;
+		goto loop;
+	
+	case 0xBE: // DAS
+		SUSPICIOUS_OPCODE( "DAS" );
+		if ( a > 0x99 || !(c & 0x100) )
+		{
+			a -= 0x60;
+			c = 0;
+		}
+		
+		if ( (a & 0x0F) > 9 || !(psw & h08) )
+			a -= 0x06;
+		
+		nz = a;
+		a = (uint8_t) a;
+		goto loop;
+	
+// 12. BRANCHING COMMANDS
+
+	case 0x2F: // BRA rel
+		pc += (BOOST::int8_t) data;
+		goto inc_pc_loop;
+	
+	case 0x30: // BMI
+		BRANCH( (nz & nz_neg_mask) )
+	
+	case 0x10: // BPL
+		BRANCH( !(nz & nz_neg_mask) )
+	
+	case 0xB0: // BCS
+		BRANCH( c & 0x100 )
+	
+	case 0x90: // BCC
+		BRANCH( !(c & 0x100) )
+	
+	case 0x70: // BVS
+		BRANCH( psw & v40 )
+	
+	case 0x50: // BVC
+		BRANCH( !(psw & v40) )
+	
+	#define CBRANCH( cond )\
+	{\
+		pc++;\
+		if ( cond )\
+			goto cbranch_taken_loop;\
+		rel_time -= 2;\
+		goto inc_pc_loop;\
+	}
+	
+	case 0x03: // BBS dp.bit,rel
+	case 0x23:
+	case 0x43:
+	case 0x63:
+	case 0x83:
+	case 0xA3:
+	case 0xC3:
+	case 0xE3:
+		CBRANCH( READ_DP( -4, data ) >> (opcode >> 5) & 1 )
+	
+	case 0x13: // BBC dp.bit,rel
+	case 0x33:
+	case 0x53:
+	case 0x73:
+	case 0x93:
+	case 0xB3:
+	case 0xD3:
+	case 0xF3:
+		CBRANCH( !(READ_DP( -4, data ) >> (opcode >> 5) & 1) )
+	
+	case 0xDE: // CBNE dp+X,rel
+		data = (uint8_t) (data + x);
+		// fall through
+	case 0x2E:{// CBNE dp,rel
+		int temp;
+		// 61% from timer
+		READ_DP_TIMER( -4, data, temp );
+		CBRANCH( temp != a )
+	}
+	
+	case 0x6E: { // DBNZ dp,rel
+		unsigned temp = READ_DP( -4, data ) - 1;
+		WRITE_DP( -3, (uint8_t) data, /*(uint8_t)*/ temp + no_read_before_write  );
+		CBRANCH( temp )
+	}
+	
+	case 0xFE: // DBNZ Y,rel
+		y = (uint8_t) (y - 1);
+		BRANCH( y )
+	
+	case 0x1F: // JMP [abs+X]
+		SET_PC( READ_PC16( pc ) + x );
+		// fall through
+	case 0x5F: // JMP abs
+		SET_PC( READ_PC16( pc ) );
+		goto loop;
+	
+// 13. SUB-ROUTINE CALL RETURN COMMANDS
+	
+	case 0x0F:{// BRK
+		int temp;
+		int ret_addr = GET_PC();
+		SUSPICIOUS_OPCODE( "BRK" );
+		SET_PC( READ_PROG16( 0xFFDE ) ); // vector address verified
+		PUSH16( ret_addr );
+		GET_PSW( temp );
+		psw = (psw | b10) & ~i04;
+		PUSH( temp );
+		goto loop;
+	}
+	
+	case 0x4F:{// PCALL offset
+		int ret_addr = GET_PC() + 1;
+		SET_PC( 0xFF00 | data );
+		PUSH16( ret_addr );
+		goto loop;
+	}
+	
+	case 0x01: // TCALL n
+	case 0x11:
+	case 0x21:
+	case 0x31:
+	case 0x41:
+	case 0x51:
+	case 0x61:
+	case 0x71:
+	case 0x81:
+	case 0x91:
+	case 0xA1:
+	case 0xB1:
+	case 0xC1:
+	case 0xD1:
+	case 0xE1:
+	case 0xF1: {
+		int ret_addr = GET_PC();
+		SET_PC( READ_PROG16( 0xFFDE - (opcode >> 3) ) );
+		PUSH16( ret_addr );
+		goto loop;
+	}
+	
+// 14. STACK OPERATION COMMANDS
+
+	{
+		int temp; // PSW byte taken from the stack
+	case 0x7F: // RET1: pop PSW, then the return address (low byte first)
+		// Pop byte by byte so SP wrap-around is honored, as RET and POP already do
+		POP( temp );
+		{ int lo; int hi; POP( lo ); POP( hi ); SET_PC( hi * 0x100 + lo ); }
+		goto set_psw;
+	case 0x8E: // POP PSW
+		POP( temp );
+	set_psw:
+		SET_PSW( temp ); // spread the byte back into psw, c, dp and nz
+		goto loop;
+	}
+	
+	case 0x0D: { // PUSH PSW
+		int temp;
+		GET_PSW( temp );
+		PUSH( temp );
+		goto loop;
+	}
+
+	case 0x2D: // PUSH A
+		PUSH( a );
+		goto loop;
+	
+	case 0x4D: // PUSH X
+		PUSH( x );
+		goto loop;
+	
+	case 0x6D: // PUSH Y
+		PUSH( y );
+		goto loop;
+	
+	case 0xAE: // POP A
+		POP( a );
+		goto loop;
+	
+	case 0xCE: // POP X
+		POP( x );
+		goto loop;
+	
+	case 0xEE: // POP Y
+		POP( y );
+		goto loop;
+	
+// 15. BIT OPERATION COMMANDS
+
+	case 0x02: // SET1
+	case 0x22:
+	case 0x42:
+	case 0x62:
+	case 0x82:
+	case 0xA2:
+	case 0xC2:
+	case 0xE2:
+	case 0x12: // CLR1
+	case 0x32:
+	case 0x52:
+	case 0x72:
+	case 0x92:
+	case 0xB2:
+	case 0xD2:
+	case 0xF2: {
+		int bit = 1 << (opcode >> 5);
+		int mask = ~bit;
+		if ( opcode & 0x10 )
+			bit = 0;
+		data += dp;
+		WRITE( 0, data, (READ( -1, data ) & mask) | bit );
+		goto inc_pc_loop;
+	}
+		
+	case 0x0E: // TSET1 abs
+	case 0x4E: // TCLR1 abs
+		data = READ_PC16( pc );
+		pc += 2;
+		{
+			unsigned temp = READ( -2, data );
+			nz = (uint8_t) (a - temp);
+			temp &= ~a;
+			if ( opcode == 0x0E )
+				temp |= a;
+			WRITE( 0, data, temp );
+		}
+		goto loop;
+	
+	case 0x4A: // AND1 C,mem.bit
+		c &= MEM_BIT( 0 );
+		pc += 2;
+		goto loop;
+	
+	case 0x6A: // AND1 C,/mem.bit
+		c &= ~MEM_BIT( 0 );
+		pc += 2;
+		goto loop;
+	
+	case 0x0A: // OR1 C,mem.bit
+		c |= MEM_BIT( -1 );
+		pc += 2;
+		goto loop;
+	
+	case 0x2A: // OR1 C,/mem.bit
+		c |= ~MEM_BIT( -1 );
+		pc += 2;
+		goto loop;
+	
+	case 0x8A: // EOR1 C,mem.bit
+		c ^= MEM_BIT( -1 );
+		pc += 2;
+		goto loop;
+	
+	case 0xEA: // NOT1 mem.bit
+		data = READ_PC16( pc );
+		pc += 2;
+		{
+			unsigned temp = READ( -1, data & 0x1FFF );
+			temp ^= 1 << (data >> 13);
+			WRITE( 0, data & 0x1FFF, temp );
+		}
+		goto loop;
+	
+	case 0xCA: // MOV1 mem.bit,C
+		data = READ_PC16( pc );
+		pc += 2;
+		{
+			unsigned temp = READ( -2, data & 0x1FFF );
+			unsigned bit = data >> 13;
+			temp = (temp & ~(1 << bit)) | ((c >> 8 & 1) << bit);
+			WRITE( 0, data & 0x1FFF, temp + no_read_before_write  );
+		}
+		goto loop;
+	
+	case 0xAA: // MOV1 C,mem.bit
+		c = MEM_BIT( 0 );
+		pc += 2;
+		goto loop;
+	
+// 16. PROGRAM PSW FLAG OPERATION COMMANDS
+
+	case 0x60: // CLRC
+		c = 0;
+		goto loop;
+		
+	case 0x80: // SETC
+		c = ~0;
+		goto loop;
+	
+	case 0xED: // NOTC
+		c ^= 0x100;
+		goto loop;
+		
+	case 0xE0: // CLRV
+		psw &= ~(v40 | h08);
+		goto loop;
+	
+	case 0x20: // CLRP
+		dp = 0;
+		goto loop;
+	
+	case 0x40: // SETP
+		dp = 0x100;
+		goto loop;
+	
+	case 0xA0: // EI
+		SUSPICIOUS_OPCODE( "EI" );
+		psw |= i04;
+		goto loop;
+	
+	case 0xC0: // DI
+		SUSPICIOUS_OPCODE( "DI" );
+		psw &= ~i04;
+		goto loop;
+	
+// 17. OTHER COMMANDS
+
+	case 0x00: // NOP
+		goto loop;
+	
+	case 0xFF:{// STOP
+		// handle PC wrap-around
+		unsigned addr = GET_PC() - 1;
+		if ( addr >= 0x10000 )
+		{
+			addr &= 0xFFFF;
+			SET_PC( addr );
+			dprintf( "SPC: PC wrapped around\n" );
+			goto loop;
+		}
+	}
+	// fall through
+	case 0xEF: // SLEEP
+		SUSPICIOUS_OPCODE( "STOP/SLEEP" );
+		--pc;
+		rel_time = 0;
+		m.cpu_error = "SPC emulation error";
+		goto stop;
+	} // switch
+	
+	assert( 0 ); // catch any unhandled instructions
+}   
+out_of_time:
+	rel_time -= m.cycle_table [*pc]; // undo partial execution of opcode
+stop:
+	
+	// Uncache registers
+	if ( GET_PC() >= 0x10000 )
+		dprintf( "SPC: PC wrapped around\n" );
+	m.cpu_regs.pc = (uint16_t) GET_PC();
+	m.cpu_regs.sp = ( uint8_t) GET_SP();
+	m.cpu_regs.a  = ( uint8_t) a;
+	m.cpu_regs.x  = ( uint8_t) x;
+	m.cpu_regs.y  = ( uint8_t) y;
+	{
+		int temp;
+		GET_PSW( temp );
+		m.cpu_regs.psw = (uint8_t) temp;
+	}
+}
+SPC_CPU_RUN_FUNC_END
--- a/src/console/Spc_Dsp.cxx	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Spc_Dsp.cxx	Tue May 15 13:18:35 2007 -0700
@@ -1,14 +1,11 @@
-// Game_Music_Emu 0.5.2. http://www.slack.net/~ant/
-
-// Based on Brad Martin's OpenSPC DSP emulator
+// snes_spc 0.9.0. http://www.slack.net/~ant/
 
 #include "Spc_Dsp.h"
 
 #include "blargg_endian.h"
 #include <string.h>
 
-/* Copyright (C) 2002 Brad Martin */
-/* Copyright (C) 2004-2006 Shay Green. This module is free software; you
+/* Copyright (C) 2007 Shay Green. This module is free software; you
 can redistribute it and/or modify it under the terms of the GNU Lesser
 General Public License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version. This
@@ -25,611 +22,76 @@
 	#include BLARGG_ENABLE_OPTIMIZER
 #endif
 
-Spc_Dsp::Spc_Dsp( uint8_t* ram_ ) : ram( ram_ )
-{
-	set_gain( 1.0 );
-	mute_voices( 0 );
-	disable_surround( false );
-	
-	assert( offsetof (globals_t,unused9 [2]) == register_count );
-	assert( sizeof (voice) == register_count );
-	blargg_verify_byte_order();
-}
+#if INT_MAX < 0x7FFFFFFF
+	#error "Requires that int type have at least 32 bits"
+#endif
 
-void Spc_Dsp::mute_voices( int mask )
-{
-	for ( int i = 0; i < voice_count; i++ )
-		voice_state [i].enabled = (mask >> i & 1) ? 31 : 7;
-}
 
-void Spc_Dsp::reset()
-{
-	keys = 0;
-	echo_ptr = 0;
-	noise_count = 0;
-	noise = 1;
-	fir_offset = 0;
-	
-	g.flags = 0xE0; // reset, mute, echo off
-	g.key_ons = 0;
-	
-	for ( int i = 0; i < voice_count; i++ )
-	{
-		voice_t& v = voice_state [i];
-		v.on_cnt = 0;
-		v.volume [0] = 0;
-		v.volume [1] = 0;
-		v.envstate = state_release;
-	}
-	
-	memset( fir_buf, 0, sizeof fir_buf );
-}
+// TODO: add to blargg_endian.h
+#define GET_LE16SA( addr )      ((BOOST::int16_t) GET_LE16( addr ))
+#define GET_LE16A( addr )       GET_LE16( addr )
+#define SET_LE16A( addr, data ) SET_LE16( addr, data )
 
-void Spc_Dsp::write( int i, int data )
+static BOOST::uint8_t const initial_regs [Spc_Dsp::register_count] =
 {
-	require( (unsigned) i < register_count );
-	
-	reg [i] = data;
-	int high = i >> 4;
-	switch ( i & 0x0F )
-	{
-		// voice volume
-		case 0:
-		case 1: {
-			short* volume = voice_state [high].volume;
-			int left  = (int8_t) reg [i & ~1];
-			int right = (int8_t) reg [i |  1];
-			volume [0] = left;
-			volume [1] = right;
-			// kill surround only if enabled and signs of volumes differ
-			if ( left * right < surround_threshold )
-			{
-				if ( left < 0 )
-					volume [0] = -left;
-				else
-					volume [1] = -right;
-			}
-			break;
-		}
-		
-		// fir coefficients
-		case 0x0F:
-			fir_coeff [high] = (int8_t) data; // sign-extend
-			break;
-	}
-}
-
-// This table is for envelope timing.  It represents the number of counts
-// that should be subtracted from the counter each sample period (32kHz).
-// The counter starts at 30720 (0x7800). Each count divides exactly into
-// 0x7800 without remainder.
-const int env_rate_init = 0x7800;
-static short const env_rates [0x20] =
-{
-	0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
-	0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
-	0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
-	0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
+	0x45,0x8B,0x5A,0x9A,0xE4,0x82,0x1B,0x78,0x00,0x00,0xAA,0x96,0x89,0x0E,0xE0,0x80,
+	0x2A,0x49,0x3D,0xBA,0x14,0xA0,0xAC,0xC5,0x00,0x00,0x51,0xBB,0x9C,0x4E,0x7B,0xFF,
+	0xF4,0xFD,0x57,0x32,0x37,0xD9,0x42,0x22,0x00,0x00,0x5B,0x3C,0x9F,0x1B,0x87,0x9A,
+	0x6F,0x27,0xAF,0x7B,0xE5,0x68,0x0A,0xD9,0x00,0x00,0x9A,0xC5,0x9C,0x4E,0x7B,0xFF,
+	0xEA,0x21,0x78,0x4F,0xDD,0xED,0x24,0x14,0x00,0x00,0x77,0xB1,0xD1,0x36,0xC1,0x67,
+	0x52,0x57,0x46,0x3D,0x59,0xF4,0x87,0xA4,0x00,0x00,0x7E,0x44,0x9C,0x4E,0x7B,0xFF,
+	0x75,0xF5,0x06,0x97,0x10,0xC3,0x24,0xBB,0x00,0x00,0x7B,0x7A,0xE0,0x60,0x12,0x0F,
+	0xF7,0x74,0x1C,0xE5,0x39,0x3D,0x73,0xC1,0x00,0x00,0x7A,0xB3,0xFF,0x4E,0x7B,0xFF
 };
 
-const int env_range = 0x800;
-
-inline int Spc_Dsp::clock_envelope( int v )
-{                               /* Return value is current 
-								 * ENVX */
-	raw_voice_t& raw_voice = this->voice [v];
-	voice_t& voice = voice_state [v];
-	
-	int envx = voice.envx;
-	if ( voice.envstate == state_release )
-	{
-		/*
-		 * Docs: "When in the state of "key off". the "click" sound is 
-		 * prevented by the addition of the fixed value 1/256" WTF???
-		 * Alright, I'm going to choose to interpret that this way:
-		 * When a note is keyed off, start the RELEASE state, which
-		 * subtracts 1/256th each sample period (32kHz).  Note there's 
-		 * no need for a count because it always happens every update. 
-		 */
-		envx -= env_range / 256;
-		if ( envx <= 0 )
-		{
-			envx = 0;
-			keys &= ~(1 << v);
-			return -1;
-		}
-		voice.envx = envx;
-		raw_voice.envx = envx >> 8;
-		return envx;
-	}
-	
-	int cnt = voice.envcnt;
-	int adsr1 = raw_voice.adsr [0];
-	if ( adsr1 & 0x80 )
-	{
-		switch ( voice.envstate )
-		{
-			case state_attack: {
-				// increase envelope by 1/64 each step
-				int t = adsr1 & 15;
-				if ( t == 15 )
-				{
-					envx += env_range / 2;
-				}
-				else
-				{
-					cnt -= env_rates [t * 2 + 1];
-					if ( cnt > 0 )
-						break;
-					envx += env_range / 64;
-					cnt = env_rate_init;
-				}
-				if ( envx >= env_range )
-				{
-					envx = env_range - 1;
-					voice.envstate = state_decay;
-				}
-				voice.envx = envx;
-				break;
-			}
-			
-			case state_decay: {
-				// Docs: "DR... [is multiplied] by the fixed value
-				// 1-1/256." Well, at least that makes some sense.
-				// Multiplying ENVX by 255/256 every time DECAY is
-				// updated. 
-				cnt -= env_rates [((adsr1 >> 3) & 0xE) + 0x10];
-				if ( cnt <= 0 )
-				{
-					cnt = env_rate_init;
-					envx -= ((envx - 1) >> 8) + 1;
-					voice.envx = envx;
-				}
-				int sustain_level = raw_voice.adsr [1] >> 5;
-				
-				if ( envx <= (sustain_level + 1) * 0x100 )
-					voice.envstate = state_sustain;
-				break;
-			}
-			
-			case state_sustain:
-				// Docs: "SR [is multiplied] by the fixed value 1-1/256."
-				// Multiplying ENVX by 255/256 every time SUSTAIN is
-				// updated. 
-				cnt -= env_rates [raw_voice.adsr [1] & 0x1F];
-				if ( cnt <= 0 )
-				{
-					cnt = env_rate_init;
-					envx -= ((envx - 1) >> 8) + 1;
-					voice.envx = envx;
-				}
-				break;
-			
-			case state_release:
-				// handled above
-				break;
-		}
-	}
-	else
-	{                           /* GAIN mode is set */
-		/*
-		 * Note: if the game switches between ADSR and GAIN modes
-		 * partway through, should the count be reset, or should it
-		 * continue from where it was? Does the DSP actually watch for 
-		 * that bit to change, or does it just go along with whatever
-		 * it sees when it performs the update? I'm going to assume
-		 * the latter and not update the count, unless I see a game
-		 * that obviously wants the other behavior.  The effect would
-		 * be pretty subtle, in any case. 
-		 */
-		int t = raw_voice.gain;
-		if (t < 0x80)
-		{
-			envx = voice.envx = t << 4;
-		}
-		else switch (t >> 5)
-		{
-		case 4:         /* Docs: "Decrease (linear): Subtraction
-							 * of the fixed value 1/64." */
-			cnt -= env_rates [t & 0x1F];
-			if (cnt > 0)
-				break;
-			cnt = env_rate_init;
-			envx -= env_range / 64;
-			if ( envx < 0 )
-			{
-				envx = 0;
-				if ( voice.envstate == state_attack )
-					voice.envstate = state_decay;
-			}
-			voice.envx = envx;
-			break;
-		case 5:         /* Docs: "Drecrease <sic> (exponential):
-							 * Multiplication by the fixed value
-							 * 1-1/256." */
-			cnt -= env_rates [t & 0x1F];
-			if (cnt > 0)
-				break;
-			cnt = env_rate_init;
-			envx -= ((envx - 1) >> 8) + 1;
-			if ( envx < 0 )
-			{
-				envx = 0;
-				if ( voice.envstate == state_attack )
-					voice.envstate = state_decay;
-			}
-			voice.envx = envx;
-			break;
-		case 6:         /* Docs: "Increase (linear): Addition of
-							 * the fixed value 1/64." */
-			cnt -= env_rates [t & 0x1F];
-			if (cnt > 0)
-				break;
-			cnt = env_rate_init;
-			envx += env_range / 64;
-			if ( envx >= env_range )
-				envx = env_range - 1;
-			voice.envx = envx;
-			break;
-		case 7:         /* Docs: "Increase (bent line): Addition
-							 * of the constant 1/64 up to .75 of the
-							 * constaint <sic> 1/256 from .75 to 1." */
-			cnt -= env_rates [t & 0x1F];
-			if (cnt > 0)
-				break;
-			cnt = env_rate_init;
-			if ( envx < env_range * 3 / 4 )
-				envx += env_range / 64;
-			else
-				envx += env_range / 256;
-			if ( envx >= env_range )
-				envx = env_range - 1;
-			voice.envx = envx;
-			break;
-		}
-	}
-	voice.envcnt = cnt;
-	raw_voice.envx = envx >> 4;
-	return envx;
-}
-
-// Clamp n into range -32768 <= n <= 32767
-inline int clamp_16( int n )
-{
-	if ( (BOOST::int16_t) n != n )
-		n = BOOST::int16_t (0x7FFF - (n >> 31));
-	return n;
+// if ( io < -32768 ) io = -32768;
+// if ( io >  32767 ) io =  32767;
+#define CLAMP16( io )\
+{\
+	if ( (int16_t) io != io )\
+		io = (io >> 31) ^ 0x7FFF;\
 }
 
-void Spc_Dsp::run( long count, short* out_buf )
+// Access global DSP register
+#define REG(n)      m.regs [r_##n]
+
+// Access voice DSP register
+#define VREG(r,n)   r [v_##n]
+
+#define WRITE_SAMPLES( l, r, out ) \
+{\
+	out [0] = l;\
+	out [1] = r;\
+	out += 2;\
+	if ( out >= m.out_end )\
+	{\
+		check( out == m.out_end );\
+		check( m.out_end != &m.extra [extra_size] || \
+			(m.extra <= m.out_begin && m.extra < &m.extra [extra_size]) );\
+		out       = m.extra;\
+		m.out_end = &m.extra [extra_size];\
+	}\
+}\
+
+void Spc_Dsp::set_output( sample_t* out, int size )
 {
-	// to do: make clock_envelope() inline so that this becomes a leaf function?
-	
-	// Should we just fill the buffer with silence? Flags won't be cleared
-	// during this run so it seems it should keep resetting every sample.
-	if ( g.flags & 0x80 )
-		reset();
-	
-	struct src_dir {
-		char start [2];
-		char loop [2];
-	};
-	
-	const src_dir* const sd = (src_dir*) &ram [g.wave_page * 0x100];
-	
-	int left_volume  = g.left_volume;
-	int right_volume = g.right_volume;
-	if ( left_volume * right_volume < surround_threshold )
-		right_volume = -right_volume; // kill global surround
-	left_volume  *= emu_gain;
-	right_volume *= emu_gain;
-	
-	while ( --count >= 0 )
+	require( (size & 1) == 0 ); // must be even
+	if ( !out )
 	{
-		// Here we check for keys on/off.  Docs say that successive writes
-		// to KON/KOF must be separated by at least 2 Ts periods or risk
-		// being neglected.  Therefore DSP only looks at these during an
-		// update, and not at the time of the write.  Only need to do this
-		// once however, since the regs haven't changed over the whole
-		// period we need to catch up with. 
-		
-		g.wave_ended &= ~g.key_ons; // Keying on a voice resets that bit in ENDX.
-		
-		if ( g.noise_enables )
-		{
-			noise_count -= env_rates [g.flags & 0x1F];
-			if ( noise_count <= 0 )
-			{
-				noise_count = env_rate_init;
-				
-				noise_amp = BOOST::int16_t (noise * 2);
-				
-				// TODO: switch to Galios style
-				int feedback = (noise << 13) ^ (noise << 14);
-				noise = (feedback & 0x4000) | (noise >> 1);
-			}
-		}
-		
-		// What is the expected behavior when pitch modulation is enabled on
-		// voice 0? Jurassic Park 2 does this. Assume 0 for now.
-		blargg_long prev_outx = 0;
-		
-		int echol = 0;
-		int echor = 0;
-		int left = 0;
-		int right = 0;
-		for ( int vidx = 0; vidx < voice_count; vidx++ )
-		{
-			const int vbit = 1 << vidx;
-			raw_voice_t& raw_voice = voice [vidx];
-			voice_t& voice = voice_state [vidx];
-			
-			if ( voice.on_cnt && !--voice.on_cnt )
-			{
-				// key on
-				keys |= vbit;
-				voice.addr = GET_LE16( sd [raw_voice.waveform].start );
-				voice.block_remain = 1;
-				voice.envx = 0;
-				voice.block_header = 0;
-				voice.fraction = 0x3FFF; // decode three samples immediately
-				voice.interp0 = 0; // BRR decoder filter uses previous two samples
-				voice.interp1 = 0;
-				
-				// NOTE: Real SNES does *not* appear to initialize the
-				// envelope counter to anything in particular. The first
-				// cycle always seems to come at a random time sooner than 
-				// expected; as yet, I have been unable to find any
-				// pattern.  I doubt it will matter though, so we'll go
-				// ahead and do the full time for now. 
-				voice.envcnt = env_rate_init;
-				voice.envstate = state_attack;
-			}
-			
-			if ( g.key_ons & vbit & ~g.key_offs )
-			{
-				// voice doesn't come on if key off is set
-				g.key_ons &= ~vbit;
-				voice.on_cnt = 8;
-			}
-			
-			if ( keys & g.key_offs & vbit )
-			{
-				// key off
-				voice.envstate = state_release;
-				voice.on_cnt = 0;
-			}
-			
-			int envx;
-			if ( !(keys & vbit) || (envx = clock_envelope( vidx )) < 0 )
-			{
-				raw_voice.envx = 0;
-				raw_voice.outx = 0;
-				prev_outx = 0;
-				continue;
-			}
-			
-			// Decode samples when fraction >= 1.0 (0x1000)
-			for ( int n = voice.fraction >> 12; --n >= 0; )
-			{
-				if ( !--voice.block_remain )
-				{
-					if ( voice.block_header & 1 )
-					{
-						g.wave_ended |= vbit;
-					
-						if ( voice.block_header & 2 )
-						{
-							// verified (played endless looping sample and ENDX was set)
-							voice.addr = GET_LE16( sd [raw_voice.waveform].loop );
-						}
-						else
-						{
-							// first block was end block; don't play anything (verified)
-							goto sample_ended; // to do: find alternative to goto
-						}
-					}
-					
-					voice.block_header = ram [voice.addr++];
-					voice.block_remain = 16; // nybbles
-				}
-				
-				// if next block has end flag set, *this* block ends *early* (verified)
-				if ( voice.block_remain == 9 && (ram [voice.addr + 5] & 3) == 1 &&
-						(voice.block_header & 3) != 3 )
-				{
-			sample_ended:
-					g.wave_ended |= vbit;
-					keys &= ~vbit;
-					raw_voice.envx = 0;
-					voice.envx = 0;
-					// add silence samples to interpolation buffer
-					do
-					{
-						voice.interp3 = voice.interp2;
-						voice.interp2 = voice.interp1;
-						voice.interp1 = voice.interp0;
-						voice.interp0 = 0;
-					}
-					while ( --n >= 0 );
-					break;
-				}
-				
-				int delta = ram [voice.addr];
-				if ( voice.block_remain & 1 )
-				{
-					delta <<= 4; // use lower nybble
-					voice.addr++;
-				}
-				
-				// Use sign-extended upper nybble
-				delta = int8_t (delta) >> 4;
-				
-				// For invalid ranges (D,E,F): if the nybble is negative,
-				// the result is F000.  If positive, 0000. Nothing else
-				// like previous range, etc seems to have any effect.  If
-				// range is valid, do the shift normally.  Note these are
-				// both shifted right once to do the filters properly, but 
-				// the output will be shifted back again at the end.
-				int shift = voice.block_header >> 4;
-				delta = (delta << shift) >> 1;
-				if ( shift > 0x0C )
-					delta = (delta >> 14) & ~0x7FF;
-				
-				// One, two and three point IIR filters
-				int smp1 = voice.interp0;
-				int smp2 = voice.interp1;
-				if ( voice.block_header & 8 )
-				{
-					delta += smp1;
-					delta -= smp2 >> 1;
-					if ( !(voice.block_header & 4) )
-					{
-						delta += (-smp1 - (smp1 >> 1)) >> 5;
-						delta += smp2 >> 5;
-					}
-					else
-					{
-						delta += (-smp1 * 13) >> 7;
-						delta += (smp2 + (smp2 >> 1)) >> 4;
-					}
-				}
-				else if ( voice.block_header & 4 )
-				{
-					delta += smp1 >> 1;
-					delta += (-smp1) >> 5;
-				}
-				
-				voice.interp3 = voice.interp2;
-				voice.interp2 = smp2;
-				voice.interp1 = smp1;
-				voice.interp0 = BOOST::int16_t (clamp_16( delta ) * 2); // sign-extend
-			}
-			
-			// rate (with possible modulation)
-			int rate = GET_LE16( raw_voice.rate ) & 0x3FFF;
-			if ( g.pitch_mods & vbit )
-				rate = (rate * (prev_outx + 32768)) >> 15;
-			
-			// Gaussian interpolation using most recent 4 samples
-			int index = voice.fraction >> 2 & 0x3FC;
-			voice.fraction = (voice.fraction & 0x0FFF) + rate;
-			const BOOST::int16_t* table  = (BOOST::int16_t const*) ((char const*) gauss + index);
-			const BOOST::int16_t* table2 = (BOOST::int16_t const*) ((char const*) gauss + (255*4 - index));
-			int s = ((table  [0] * voice.interp3) >> 12) +
-					((table  [1] * voice.interp2) >> 12) +
-					((table2 [1] * voice.interp1) >> 12);
-			s = (BOOST::int16_t) (s * 2);
-			s += (table2 [0] * voice.interp0) >> 11 & ~1;
-			int output = clamp_16( s );
-			if ( g.noise_enables & vbit )
-				output = noise_amp;
-			
-			// scale output and set outx values
-			output = (output * envx) >> 11 & ~1;
-			
-			// output and apply muting (by setting voice.enabled to 31)
-			// if voice is externally disabled (not a SNES feature)
-			int l = (voice.volume [0] * output) >> voice.enabled;
-			int r = (voice.volume [1] * output) >> voice.enabled;
-			prev_outx = output;
-			raw_voice.outx = int8_t (output >> 8);
-			if ( g.echo_ons & vbit )
-			{
-				echol += l;
-				echor += r;
-			}
-			left  += l;
-			right += r;
-		}
-		// end of channel loop
-		
-		// main volume control
-		left  = (left  * left_volume ) >> (7 + emu_gain_bits);
-		right = (right * right_volume) >> (7 + emu_gain_bits);
-		
-		// Echo FIR filter
-		
-		// read feedback from echo buffer
-		int echo_ptr = this->echo_ptr;
-		uint8_t* echo_buf = &ram [(g.echo_page * 0x100 + echo_ptr) & 0xFFFF];
-		echo_ptr += 4;
-		if ( echo_ptr >= (g.echo_delay & 15) * 0x800 )
-			echo_ptr = 0;
-		int fb_left  = (BOOST::int16_t) GET_LE16( echo_buf     ); // sign-extend
-		int fb_right = (BOOST::int16_t) GET_LE16( echo_buf + 2 ); // sign-extend
-		this->echo_ptr = echo_ptr;
-		
-		// put samples in history ring buffer
-		const int fir_offset = this->fir_offset;
-		short (*fir_pos) [2] = &fir_buf [fir_offset];
-		this->fir_offset = (fir_offset + 7) & 7; // move backwards one step
-		fir_pos [0] [0] = (short) fb_left;
-		fir_pos [0] [1] = (short) fb_right;
-		fir_pos [8] [0] = (short) fb_left; // duplicate at +8 eliminates wrap checking below
-		fir_pos [8] [1] = (short) fb_right;
-		
-		// FIR
-		fb_left =       fb_left * fir_coeff [7] +
-				fir_pos [1] [0] * fir_coeff [6] +
-				fir_pos [2] [0] * fir_coeff [5] +
-				fir_pos [3] [0] * fir_coeff [4] +
-				fir_pos [4] [0] * fir_coeff [3] +
-				fir_pos [5] [0] * fir_coeff [2] +
-				fir_pos [6] [0] * fir_coeff [1] +
-				fir_pos [7] [0] * fir_coeff [0];
-		
-		fb_right =     fb_right * fir_coeff [7] +
-				fir_pos [1] [1] * fir_coeff [6] +
-				fir_pos [2] [1] * fir_coeff [5] +
-				fir_pos [3] [1] * fir_coeff [4] +
-				fir_pos [4] [1] * fir_coeff [3] +
-				fir_pos [5] [1] * fir_coeff [2] +
-				fir_pos [6] [1] * fir_coeff [1] +
-				fir_pos [7] [1] * fir_coeff [0];
-		
-		left  += (fb_left  * g.left_echo_volume ) >> 14;
-		right += (fb_right * g.right_echo_volume) >> 14;
-		
-		// echo buffer feedback
-		if ( !(g.flags & 0x20) )
-		{
-			echol += (fb_left  * g.echo_feedback) >> 14;
-			echor += (fb_right * g.echo_feedback) >> 14;
-			SET_LE16( echo_buf    , clamp_16( echol ) );
-			SET_LE16( echo_buf + 2, clamp_16( echor ) );
-		}
-		
-		if ( out_buf )
-		{
-			// write final samples
-			
-			left  = clamp_16( left  );
-			right = clamp_16( right );
-			
-			int mute = g.flags & 0x40;
-			
-			out_buf [0] = (short) left;
-			out_buf [1] = (short) right;
-			out_buf += 2;
-			
-			// muting
-			if ( mute )
-			{
-				out_buf [-2] = 0;
-				out_buf [-1] = 0;
-			}
-		}
+		out  = m.extra;
+		size = extra_size;
 	}
+	m.out_begin = out;
+	m.out       = out;
+	m.out_end   = out + size;
 }
 
-// Base normal_gauss table is almost exactly (with an error of 0 or -1 for each entry):
-// int normal_gauss [512];
-// normal_gauss [i] = exp((i-511)*(i-511)*-9.975e-6)*pow(sin(0.00307096*i),1.7358)*1304.45
+// Volume registers and efb are signed! Easy to forget int8_t cast.
+// Prefixes are to avoid accidental use of locals with same names.
 
-// Interleved gauss table (to improve cache coherency).
-// gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i]
-const BOOST::int16_t Spc_Dsp::gauss [512] =
+// Interleaved gauss table (to improve cache locality)
+// interleved_gauss [i] = gauss [(i & 1) * 256 + 255 - (i >> 1 & 0xFF)]
+static short const interleved_gauss [512] =
 {
  370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
  339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
@@ -664,3 +126,579 @@
    0, 434,   0, 430,   0, 426,   0, 422,   0, 418,   0, 414,   0, 410,   0, 405,
    0, 401,   0, 397,   0, 393,   0, 389,   0, 385,   0, 381,   0, 378,   0, 374,
 };
+
+
+//// Counters
+
+#define RATE( rate, div )\
+	(rate >= div ? rate / div * 8 - 1 : rate - 1)
+
+static unsigned const counter_mask [32] =
+{
+	RATE(   2,2), RATE(2048,4), RATE(1536,3),
+	RATE(1280,5), RATE(1024,4), RATE( 768,3),
+	RATE( 640,5), RATE( 512,4), RATE( 384,3),
+	RATE( 320,5), RATE( 256,4), RATE( 192,3),
+	RATE( 160,5), RATE( 128,4), RATE(  96,3),
+	RATE(  80,5), RATE(  64,4), RATE(  48,3),
+	RATE(  40,5), RATE(  32,4), RATE(  24,3),
+	RATE(  20,5), RATE(  16,4), RATE(  12,3),
+	RATE(  10,5), RATE(   8,4), RATE(   6,3),
+	RATE(   5,5), RATE(   4,4), RATE(   3,3),
+	              RATE(   2,4),
+	              RATE(   1,4)
+};
+#undef RATE
+
+inline void Spc_Dsp::init_counter()
+{
+	// counters start out with this synchronization
+	m.counters [0] =     1;
+	m.counters [1] =     0;
+	m.counters [2] = -0x20u;
+	m.counters [3] =  0x0B;
+	
+	int n = 2;
+	for ( int i = 1; i < 32; i++ )
+	{
+		m.counter_select [i] = &m.counters [n];
+		if ( !--n )
+			n = 3;
+	}
+	m.counter_select [ 0] = &m.counters [0];
+	m.counter_select [30] = &m.counters [2];
+}
+
+inline void Spc_Dsp::run_counter( int i )
+{
+	int n = m.counters [i];
+	if ( !(n-- & 7) )
+		n -= 6 - i;
+	m.counters [i] = n;
+}
+
+#define READ_COUNTER( rate )\
+	(*m.counter_select [rate] & counter_mask [rate])
+
+
+//// Emulation
+
+void Spc_Dsp::run( int clock_count )
+{
+	int new_phase = m.phase + clock_count;
+	int count = new_phase >> 5;
+	m.phase = new_phase & 31;
+	if ( !count )
+		return;
+	
+	uint8_t* const ram = m.ram;
+	uint8_t const* const dir = &ram [REG(dir) * 0x100];
+	int const slow_gaussian = (REG(pmon) >> 1) | REG(non);
+	int const noise_rate = REG(flg) & 0x1F;
+	
+	// Global volume
+	int mvoll = (int8_t) REG(mvoll);
+	int mvolr = (int8_t) REG(mvolr);
+	if ( mvoll * mvolr < m.surround_threshold )
+		mvoll = -mvoll; // eliminate surround
+	
+	do
+	{
+		// KON/KOFF reading
+		if ( (m.every_other_sample ^= 1) != 0 )
+		{
+			m.new_kon &= ~m.kon;
+			m.kon    = m.new_kon;
+			m.t_koff = REG(koff); 
+		}
+		
+		run_counter( 1 );
+		run_counter( 2 );
+		run_counter( 3 );
+		
+		// Noise
+		if ( !READ_COUNTER( noise_rate ) )
+		{
+			int feedback = (m.noise << 13) ^ (m.noise << 14);
+			m.noise = (feedback & 0x4000) ^ (m.noise >> 1);
+		}
+		
+		// Voices
+		int pmon_input = 0;
+		int main_out_l = 0;
+		int main_out_r = 0;
+		int echo_out_l = 0;
+		int echo_out_r = 0;
+		voice_t* v = m.voices;
+		uint8_t* v_regs = m.regs;
+		int vbit = 1;
+		do
+		{
+			#define SAMPLE_PTR(i) GET_LE16A( &dir [VREG(v_regs,srcn) * 4 + i * 2] )
+			
+			int brr_header = ram [v->brr_addr];
+			int kon_delay = v->kon_delay;
+			
+			// Pitch
+			int pitch = GET_LE16A( &VREG(v_regs,pitchl) ) & 0x3FFF;
+			if ( REG(pmon) & vbit )
+				pitch += ((pmon_input >> 5) * pitch) >> 10;
+			
+			// KON phases
+			if ( --kon_delay >= 0 )
+			{
+				v->kon_delay = kon_delay;
+				
+				// Get ready to start BRR decoding on next sample
+				if ( kon_delay == 4 )
+				{
+					v->brr_addr   = SAMPLE_PTR( 0 );
+					v->brr_offset = 1;
+					v->buf_pos    = v->buf;
+					brr_header    = 0; // header is ignored on this sample
+				}
+				
+				// Envelope is never run during KON
+				v->env        = 0;
+				v->hidden_env = 0;
+				
+				// Disable BRR decoding until last three samples
+				v->interp_pos = (kon_delay & 3 ? 0x4000 : 0);
+				
+				// Pitch is never added during KON
+				pitch = 0;
+			}
+			
+			int env = v->env;
+			
+			// Gaussian interpolation
+			{
+				int output = 0;
+				VREG(v_regs,envx) = (uint8_t) (env >> 4);
+				if ( env )
+				{
+					// Make pointers into gaussian based on fractional position between samples
+					int offset = (unsigned) v->interp_pos >> 3 & 0x1FE;
+					short const* fwd = interleved_gauss       + offset;
+					short const* rev = interleved_gauss + 510 - offset; // mirror left half of gaussian
+					
+					int const* in = &v->buf_pos [(unsigned) v->interp_pos >> 12];
+					
+					if ( !(slow_gaussian & vbit) ) // 99%
+					{
+						// Faster approximation when exact sample value isn't necessary for pitch mod
+						output = (fwd [0] * in [0] +
+						          fwd [1] * in [1] +
+						          rev [1] * in [2] +
+						          rev [0] * in [3]) >> 11;
+						output = (output * env) >> 11;
+					}
+					else
+					{
+						output = (int16_t) (m.noise * 2);
+						if ( !(REG(non) & vbit) )
+						{
+							output  = (fwd [0] * in [0]) >> 11;
+							output += (fwd [1] * in [1]) >> 11;
+							output += (rev [1] * in [2]) >> 11;
+							output = (int16_t) output;
+							output += (rev [0] * in [3]) >> 11;
+							
+							CLAMP16( output );
+							output &= ~1;
+						}
+						output = (output * env) >> 11 & ~1;
+					}
+					
+					// Output
+					int l = output * v->volume [0];
+					int r = output * v->volume [1];
+					
+					main_out_l += l;
+					main_out_r += r;
+					
+					if ( REG(eon) & vbit )
+					{
+						echo_out_l += l;
+						echo_out_r += r;
+					}
+				}
+				
+				pmon_input = output;
+				VREG(v_regs,outx) = (uint8_t) (output >> 8);
+			}
+			
+			// Soft reset or end of sample
+			if ( REG(flg) & 0x80 || (brr_header & 3) == 1 )
+			{
+				v->env_mode = env_release;
+				env         = 0;
+			}
+			
+			if ( m.every_other_sample )
+			{
+				// KOFF
+				if ( m.t_koff & vbit )
+					v->env_mode = env_release;
+				
+				// KON
+				if ( m.kon & vbit )
+				{
+					v->kon_delay = 5;
+					v->env_mode  = env_attack;
+					REG(endx) &= ~vbit;
+				}
+			}
+			
+			// Envelope
+			if ( !v->kon_delay )
+			{
+				if ( v->env_mode == env_release ) // 97%
+				{
+					env -= 0x8;
+					v->env = env;
+					if ( env <= 0 )
+					{
+						v->env = 0;
+						goto skip_brr; // no BRR decoding for you!
+					}
+				}
+				else // 3%
+				{
+					int rate;
+					int const adsr0 = VREG(v_regs,adsr0);
+					int env_data = VREG(v_regs,adsr1);
+					if ( adsr0 >= 0x80 ) // 97% ADSR
+					{
+						if ( v->env_mode > env_decay ) // 89%
+						{
+							env--;
+							env -= env >> 8;
+							rate = env_data & 0x1F;
+							
+							// optimized handling
+							v->hidden_env = env;
+							if ( READ_COUNTER( rate ) )
+								goto exit_env;
+							v->env = env;
+							goto exit_env;
+						}
+						else if ( v->env_mode == env_decay )
+						{
+							env--;
+							env -= env >> 8;
+							rate = (adsr0 >> 3 & 0x0E) + 0x10;
+						}
+						else // env_attack
+						{
+							rate = (adsr0 & 0x0F) * 2 + 1;
+							env += rate < 31 ? 0x20 : 0x400;
+						}
+					}
+					else // GAIN
+					{
+						int mode;
+						env_data = VREG(v_regs,gain);
+						mode = env_data >> 5;
+						if ( mode < 4 ) // direct
+						{
+							env = env_data * 0x10;
+							rate = 31;
+						}
+						else
+						{
+							rate = env_data & 0x1F;
+							if ( mode == 4 ) // 4: linear decrease
+							{
+								env -= 0x20;
+							}
+							else if ( mode < 6 ) // 5: exponential decrease
+							{
+								env--;
+								env -= env >> 8;
+							}
+							else // 6,7: linear increase
+							{
+								env += 0x20;
+								if ( mode > 6 && (unsigned) v->hidden_env >= 0x600 )
+									env += 0x8 - 0x20; // 7: two-slope linear increase
+							}
+						}
+					}
+					
+					// Sustain level
+					if ( (env >> 8) == (env_data >> 5) && v->env_mode == env_decay )
+						v->env_mode = env_sustain;
+					
+					v->hidden_env = env;
+					
+					// unsigned cast because linear decrease going negative also triggers this
+					if ( (unsigned) env > 0x7FF )
+					{
+						env = (env < 0 ? 0 : 0x7FF);
+						if ( v->env_mode == env_attack )
+							v->env_mode = env_decay;
+					}
+					
+					if ( !READ_COUNTER( rate ) )
+						v->env = env; // nothing else is controlled by the counter
+				}
+			}
+		exit_env:
+			
+			{
+				// Apply pitch
+				int old_pos = v->interp_pos;
+				int interp_pos = (old_pos & 0x3FFF) + pitch;
+				if ( interp_pos > 0x7FFF )
+					interp_pos = 0x7FFF;
+				v->interp_pos = interp_pos;
+				
+				// BRR decode if necessary
+				if ( old_pos >= 0x4000 )
+				{
+					// Arrange the four input nybbles in 0xABCD order for easy decoding
+					int nybbles = ram [(v->brr_addr + v->brr_offset) & 0xFFFF] * 0x100 +
+							ram [(v->brr_addr + v->brr_offset + 1) & 0xFFFF];
+					
+					// Advance read position
+					int const brr_block_size = 9;
+					int brr_offset = v->brr_offset;
+					if ( (brr_offset += 2) >= brr_block_size )
+					{
+						// Next BRR block
+						int brr_addr = (v->brr_addr + brr_block_size) & 0xFFFF;
+						assert( brr_offset == brr_block_size );
+						if ( brr_header & 1 )
+						{
+							brr_addr = SAMPLE_PTR( 1 );
+							if ( !v->kon_delay )
+								REG(endx) |= vbit;
+						}
+						v->brr_addr = brr_addr;
+						brr_offset  = 1;
+					}
+					v->brr_offset = brr_offset;
+					
+					// Decode
+					
+					// 0: >>1  1: <<0  2: <<1 ... 12: <<11  13-15: >>4 <<11
+					static unsigned char const shifts [16 * 2] = {
+						13,12,12,12,12,12,12,12,12,12,12, 12, 12, 16, 16, 16,
+						 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
+					};
+					int const scale = brr_header >> 4;
+					int const right_shift = shifts [scale];
+					int const left_shift  = shifts [scale + 16];
+					
+					// Write to next four samples in circular buffer
+					int* pos = v->buf_pos;
+					int* end;
+					
+					// Decode four samples
+					for ( end = pos + 4; pos < end; pos++, nybbles <<= 4 )
+					{
+						// Extract upper nybble and scale appropriately
+						int s = ((int16_t) nybbles >> right_shift) << left_shift;
+						
+						// Apply IIR filter (8 is the most commonly used)
+						int const filter = brr_header & 0x0C;
+						int const p1 = pos [brr_buf_size - 1];
+						int const p2 = pos [brr_buf_size - 2] >> 1;
+						if ( filter >= 8 )
+						{
+							s += p1;
+							s -= p2;
+							if ( filter == 8 ) // s += p1 * 0.953125 - p2 * 0.46875
+							{
+								s += p2 >> 4;
+								s += (p1 * -3) >> 6;
+							}
+							else // s += p1 * 0.8984375 - p2 * 0.40625
+							{
+								s += (p1 * -13) >> 7;
+								s += (p2 * 3) >> 4;
+							}
+						}
+						else if ( filter ) // s += p1 * 0.46875
+						{
+							s += p1 >> 1;
+							s += (-p1) >> 5;
+						}
+						
+						// Adjust and write sample
+						CLAMP16( s );
+						s = (int16_t) (s * 2);
+						pos [brr_buf_size] = pos [0] = s; // second copy simplifies wrap-around
+					}
+					
+					if ( pos >= &v->buf [brr_buf_size] )
+						pos = v->buf;
+					v->buf_pos = pos;
+				}
+			}
+skip_brr:
+			// Next voice
+			vbit <<= 1;
+			v_regs += 0x10;
+			v++;
+		}
+		while ( vbit < 0x100 );
+		
+		// Echo position
+		int echo_offset = m.echo_offset;
+		uint8_t* const echo_ptr = &ram [(REG(esa) * 0x100 + echo_offset) & 0xFFFF];
+		if ( !echo_offset )
+			m.echo_length = (REG(edl) & 0x0F) * 0x800;
+		echo_offset += 4;
+		if ( echo_offset >= m.echo_length )
+			echo_offset = 0;
+		m.echo_offset = echo_offset;
+		
+		// FIR
+		int echo_in_l = GET_LE16SA( echo_ptr + 0 );
+		int echo_in_r = GET_LE16SA( echo_ptr + 2 );
+		
+		int (*echo_hist_pos) [2] = m.echo_hist_pos;
+		if ( ++echo_hist_pos >= &m.echo_hist [echo_hist_size] )
+			echo_hist_pos = m.echo_hist;
+		m.echo_hist_pos = echo_hist_pos;
+		
+		echo_hist_pos [0] [0] = echo_hist_pos [8] [0] = echo_in_l;
+		echo_hist_pos [0] [1] = echo_hist_pos [8] [1] = echo_in_r;
+		
+		#define CALC_FIR_( i, in )  ((in) * (int8_t) REG(fir + i * 0x10))
+		echo_in_l = CALC_FIR_( 7, echo_in_l );
+		echo_in_r = CALC_FIR_( 7, echo_in_r );
+		
+		#define CALC_FIR( i, ch )   CALC_FIR_( i, echo_hist_pos [i + 1] [ch] )
+		#define DO_FIR( i )\
+			echo_in_l += CALC_FIR( i, 0 );\
+			echo_in_r += CALC_FIR( i, 1 );
+		DO_FIR( 0 );
+		DO_FIR( 1 );
+		DO_FIR( 2 );
+		#if defined (__MWERKS__) && __MWERKS__ < 0x3200
+			__eieio(); // keeps compiler from stupidly "caching" things in memory
+		#endif
+		DO_FIR( 3 );
+		DO_FIR( 4 );
+		DO_FIR( 5 );
+		DO_FIR( 6 );
+		
+		// Echo out
+		if ( !(REG(flg) & 0x20) )
+		{
+			int l = (echo_out_l >> 7) + ((echo_in_l * (int8_t) REG(efb)) >> 14);
+			int r = (echo_out_r >> 7) + ((echo_in_r * (int8_t) REG(efb)) >> 14);
+			
+			// just to help pass more validation tests
+			#if SPC_MORE_ACCURACY
+				l &= ~1;
+				r &= ~1;
+			#endif
+			
+			CLAMP16( l );
+			CLAMP16( r );
+			
+			SET_LE16A( echo_ptr + 0, l );
+			SET_LE16A( echo_ptr + 2, r );
+		}
+		
+		// Sound out
+		int l = (((main_out_l * mvoll + echo_in_l * (int8_t) REG(evoll)) >> 14) * m.gain) >> 8;
+		int r = (((main_out_r * mvolr + echo_in_r * (int8_t) REG(evolr)) >> 14) * m.gain) >> 8;
+		
+		CLAMP16( l );
+		CLAMP16( r );
+		
+		if ( (REG(flg) & 0x40) )
+		{
+			l = 0;
+			r = 0;
+		}
+		
+		sample_t* out = m.out;
+		WRITE_SAMPLES( l, r, out );
+		m.out = out;
+	}
+	while ( --count );
+}
+
+
+//// Setup
+
+void Spc_Dsp::mute_voices( int mask )
+{
+	m.mute_mask = mask;
+	for ( int i = 0; i < voice_count; i++ )
+	{
+		m.voices [i].enabled = (mask >> i & 1) - 1;
+		update_voice_vol( i * 0x10 );
+	}
+}
+
+void Spc_Dsp::init( void* ram_64k )
+{
+	m.ram = (uint8_t*) ram_64k;
+	set_gain( gain_unit );
+	mute_voices( 0 );
+	disable_surround( false );
+	set_output( 0, 0 );
+	reset();
+	
+	#ifndef NDEBUG
+		// be sure this sign-extends
+		assert( (int16_t) 0x8000 == -0x8000 );
+		
+		// be sure right shift preserves sign
+		assert( (-1 >> 1) == -1 );
+		
+		// check clamp macro
+		int i;
+		i = +0x8000; CLAMP16( i ); assert( i == +0x7FFF );
+		i = -0x8001; CLAMP16( i ); assert( i == -0x8000 );
+		
+		blargg_verify_byte_order();
+	#endif
+}
+
+void Spc_Dsp::soft_reset_common()
+{
+	require( m.ram ); // init() must have been called already
+	
+	m.noise              = 0x4000;
+	m.echo_hist_pos      = m.echo_hist;
+	m.every_other_sample = 1;
+	m.echo_offset        = 0;
+	m.phase              = 0;
+	
+	init_counter();
+}
+
+void Spc_Dsp::soft_reset()
+{
+	REG(flg) = 0xE0;
+	soft_reset_common();
+}
+
+void Spc_Dsp::load( uint8_t const regs [register_count] )
+{
+	memcpy( m.regs, regs, sizeof m.regs );
+	memset( &m.regs [register_count], 0, offsetof (state_t,ram) - register_count );
+	
+	// Internal state
+	int i;
+	for ( i = voice_count; --i >= 0; )
+	{
+		voice_t& v = m.voices [i];
+		v.brr_offset = 1;
+		v.buf_pos    = v.buf;
+	}
+	m.new_kon = REG(kon);
+	
+	mute_voices( m.mute_mask );
+	soft_reset_common();
+}
+
+void Spc_Dsp::reset() { load( initial_regs ); }
--- a/src/console/Spc_Dsp.h	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Spc_Dsp.h	Tue May 15 13:18:35 2007 -0700
@@ -1,152 +1,218 @@
-// Super Nintendo (SNES) SPC DSP emulator
+// Fast SNES SPC-700 DSP emulator (about 3x speed of accurate one)
 
-// Game_Music_Emu 0.5.2
+// snes_spc 0.9.0
 #ifndef SPC_DSP_H
 #define SPC_DSP_H
 
 #include "blargg_common.h"
 
 class Spc_Dsp {
-	typedef BOOST::int8_t int8_t;
+public:
 	typedef BOOST::uint8_t uint8_t;
-public:
+	
+// Setup
+	
+	// Initializes DSP and has it use the 64K RAM provided
+	void init( void* ram_64k );
+
+	// Sets destination for output samples. If out is NULL or out_size is 0,
+	// doesn't generate any.
+	typedef short sample_t;
+	void set_output( sample_t* out, int out_size );
+
+	// Number of samples written to output since it was last set, always
+	// a multiple of 2. Undefined if more samples were generated than
+	// output buffer could hold.
+	int sample_count() const;
+
+// Emulation
 	
-	// Keeps pointer to 64K ram
-	Spc_Dsp( uint8_t* ram );
+	// Resets DSP to power-on state
+	void reset();
+
+	// Emulates pressing reset switch on SNES
+	void soft_reset();
 	
-	// Mute voice n if bit n (1 << n) of mask is clear.
+	// Reads/writes DSP registers. For accuracy, you must first call run()
+	// to catch the DSP up to present.
+	int  read ( int addr ) const;
+	void write( int addr, int data );
+
+	// Runs DSP for specified number of clocks (~1024000 per second). Every 32 clocks
+	// a pair of samples is generated.
+	void run( int clock_count );
+
+// Sound control
+
+	// Mutes voices corresponding to non-zero bits in mask (overrides VxVOL with 0).
+	// Reduces emulation accuracy.
 	enum { voice_count = 8 };
 	void mute_voices( int mask );
-	
-	// Clear state and silence everything.
-	void reset();
-	
-	// Set gain, where 1.0 is normal. When greater than 1.0, output is clamped to
-	// the 16-bit sample range.
-	void set_gain( double );
+
+	// If true, prevents channels and global volumes from being phase-negated
+	void disable_surround( bool disable = true );
 	
-	// If true, prevent channels and global volumes from being phase-negated
-	void disable_surround( bool disable );
+	enum { gain_unit = 0x100 };
+	void set_gain( int gain );
 	
-	// Read/write register 'n', where n ranges from 0 to register_count - 1.
-	enum { register_count = 128 };
-	int  read ( int n );
-	void write( int n, int );
+// State
 	
-	// Run DSP for 'count' samples. Write resulting samples to 'buf' if not NULL.
-	void run( long count, short* buf = NULL );
-	
-	
-// End of public interface
-private:
+	// Resets DSP and uses supplied values to initialize registers
+	enum { register_count = 128 };
+	void load( uint8_t const regs [register_count] );
+
+// DSP register addresses
+
+	// Global registers
+	enum {
+	    r_mvoll = 0x0C, r_mvolr = 0x1C,
+	    r_evoll = 0x2C, r_evolr = 0x3C,
+	    r_kon   = 0x4C, r_koff  = 0x5C,
+	    r_flg   = 0x6C, r_endx  = 0x7C,
+	    r_efb   = 0x0D, r_pmon  = 0x2D,
+	    r_non   = 0x3D, r_eon   = 0x4D,
+	    r_dir   = 0x5D, r_esa   = 0x6D,
+	    r_edl   = 0x7D,
+	    r_fir   = 0x0F // 8 coefficients at 0x0F, 0x1F ... 0x7F
+	};
+
+	// Voice registers
+	enum {
+		v_voll   = 0x00, v_volr   = 0x01,
+		v_pitchl = 0x02, v_pitchh = 0x03,
+		v_srcn   = 0x04, v_adsr0  = 0x05,
+		v_adsr1  = 0x06, v_gain   = 0x07,
+		v_envx   = 0x08, v_outx   = 0x09
+	};
+
+public:
+	enum { extra_size = 16 };
+	sample_t* extra()               { return m.extra; }
+	sample_t const* out_pos() const { return m.out; }
+public:
+	BLARGG_DISABLE_NOTHROW
 	
-	struct raw_voice_t {
-		int8_t  left_vol;
-		int8_t  right_vol;
-		uint8_t rate [2];
-		uint8_t waveform;
-		uint8_t adsr [2];   // envelope rates for attack, decay, and sustain
-		uint8_t gain;       // envelope gain (if not using ADSR)
-		int8_t  envx;       // current envelope level
-		int8_t  outx;       // current sample
-		int8_t  unused [6];
-	};
+	typedef BOOST::int8_t   int8_t;
+	typedef BOOST::int16_t int16_t;
+	
+	enum { echo_hist_size = 8 };
 	
-	struct globals_t {
-		int8_t  unused1 [12];
-		int8_t  left_volume;        // 0C   Main Volume Left (-.7)
-		int8_t  echo_feedback;      // 0D   Echo Feedback (-.7)
-		int8_t  unused2 [14];
-		int8_t  right_volume;       // 1C   Main Volume Right (-.7)
-		int8_t  unused3 [15];
-		int8_t  left_echo_volume;   // 2C   Echo Volume Left (-.7)
-		uint8_t pitch_mods;         // 2D   Pitch Modulation on/off for each voice
-		int8_t  unused4 [14];
-		int8_t  right_echo_volume;  // 3C   Echo Volume Right (-.7)
-		uint8_t noise_enables;      // 3D   Noise output on/off for each voice
-		int8_t  unused5 [14];
-		uint8_t key_ons;            // 4C   Key On for each voice
-		uint8_t echo_ons;           // 4D   Echo on/off for each voice
-		int8_t  unused6 [14];
-		uint8_t key_offs;           // 5C   key off for each voice (instantiates release mode)
-		uint8_t wave_page;          // 5D   source directory (wave table offsets)
-		int8_t  unused7 [14];
-		uint8_t flags;              // 6C   flags and noise freq
-		uint8_t echo_page;          // 6D
-		int8_t  unused8 [14];
-		uint8_t wave_ended;         // 7C
-		uint8_t echo_delay;         // 7D   ms >> 4
-		char    unused9 [2];
+	enum env_mode_t { env_release, env_attack, env_decay, env_sustain };
+	enum { brr_buf_size = 12 };
+	struct voice_t
+	{
+		int buf [brr_buf_size*2];// decoded samples (twice the size to simplify wrap handling)
+		int* buf_pos;           // place in buffer where next samples will be decoded
+		int interp_pos;         // relative fractional position in sample (0x1000 = 1.0)
+		int brr_addr;           // address of current BRR block
+		int brr_offset;         // current decoding offset in BRR block
+		int kon_delay;          // KON delay/current setup phase
+		env_mode_t env_mode;
+		int env;                // current envelope level
+		int hidden_env;         // used by GAIN mode 7, very obscure quirk
+		int volume [2];         // copy of volume from DSP registers, with surround disabled
+		int enabled;            // -1 if enabled, 0 if muted
 	};
-	
-	union {
-		raw_voice_t voice [voice_count];
-		uint8_t reg [register_count];
-		globals_t g;
+private:
+	struct state_t
+	{
+		uint8_t regs [register_count];
+		
+		// Echo history keeps most recent 8 samples (twice the size to simplify wrap handling)
+		int echo_hist [echo_hist_size * 2] [2];
+		int (*echo_hist_pos) [2]; // &echo_hist [0 to 7]
+		
+		int every_other_sample; // toggles every sample
+		int kon;                // KON value when last checked
+		int noise;
+		int echo_offset;        // offset from ESA in echo buffer
+		int echo_length;        // number of bytes that echo_offset will stop at
+		int phase;              // next clock cycle to run (0-31)
+		unsigned counters [4];
+		
+		int new_kon;
+		int t_koff;
+		
+		voice_t voices [voice_count];
+		
+		unsigned* counter_select [32];
+		
+		// non-emulation state
+		uint8_t* ram; // 64K shared RAM between DSP and SMP
+		int mute_mask;
+		int gain;
+		int surround_threshold;
+		sample_t* out;
+		sample_t* out_end;
+		sample_t* out_begin;
+		sample_t extra [extra_size];
 	};
-	
-	uint8_t* const ram;
-	
-	// Cache of echo FIR values for faster access
-	short fir_coeff [voice_count];
-	
-	// fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code
-	short fir_buf [16] [2];
-	int fir_offset; // (0 to 7)
+	state_t m;
 	
-	enum { emu_gain_bits = 8 };
-	int emu_gain;
-	
-	int keyed_on; // 8-bits for 8 voices
-	int keys;
-	
-	int echo_ptr;
-	int noise_amp;
-	int noise;
-	int noise_count;
-	
-	int surround_threshold;
-	
-	static BOOST::int16_t const gauss [];
-	
-	enum state_t {
-		state_attack,
-		state_decay,
-		state_sustain,
-		state_release
-	};
-	
-	struct voice_t {
-		short volume [2];
-		short fraction;// 12-bit fractional position
-		short interp3; // most recent four decoded samples
-		short interp2;
-		short interp1;
-		short interp0;
-		short block_remain; // number of nybbles remaining in current block
-		unsigned short addr;
-		short block_header; // header byte from current block
-		short envcnt;
-		short envx;
-		short on_cnt;
-		short enabled; // 7 if enabled, 31 if disabled
-		short envstate;
-		short unused; // pad to power of 2
-	};
-	
-	voice_t voice_state [voice_count];
-	
-	int clock_envelope( int );
+	void init_counter();
+	void run_counter( int );
+	void soft_reset_common();
+	void write_outline( int addr, int data );
+	void update_voice_vol( int addr );
 };
 
-inline void Spc_Dsp::disable_surround( bool disable ) { surround_threshold = disable ? 0 : -0x7FFF; }
+#include <assert.h>
+
+inline int Spc_Dsp::sample_count() const { return m.out - m.out_begin; }
 
-inline void Spc_Dsp::set_gain( double v ) { emu_gain = (int) (v * (1 << emu_gain_bits)); }
+inline int Spc_Dsp::read( int addr ) const
+{
+	assert( (unsigned) addr < register_count );
+	return m.regs [addr];
+}
 
-inline int Spc_Dsp::read( int i )
+inline void Spc_Dsp::update_voice_vol( int addr )
 {
-	assert( (unsigned) i < register_count );
-	return reg [i];
+	int l = (int8_t) m.regs [addr + v_voll];
+	int r = (int8_t) m.regs [addr + v_volr];
+	
+	if ( l * r < m.surround_threshold )
+	{
+		// signs differ, so complement the negative ones (x ^ (x >> 7) == ~x == -x - 1 for x < 0)
+		l ^= l >> 7;
+		r ^= r >> 7;
+	}
+	
+	voice_t& v = m.voices [addr >> 4];
+	int enabled = v.enabled;
+	v.volume [0] = l & enabled;
+	v.volume [1] = r & enabled;
 }
 
+inline void Spc_Dsp::write( int addr, int data )
+{
+	assert( (unsigned) addr < register_count );
+	
+	m.regs [addr] = (uint8_t) data;
+	int low = addr & 0x0F;
+	if ( low < 0x2 ) // voice volumes
+	{
+		update_voice_vol( low ^ addr );
+	}
+	else if ( low == 0xC )
+	{
+		if ( addr == r_kon )
+			m.new_kon = (uint8_t) data;
+		
+		if ( addr == r_endx ) // always cleared, regardless of data written
+			m.regs [r_endx] = 0;
+	}
+}
+
+inline void Spc_Dsp::set_gain( int gain ) { m.gain = gain; }
+
+inline void Spc_Dsp::disable_surround( bool disable )
+{
+	m.surround_threshold = disable ? 0 : -0x4000;
+}
+
+#define SPC_NO_COPY_STATE_FUNCS 1
+
+#define SPC_LESS_ACCURATE 1
+
 #endif
--- a/src/console/Spc_Emu.cxx	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Spc_Emu.cxx	Tue May 15 13:18:35 2007 -0700
@@ -216,7 +216,7 @@
 	blargg_err_t load_( Data_Reader& in )
 	{
 		long file_size = in.remain();
-		if ( file_size < Snes_Spc::spc_file_size )
+		if ( file_size < Snes_Spc::spc_min_file_size )
 			return gme_wrong_file_type;
 		RETURN_ERR( in.read( &header, Spc_Emu::header_size ) );
 		RETURN_ERR( check_spc_header( header.tag ) );
@@ -247,7 +247,8 @@
 
 blargg_err_t Spc_Emu::set_sample_rate_( long sample_rate )
 {
-	apu.set_gain( gain() );
+	RETURN_ERR( apu.init() );
+	apu.set_gain( (int) (gain() * Snes_Spc::gain_unit) );
 	if ( sample_rate != native_sample_rate )
 	{
 		RETURN_ERR( resampler.buffer_size( native_sample_rate / 20 * 2 ) );
@@ -268,14 +269,14 @@
 	file_data = in;
 	file_size = size;
 	set_voice_count( Snes_Spc::voice_count );
-	if ( size < Snes_Spc::spc_file_size )
+	if ( size < Snes_Spc::spc_min_file_size )
 		return gme_wrong_file_type;
 	return check_spc_header( in );
 }
 
 // Emulation
 
-void Spc_Emu::set_tempo_( double t ) { apu.set_tempo( t ); }
+void Spc_Emu::set_tempo_( double t ) { apu.set_tempo( (int) (t * Snes_Spc::tempo_unit) ); }
 
 blargg_err_t Spc_Emu::start_track_( int track )
 {
@@ -293,6 +294,9 @@
 		count = long (count * resampler.ratio()) & ~1;
 		count -= resampler.skip_input( count );
 	}
+	
+	// TODO: shouldn't skip be adjusted for the 64 samples read afterwards?
+	
 	if ( count > 0 )
 		RETURN_ERR( apu.skip( count ) );
 	
--- a/src/console/Spc_Emu.h	Sat May 12 17:06:13 2007 -0700
+++ b/src/console/Spc_Emu.h	Tue May 15 13:18:35 2007 -0700
@@ -31,7 +31,7 @@
 		byte date [11];
 		byte len_secs [3];
 		byte fade_msec [4];
-		char author [32];
+		char author [32]; // sometimes first char should be skipped (see official SPC spec)
 		byte mute_mask;
 		byte emulator;
 		byte unused2 [46];