view liba52/imdct_mlib.c @ 4689:61f4b8fd380e

Fixing "quake" by direct waiting of vsync. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (I don't why - but SMART_SWITCH is always disabled on my card) Benchmarks: [SRC] VIDEO: [DIV3] 624x356 24bpp 24.00 fps 497.3 kbps (60.7 kbyte/s) [DEST] 1024x768@32 70fps (-xvidix -fs -zoom) -vc ffdivx -double: BENCHMARKs: V: 3.838s VO: 7.305s A: 0.555s Sys: 18.264s = 29.962s BENCHMARK%: V: 12.8110% VO: 24.3808% A: 1.8518% Sys: 60.9564% = 100.0000% total video time: 11.143s -vc ffdivx -nodouble: BENCHMARKs: V: 3.846s VO: 1.668s A: 0.539s Sys: 23.869s = 29.922s BENCHMARK%: V: 12.8525% VO: 5.5744% A: 1.8015% Sys: 79.7716% = 100.0000% total video time: 5.514s -vc divxds -double (direct rendering) BENCHMARKs: V: 8.275s VO: 5.750s A: 0.532s Sys: 15.414s = 29.971s BENCHMARK%: V: 27.6115% VO: 19.1850% A: 1.7737% Sys: 51.4298% = 100.0000% total video time: 14.070s -vc divxds -nodouble (direct rendering) BENCHMARKs: V: 7.353s VO: 0.002s A: 0.521s Sys: 22.083s = 29.958s BENCHMARK%: V: 24.5433% VO: 0.0052% A: 1.7382% Sys: 73.7133% = 100.0000% total video time: 7.355s Unfortunately we have dramatic lost of performance (100%) :(
author nick
date Wed, 13 Feb 2002 08:24:13 +0000
parents a4721884eaf5
children 07f1e7669772
line wrap: on
line source

/*
 * imdct_mlib.c
 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of a52dec, a free ATSC A-52 stream decoder.
 * See http://liba52.sourceforge.net/ for updates.
 *
 * a52dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * a52dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "config.h"

#ifdef LIBA52_MLIB

#include <mlib_types.h>
#include <mlib_status.h>
#include <mlib_signal.h>
#include <string.h>
#include <inttypes.h>

#include "a52.h"
#include "a52_internal.h"

extern sample_t imdct_window[];

void
imdct_do_512_mlib(sample_t data[], sample_t delay[], sample_t bias)
{
	sample_t *buf_real;
	sample_t *buf_imag;
	sample_t *data_ptr;
	sample_t *delay_ptr;
	sample_t *window_ptr;
	sample_t tmp[256] __attribute__((aligned(16)));
	int i;
	
	memcpy(tmp, data, 256 * sizeof(sample_t));
	mlib_SignalIMDCT_F32(tmp);
  
	buf_real = tmp;
	buf_imag = tmp + 128;
	data_ptr = data;
	delay_ptr = delay;
	window_ptr = imdct_window;

	/* Window and convert to real valued signal */
	for(i=0; i< 64; i++) 
	{ 
		*data_ptr++ = -buf_imag[64+i]   * *window_ptr++ + *delay_ptr++ + bias; 
		*data_ptr++ =  buf_real[64-i-1] * *window_ptr++ + *delay_ptr++ + bias; 
	}

	for(i=0; i< 64; i++) 
	{ 
		*data_ptr++ = -buf_real[i]       * *window_ptr++ + *delay_ptr++ + bias; 
		*data_ptr++ =  buf_imag[128-i-1] * *window_ptr++ + *delay_ptr++ + bias; 
	}
	
	/* The trailing edge of the window goes into the delay line */
	delay_ptr = delay;

	for(i=0; i< 64; i++) 
	{ 
		*delay_ptr++ = -buf_real[64+i]   * *--window_ptr; 
		*delay_ptr++ =  buf_imag[64-i-1] * *--window_ptr; 
	}

	for(i=0; i<64; i++) 
	{
		*delay_ptr++ =  buf_imag[i]       * *--window_ptr; 
		*delay_ptr++ = -buf_real[128-i-1] * *--window_ptr; 
	}
}

void
imdct_do_256_mlib(sample_t data[], sample_t delay[], sample_t bias)
{
	sample_t *buf1_real, *buf1_imag;
	sample_t *buf2_real, *buf2_imag;
	sample_t *data_ptr;
	sample_t *delay_ptr;
	sample_t *window_ptr;
	sample_t tmp[256] __attribute__((aligned(16)));
	int i;
	
	memcpy(tmp, data, 256 * sizeof(sample_t));
	mlib_SignalIMDCTSplit_F32(tmp);
  
	buf1_real = tmp;
	buf1_imag = tmp + 128 + 64;
	buf2_real = tmp + 64;
	buf2_imag = tmp + 128;
	data_ptr = data;
	delay_ptr = delay;
	window_ptr = imdct_window;

	/* Window and convert to real valued signal */
	for(i=0; i< 64; i++) 
	{
		*data_ptr++ = -buf1_imag[i]      * *window_ptr++ + *delay_ptr++ + bias;
		*data_ptr++ =  buf1_real[64-i-1] * *window_ptr++ + *delay_ptr++ + bias;
	}

	for(i=0; i< 64; i++) 
	{
		*data_ptr++ = -buf1_real[i]      * *window_ptr++ + *delay_ptr++ + bias;
		*data_ptr++ =  buf1_imag[64-i-1] * *window_ptr++ + *delay_ptr++ + bias;
	}
	
	delay_ptr = delay;

	for(i=0; i< 64; i++) 
	{
		*delay_ptr++ = -buf2_real[i]      * *--window_ptr;
		*delay_ptr++ =  buf2_imag[64-i-1] * *--window_ptr;
	}

	for(i=0; i< 64; i++) 
	{
		*delay_ptr++ =  buf2_imag[i]      * *--window_ptr;
		*delay_ptr++ = -buf2_real[64-i-1] * *--window_ptr;
	}
}

#endif