Mercurial > mplayer.hg
view mp3lib/dct64.c @ 28992:947ef23ba798
Test if create_vdp_decoder() might succeed by calling it from config()
with a small value for max_reference_frames.
This does not make automatic recovery by using software decoder possible,
but lets MPlayer fail more graciously on - actually existing - buggy
hardware that does not support certain H264 widths when using
hardware accelerated decoding (784, 864, 944, 1024, 1808, 1888 pixels on
NVIDIA G98) and if the user tries to hardware-decode more samples at
the same time than supported.
Might break playback of H264 Intra-Only samples on hardware with very
little video memory.
author | cehoyos |
---|---|
date | Sat, 21 Mar 2009 20:11:05 +0000 |
parents | 0783dd397f74 |
children | 32725ca88fed |
line wrap: on
line source
/* * Modified for use with MPlayer, for details see the changelog at * http://svn.mplayerhq.hu/mplayer/trunk/ * $Id$ */ /* * Discrete Cosine Tansform (DCT) for subband synthesis * optimized for machines with no auto-increment. * The performance is highly compiler dependend. Maybe * the dct64.c version for 'normal' processor may be faster * even for Intel processors. */ static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples) { { register real *costab = mp3lib_pnts[0]; b1[0x00] = samples[0x00] + samples[0x1F]; b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; b1[0x01] = samples[0x01] + samples[0x1E]; b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; b1[0x02] = samples[0x02] + samples[0x1D]; b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; b1[0x03] = samples[0x03] + samples[0x1C]; b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; b1[0x04] = samples[0x04] + samples[0x1B]; b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; b1[0x05] = samples[0x05] + samples[0x1A]; b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; b1[0x06] = samples[0x06] + samples[0x19]; b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; b1[0x07] = samples[0x07] + samples[0x18]; b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; b1[0x08] = samples[0x08] + samples[0x17]; b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; b1[0x09] = samples[0x09] + samples[0x16]; b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; b1[0x0A] = samples[0x0A] + samples[0x15]; b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; b1[0x0B] = samples[0x0B] + samples[0x14]; b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; b1[0x0C] = samples[0x0C] + samples[0x13]; b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; b1[0x0D] = samples[0x0D] + samples[0x12]; b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; b1[0x0E] = samples[0x0E] + samples[0x11]; b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; b1[0x0F] = samples[0x0F] + samples[0x10]; b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; } { register real *costab = mp3lib_pnts[1]; b2[0x00] = b1[0x00] + b1[0x0F]; b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; b2[0x01] = b1[0x01] + b1[0x0E]; b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; b2[0x02] = b1[0x02] + b1[0x0D]; b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; b2[0x03] = b1[0x03] + b1[0x0C]; b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; b2[0x04] = b1[0x04] + b1[0x0B]; b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; b2[0x05] = b1[0x05] + b1[0x0A]; b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; b2[0x06] = b1[0x06] + b1[0x09]; b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; b2[0x07] = b1[0x07] + b1[0x08]; b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; b2[0x10] = b1[0x10] + b1[0x1F]; b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; b2[0x11] = b1[0x11] + b1[0x1E]; b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; b2[0x12] = b1[0x12] + b1[0x1D]; b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; b2[0x13] = b1[0x13] + b1[0x1C]; b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; b2[0x14] = b1[0x14] + b1[0x1B]; b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; b2[0x15] = b1[0x15] + b1[0x1A]; b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; b2[0x16] = b1[0x16] + b1[0x19]; b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; b2[0x17] = b1[0x17] + b1[0x18]; b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; } { register real *costab = mp3lib_pnts[2]; b1[0x00] = b2[0x00] + b2[0x07]; b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; b1[0x01] = b2[0x01] + b2[0x06]; b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; b1[0x02] = b2[0x02] + b2[0x05]; b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; b1[0x03] = b2[0x03] + b2[0x04]; b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; b1[0x08] = b2[0x08] + b2[0x0F]; b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; b1[0x09] = b2[0x09] + b2[0x0E]; b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; b1[0x0A] = b2[0x0A] + b2[0x0D]; b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; b1[0x0B] = b2[0x0B] + b2[0x0C]; b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; b1[0x10] = b2[0x10] + b2[0x17]; b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; b1[0x11] = b2[0x11] + b2[0x16]; b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; b1[0x12] = b2[0x12] + b2[0x15]; b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; b1[0x13] = b2[0x13] + b2[0x14]; b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; b1[0x18] = b2[0x18] + b2[0x1F]; b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; b1[0x19] = b2[0x19] + b2[0x1E]; b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; b1[0x1A] = b2[0x1A] + b2[0x1D]; b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; b1[0x1B] = b2[0x1B] + b2[0x1C]; b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; } { register real const cos0 = mp3lib_pnts[3][0]; register real const cos1 = mp3lib_pnts[3][1]; b2[0x00] = b1[0x00] + b1[0x03]; b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; b2[0x01] = b1[0x01] + b1[0x02]; b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; b2[0x04] = b1[0x04] + b1[0x07]; b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; b2[0x05] = b1[0x05] + b1[0x06]; b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; b2[0x08] = b1[0x08] + b1[0x0B]; b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; b2[0x09] = b1[0x09] + b1[0x0A]; b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; b2[0x0C] = b1[0x0C] + b1[0x0F]; b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; b2[0x0D] = b1[0x0D] + b1[0x0E]; b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; b2[0x10] = b1[0x10] + b1[0x13]; b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; b2[0x11] = b1[0x11] + b1[0x12]; b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; b2[0x14] = b1[0x14] + b1[0x17]; b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; b2[0x15] = b1[0x15] + b1[0x16]; b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; b2[0x18] = b1[0x18] + b1[0x1B]; b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; b2[0x19] = b1[0x19] + b1[0x1A]; b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; b2[0x1C] = b1[0x1C] + b1[0x1F]; b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; b2[0x1D] = b1[0x1D] + b1[0x1E]; b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; } { register real const cos0 = mp3lib_pnts[4][0]; b1[0x00] = b2[0x00] + b2[0x01]; b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; b1[0x02] = b2[0x02] + b2[0x03]; b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; b1[0x02] += b1[0x03]; b1[0x04] = b2[0x04] + b2[0x05]; b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; b1[0x06] = b2[0x06] + b2[0x07]; b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; b1[0x06] += b1[0x07]; b1[0x04] += b1[0x06]; b1[0x06] += b1[0x05]; b1[0x05] += b1[0x07]; b1[0x08] = b2[0x08] + b2[0x09]; b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; b1[0x0A] = b2[0x0A] + b2[0x0B]; b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; b1[0x0A] += b1[0x0B]; b1[0x0C] = b2[0x0C] + b2[0x0D]; b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; b1[0x0E] = b2[0x0E] + b2[0x0F]; b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; b1[0x0E] += b1[0x0F]; b1[0x0C] += b1[0x0E]; b1[0x0E] += b1[0x0D]; b1[0x0D] += b1[0x0F]; b1[0x10] = b2[0x10] + b2[0x11]; b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; b1[0x12] = b2[0x12] + b2[0x13]; b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; b1[0x12] += b1[0x13]; b1[0x14] = b2[0x14] + b2[0x15]; b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; b1[0x16] = b2[0x16] + b2[0x17]; b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; b1[0x16] += b1[0x17]; b1[0x14] += b1[0x16]; b1[0x16] += b1[0x15]; b1[0x15] += b1[0x17]; b1[0x18] = b2[0x18] + b2[0x19]; b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; b1[0x1A] = b2[0x1A] + b2[0x1B]; b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; b1[0x1A] += b1[0x1B]; b1[0x1C] = b2[0x1C] + b2[0x1D]; b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; b1[0x1E] = b2[0x1E] + b2[0x1F]; b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; b1[0x1E] += b1[0x1F]; b1[0x1C] += b1[0x1E]; b1[0x1E] += b1[0x1D]; b1[0x1D] += b1[0x1F]; } out0[0x10*16] = b1[0x00]; out0[0x10*12] = b1[0x04]; out0[0x10* 8] = b1[0x02]; out0[0x10* 4] = b1[0x06]; out0[0x10* 0] = b1[0x01]; out1[0x10* 0] = b1[0x01]; out1[0x10* 4] = b1[0x05]; out1[0x10* 8] = b1[0x03]; out1[0x10*12] = b1[0x07]; b1[0x08] += b1[0x0C]; out0[0x10*14] = b1[0x08]; b1[0x0C] += b1[0x0a]; out0[0x10*10] = b1[0x0C]; b1[0x0A] += b1[0x0E]; out0[0x10* 6] = b1[0x0A]; b1[0x0E] += b1[0x09]; out0[0x10* 2] = b1[0x0E]; b1[0x09] += b1[0x0D]; out1[0x10* 2] = b1[0x09]; b1[0x0D] += b1[0x0B]; out1[0x10* 6] = b1[0x0D]; b1[0x0B] += b1[0x0F]; out1[0x10*10] = b1[0x0B]; out1[0x10*14] = b1[0x0F]; b1[0x18] += b1[0x1C]; out0[0x10*15] = b1[0x10] + b1[0x18]; out0[0x10*13] = b1[0x18] + b1[0x14]; b1[0x1C] += b1[0x1a]; out0[0x10*11] = b1[0x14] + b1[0x1C]; out0[0x10* 9] = b1[0x1C] + b1[0x12]; b1[0x1A] += b1[0x1E]; out0[0x10* 7] = b1[0x12] + b1[0x1A]; out0[0x10* 5] = b1[0x1A] + b1[0x16]; b1[0x1E] += b1[0x19]; out0[0x10* 3] = b1[0x16] + b1[0x1E]; out0[0x10* 1] = b1[0x1E] + b1[0x11]; b1[0x19] += b1[0x1D]; out1[0x10* 1] = b1[0x11] + b1[0x19]; out1[0x10* 3] = b1[0x19] + b1[0x15]; b1[0x1D] += b1[0x1B]; out1[0x10* 5] = b1[0x15] + b1[0x1D]; out1[0x10* 7] = b1[0x1D] + b1[0x13]; b1[0x1B] += b1[0x1F]; out1[0x10* 9] = b1[0x13] + b1[0x1B]; out1[0x10*11] = b1[0x1B] + b1[0x17]; out1[0x10*13] = b1[0x17] + b1[0x1F]; out1[0x10*15] = b1[0x1F]; } /* * the call via dct64 is a trick to force GCC to use * (new) registers for the b1,b2 pointer to the bufs[xx] field */ static void dct64(real *a,real *b,real *c) { real bufs[0x40]; dct64_1(a,b,bufs,bufs+0x20,c); } void mp3lib_dct64(real *a,real *b,real *c) { real bufs[0x40]; dct64_1(a,b,bufs,bufs+0x20,c); }