Mercurial > mplayer.hg
view mp3lib/dct64_i386.c @ 34478:8e09f1cb3ecd
Fix vo_gl unsharp filter for chroma.
The syntax is a bit strange, since for inputs the components
indicate swizzles, while for outputs it is only a write mask,
thus the result must be at the correct position regardless
of the component specified for the output.
So use a 3-component vector for the constant factor.
Also make the input swizzles explicit in an attempt to make
the code less confusing (that part does change what the code
actually does).
Previous code would result in a filter strength of 0 always
being used for chroma.
author | reimar |
---|---|
date | Sat, 14 Jan 2012 15:49:54 +0000 |
parents | 0783dd397f74 |
children |
line wrap: on
line source
/* * Modified for use with MPlayer, for details see the changelog at * http://svn.mplayerhq.hu/mplayer/trunk/ * $Id$ */ /* * Discrete Cosine Tansform (DCT) for subband synthesis * optimized for machines with no auto-increment. * The performance is highly compiler dependend. Maybe * the mpg123_dct64.c version for 'normal' processor may be faster * even for Intel processors. */ #include "mpg123.h" static void mpg123_dct64_1(real * out0, real * out1, real * b1, real * b2, real * samples) { { register real *costab = mp3lib_pnts[0]; b1[0x00] = samples[0x00] + samples[0x1F]; b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; b1[0x01] = samples[0x01] + samples[0x1E]; b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; b1[0x02] = samples[0x02] + samples[0x1D]; b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; b1[0x03] = samples[0x03] + samples[0x1C]; b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; b1[0x04] = samples[0x04] + samples[0x1B]; b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; b1[0x05] = samples[0x05] + samples[0x1A]; b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; b1[0x06] = samples[0x06] + samples[0x19]; b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; b1[0x07] = samples[0x07] + samples[0x18]; b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; b1[0x08] = samples[0x08] + samples[0x17]; b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; b1[0x09] = samples[0x09] + samples[0x16]; b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; b1[0x0A] = samples[0x0A] + samples[0x15]; b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; b1[0x0B] = samples[0x0B] + samples[0x14]; b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; b1[0x0C] = samples[0x0C] + samples[0x13]; b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; b1[0x0D] = samples[0x0D] + samples[0x12]; b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; b1[0x0E] = samples[0x0E] + samples[0x11]; b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; b1[0x0F] = samples[0x0F] + samples[0x10]; b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; } { register real *costab = mp3lib_pnts[1]; b2[0x00] = b1[0x00] + b1[0x0F]; b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; b2[0x01] = b1[0x01] + b1[0x0E]; b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; b2[0x02] = b1[0x02] + b1[0x0D]; b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; b2[0x03] = b1[0x03] + b1[0x0C]; b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; b2[0x04] = b1[0x04] + b1[0x0B]; b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; b2[0x05] = b1[0x05] + b1[0x0A]; b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; b2[0x06] = b1[0x06] + b1[0x09]; b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; b2[0x07] = b1[0x07] + b1[0x08]; b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; b2[0x10] = b1[0x10] + b1[0x1F]; b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; b2[0x11] = b1[0x11] + b1[0x1E]; b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; b2[0x12] = b1[0x12] + b1[0x1D]; b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; b2[0x13] = b1[0x13] + b1[0x1C]; b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; b2[0x14] = b1[0x14] + b1[0x1B]; b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; b2[0x15] = b1[0x15] + b1[0x1A]; b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; b2[0x16] = b1[0x16] + b1[0x19]; b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; b2[0x17] = b1[0x17] + b1[0x18]; b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; } { register real *costab = mp3lib_pnts[2]; b1[0x00] = b2[0x00] + b2[0x07]; b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; b1[0x01] = b2[0x01] + b2[0x06]; b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; b1[0x02] = b2[0x02] + b2[0x05]; b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; b1[0x03] = b2[0x03] + b2[0x04]; b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; b1[0x08] = b2[0x08] + b2[0x0F]; b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; b1[0x09] = b2[0x09] + b2[0x0E]; b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; b1[0x0A] = b2[0x0A] + b2[0x0D]; b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; b1[0x0B] = b2[0x0B] + b2[0x0C]; b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; b1[0x10] = b2[0x10] + b2[0x17]; b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; b1[0x11] = b2[0x11] + b2[0x16]; b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; b1[0x12] = b2[0x12] + b2[0x15]; b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; b1[0x13] = b2[0x13] + b2[0x14]; b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; b1[0x18] = b2[0x18] + b2[0x1F]; b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; b1[0x19] = b2[0x19] + b2[0x1E]; b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; b1[0x1A] = b2[0x1A] + b2[0x1D]; b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; b1[0x1B] = b2[0x1B] + b2[0x1C]; b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; } { register real const cos0 = mp3lib_pnts[3][0]; register real const cos1 = mp3lib_pnts[3][1]; b2[0x00] = b1[0x00] + b1[0x03]; b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; b2[0x01] = b1[0x01] + b1[0x02]; b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; b2[0x04] = b1[0x04] + b1[0x07]; b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; b2[0x05] = b1[0x05] + b1[0x06]; b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; b2[0x08] = b1[0x08] + b1[0x0B]; b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; b2[0x09] = b1[0x09] + b1[0x0A]; b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; b2[0x0C] = b1[0x0C] + b1[0x0F]; b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; b2[0x0D] = b1[0x0D] + b1[0x0E]; b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; b2[0x10] = b1[0x10] + b1[0x13]; b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; b2[0x11] = b1[0x11] + b1[0x12]; b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; b2[0x14] = b1[0x14] + b1[0x17]; b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; b2[0x15] = b1[0x15] + b1[0x16]; b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; b2[0x18] = b1[0x18] + b1[0x1B]; b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; b2[0x19] = b1[0x19] + b1[0x1A]; b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; b2[0x1C] = b1[0x1C] + b1[0x1F]; b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; b2[0x1D] = b1[0x1D] + b1[0x1E]; b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; } { register real const cos0 = mp3lib_pnts[4][0]; b1[0x00] = b2[0x00] + b2[0x01]; b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; b1[0x02] = b2[0x02] + b2[0x03]; b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; b1[0x02] += b1[0x03]; b1[0x04] = b2[0x04] + b2[0x05]; b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; b1[0x06] = b2[0x06] + b2[0x07]; b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; b1[0x06] += b1[0x07]; b1[0x04] += b1[0x06]; b1[0x06] += b1[0x05]; b1[0x05] += b1[0x07]; b1[0x08] = b2[0x08] + b2[0x09]; b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; b1[0x0A] = b2[0x0A] + b2[0x0B]; b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; b1[0x0A] += b1[0x0B]; b1[0x0C] = b2[0x0C] + b2[0x0D]; b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; b1[0x0E] = b2[0x0E] + b2[0x0F]; b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; b1[0x0E] += b1[0x0F]; b1[0x0C] += b1[0x0E]; b1[0x0E] += b1[0x0D]; b1[0x0D] += b1[0x0F]; b1[0x10] = b2[0x10] + b2[0x11]; b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; b1[0x12] = b2[0x12] + b2[0x13]; b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; b1[0x12] += b1[0x13]; b1[0x14] = b2[0x14] + b2[0x15]; b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; b1[0x16] = b2[0x16] + b2[0x17]; b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; b1[0x16] += b1[0x17]; b1[0x14] += b1[0x16]; b1[0x16] += b1[0x15]; b1[0x15] += b1[0x17]; b1[0x18] = b2[0x18] + b2[0x19]; b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; b1[0x1A] = b2[0x1A] + b2[0x1B]; b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; b1[0x1A] += b1[0x1B]; b1[0x1C] = b2[0x1C] + b2[0x1D]; b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; b1[0x1E] = b2[0x1E] + b2[0x1F]; b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; b1[0x1E] += b1[0x1F]; b1[0x1C] += b1[0x1E]; b1[0x1E] += b1[0x1D]; b1[0x1D] += b1[0x1F]; } out0[0x10 * 16] = b1[0x00]; out0[0x10 * 12] = b1[0x04]; out0[0x10 * 8] = b1[0x02]; out0[0x10 * 4] = b1[0x06]; out0[0x10 * 0] = b1[0x01]; out1[0x10 * 0] = b1[0x01]; out1[0x10 * 4] = b1[0x05]; out1[0x10 * 8] = b1[0x03]; out1[0x10 * 12] = b1[0x07]; b1[0x08] += b1[0x0C]; out0[0x10 * 14] = b1[0x08]; b1[0x0C] += b1[0x0a]; out0[0x10 * 10] = b1[0x0C]; b1[0x0A] += b1[0x0E]; out0[0x10 * 6] = b1[0x0A]; b1[0x0E] += b1[0x09]; out0[0x10 * 2] = b1[0x0E]; b1[0x09] += b1[0x0D]; out1[0x10 * 2] = b1[0x09]; b1[0x0D] += b1[0x0B]; out1[0x10 * 6] = b1[0x0D]; b1[0x0B] += b1[0x0F]; out1[0x10 * 10] = b1[0x0B]; out1[0x10 * 14] = b1[0x0F]; b1[0x18] += b1[0x1C]; out0[0x10 * 15] = b1[0x10] + b1[0x18]; out0[0x10 * 13] = b1[0x18] + b1[0x14]; b1[0x1C] += b1[0x1a]; out0[0x10 * 11] = b1[0x14] + b1[0x1C]; out0[0x10 * 9] = b1[0x1C] + b1[0x12]; b1[0x1A] += b1[0x1E]; out0[0x10 * 7] = b1[0x12] + b1[0x1A]; out0[0x10 * 5] = b1[0x1A] + b1[0x16]; b1[0x1E] += b1[0x19]; out0[0x10 * 3] = b1[0x16] + b1[0x1E]; out0[0x10 * 1] = b1[0x1E] + b1[0x11]; b1[0x19] += b1[0x1D]; out1[0x10 * 1] = b1[0x11] + b1[0x19]; out1[0x10 * 3] = b1[0x19] + b1[0x15]; b1[0x1D] += b1[0x1B]; out1[0x10 * 5] = b1[0x15] + b1[0x1D]; out1[0x10 * 7] = b1[0x1D] + b1[0x13]; b1[0x1B] += b1[0x1F]; out1[0x10 * 9] = b1[0x13] + b1[0x1B]; out1[0x10 * 11] = b1[0x1B] + b1[0x17]; out1[0x10 * 13] = b1[0x17] + b1[0x1F]; out1[0x10 * 15] = b1[0x1F]; } /* * the call via mpg123_dct64 is a trick to force GCC to use * (new) registers for the b1,b2 pointer to the bufs[xx] field */ void mpg123_dct64(real * a, real * b, real * c) { real bufs[0x40]; mpg123_dct64_1(a, b, bufs, bufs + 0x20, c); }