Mercurial > mplayer.hg
view mp3lib/dct64.c @ 30795:1001c606f94c
Make emulated Win32 critical sections thread safe.
Earlier, cs->locked was accessed outside the mutex to get around
the problem that default pthread mutexes are not recursive
(ie., you cannot do a double-lock from the same thread), causing
a thread-safety problem, as both detected by Helgrind and showing
up in some multithreaded codecs.
The ideal solution here would be to simply use recursive pthread
mutexes, but there were concerns about reduced debuggability and
possibly portability. Thus, instead, rewrite the critical sections
to be a simple lock count (with owner) protected by a regular mutex.
Whenever a thread wants to enter the critical section and lock_count
is not 0, it sleeps on a special event that tells it when the
critical section is available.
author | sesse |
---|---|
date | Thu, 04 Mar 2010 15:57:08 +0000 |
parents | 32725ca88fed |
children |
line wrap: on
line source
/* * Modified for use with MPlayer, for details see the changelog at * http://svn.mplayerhq.hu/mplayer/trunk/ * $Id$ */ /* * Discrete Cosine Tansform (DCT) for subband synthesis * optimized for machines with no auto-increment. * The performance is highly compiler dependend. Maybe * the dct64.c version for 'normal' processor may be faster * even for Intel processors. */ static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples) { { register real *costab = mp3lib_pnts[0]; b1[0x00] = samples[0x00] + samples[0x1F]; b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; b1[0x01] = samples[0x01] + samples[0x1E]; b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; b1[0x02] = samples[0x02] + samples[0x1D]; b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; b1[0x03] = samples[0x03] + samples[0x1C]; b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; b1[0x04] = samples[0x04] + samples[0x1B]; b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; b1[0x05] = samples[0x05] + samples[0x1A]; b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; b1[0x06] = samples[0x06] + samples[0x19]; b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; b1[0x07] = samples[0x07] + samples[0x18]; b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; b1[0x08] = samples[0x08] + samples[0x17]; b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; b1[0x09] = samples[0x09] + samples[0x16]; b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; b1[0x0A] = samples[0x0A] + samples[0x15]; b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; b1[0x0B] = samples[0x0B] + samples[0x14]; b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; b1[0x0C] = samples[0x0C] + samples[0x13]; b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; b1[0x0D] = samples[0x0D] + samples[0x12]; b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; b1[0x0E] = samples[0x0E] + samples[0x11]; b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; b1[0x0F] = samples[0x0F] + samples[0x10]; b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; } { register real *costab = mp3lib_pnts[1]; b2[0x00] = b1[0x00] + b1[0x0F]; b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; b2[0x01] = b1[0x01] + b1[0x0E]; b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; b2[0x02] = b1[0x02] + b1[0x0D]; b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; b2[0x03] = b1[0x03] + b1[0x0C]; b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; b2[0x04] = b1[0x04] + b1[0x0B]; b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; b2[0x05] = b1[0x05] + b1[0x0A]; b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; b2[0x06] = b1[0x06] + b1[0x09]; b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; b2[0x07] = b1[0x07] + b1[0x08]; b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; b2[0x10] = b1[0x10] + b1[0x1F]; b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; b2[0x11] = b1[0x11] + b1[0x1E]; b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; b2[0x12] = b1[0x12] + b1[0x1D]; b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; b2[0x13] = b1[0x13] + b1[0x1C]; b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; b2[0x14] = b1[0x14] + b1[0x1B]; b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; b2[0x15] = b1[0x15] + b1[0x1A]; b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; b2[0x16] = b1[0x16] + b1[0x19]; b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; b2[0x17] = b1[0x17] + b1[0x18]; b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; } { register real *costab = mp3lib_pnts[2]; b1[0x00] = b2[0x00] + b2[0x07]; b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; b1[0x01] = b2[0x01] + b2[0x06]; b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; b1[0x02] = b2[0x02] + b2[0x05]; b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; b1[0x03] = b2[0x03] + b2[0x04]; b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; b1[0x08] = b2[0x08] + b2[0x0F]; b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; b1[0x09] = b2[0x09] + b2[0x0E]; b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; b1[0x0A] = b2[0x0A] + b2[0x0D]; b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; b1[0x0B] = b2[0x0B] + b2[0x0C]; b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; b1[0x10] = b2[0x10] + b2[0x17]; b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; b1[0x11] = b2[0x11] + b2[0x16]; b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; b1[0x12] = b2[0x12] + b2[0x15]; b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; b1[0x13] = b2[0x13] + b2[0x14]; b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; b1[0x18] = b2[0x18] + b2[0x1F]; b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; b1[0x19] = b2[0x19] + b2[0x1E]; b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; b1[0x1A] = b2[0x1A] + b2[0x1D]; b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; b1[0x1B] = b2[0x1B] + b2[0x1C]; b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; } { register real const cos0 = mp3lib_pnts[3][0]; register real const cos1 = mp3lib_pnts[3][1]; b2[0x00] = b1[0x00] + b1[0x03]; b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; b2[0x01] = b1[0x01] + b1[0x02]; b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; b2[0x04] = b1[0x04] + b1[0x07]; b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; b2[0x05] = b1[0x05] + b1[0x06]; b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; b2[0x08] = b1[0x08] + b1[0x0B]; b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; b2[0x09] = b1[0x09] + b1[0x0A]; b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; b2[0x0C] = b1[0x0C] + b1[0x0F]; b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; b2[0x0D] = b1[0x0D] + b1[0x0E]; b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; b2[0x10] = b1[0x10] + b1[0x13]; b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; b2[0x11] = b1[0x11] + b1[0x12]; b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; b2[0x14] = b1[0x14] + b1[0x17]; b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; b2[0x15] = b1[0x15] + b1[0x16]; b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; b2[0x18] = b1[0x18] + b1[0x1B]; b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; b2[0x19] = b1[0x19] + b1[0x1A]; b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; b2[0x1C] = b1[0x1C] + b1[0x1F]; b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; b2[0x1D] = b1[0x1D] + b1[0x1E]; b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; } { register real const cos0 = mp3lib_pnts[4][0]; b1[0x00] = b2[0x00] + b2[0x01]; b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; b1[0x02] = b2[0x02] + b2[0x03]; b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; b1[0x02] += b1[0x03]; b1[0x04] = b2[0x04] + b2[0x05]; b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; b1[0x06] = b2[0x06] + b2[0x07]; b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; b1[0x06] += b1[0x07]; b1[0x04] += b1[0x06]; b1[0x06] += b1[0x05]; b1[0x05] += b1[0x07]; b1[0x08] = b2[0x08] + b2[0x09]; b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; b1[0x0A] = b2[0x0A] + b2[0x0B]; b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; b1[0x0A] += b1[0x0B]; b1[0x0C] = b2[0x0C] + b2[0x0D]; b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; b1[0x0E] = b2[0x0E] + b2[0x0F]; b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; b1[0x0E] += b1[0x0F]; b1[0x0C] += b1[0x0E]; b1[0x0E] += b1[0x0D]; b1[0x0D] += b1[0x0F]; b1[0x10] = b2[0x10] + b2[0x11]; b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; b1[0x12] = b2[0x12] + b2[0x13]; b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; b1[0x12] += b1[0x13]; b1[0x14] = b2[0x14] + b2[0x15]; b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; b1[0x16] = b2[0x16] + b2[0x17]; b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; b1[0x16] += b1[0x17]; b1[0x14] += b1[0x16]; b1[0x16] += b1[0x15]; b1[0x15] += b1[0x17]; b1[0x18] = b2[0x18] + b2[0x19]; b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; b1[0x1A] = b2[0x1A] + b2[0x1B]; b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; b1[0x1A] += b1[0x1B]; b1[0x1C] = b2[0x1C] + b2[0x1D]; b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; b1[0x1E] = b2[0x1E] + b2[0x1F]; b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; b1[0x1E] += b1[0x1F]; b1[0x1C] += b1[0x1E]; b1[0x1E] += b1[0x1D]; b1[0x1D] += b1[0x1F]; } out0[0x10*16] = b1[0x00]; out0[0x10*12] = b1[0x04]; out0[0x10* 8] = b1[0x02]; out0[0x10* 4] = b1[0x06]; out0[0x10* 0] = b1[0x01]; out1[0x10* 0] = b1[0x01]; out1[0x10* 4] = b1[0x05]; out1[0x10* 8] = b1[0x03]; out1[0x10*12] = b1[0x07]; b1[0x08] += b1[0x0C]; out0[0x10*14] = b1[0x08]; b1[0x0C] += b1[0x0a]; out0[0x10*10] = b1[0x0C]; b1[0x0A] += b1[0x0E]; out0[0x10* 6] = b1[0x0A]; b1[0x0E] += b1[0x09]; out0[0x10* 2] = b1[0x0E]; b1[0x09] += b1[0x0D]; out1[0x10* 2] = b1[0x09]; b1[0x0D] += b1[0x0B]; out1[0x10* 6] = b1[0x0D]; b1[0x0B] += b1[0x0F]; out1[0x10*10] = b1[0x0B]; out1[0x10*14] = b1[0x0F]; b1[0x18] += b1[0x1C]; out0[0x10*15] = b1[0x10] + b1[0x18]; out0[0x10*13] = b1[0x18] + b1[0x14]; b1[0x1C] += b1[0x1a]; out0[0x10*11] = b1[0x14] + b1[0x1C]; out0[0x10* 9] = b1[0x1C] + b1[0x12]; b1[0x1A] += b1[0x1E]; out0[0x10* 7] = b1[0x12] + b1[0x1A]; out0[0x10* 5] = b1[0x1A] + b1[0x16]; b1[0x1E] += b1[0x19]; out0[0x10* 3] = b1[0x16] + b1[0x1E]; out0[0x10* 1] = b1[0x1E] + b1[0x11]; b1[0x19] += b1[0x1D]; out1[0x10* 1] = b1[0x11] + b1[0x19]; out1[0x10* 3] = b1[0x19] + b1[0x15]; b1[0x1D] += b1[0x1B]; out1[0x10* 5] = b1[0x15] + b1[0x1D]; out1[0x10* 7] = b1[0x1D] + b1[0x13]; b1[0x1B] += b1[0x1F]; out1[0x10* 9] = b1[0x13] + b1[0x1B]; out1[0x10*11] = b1[0x1B] + b1[0x17]; out1[0x10*13] = b1[0x17] + b1[0x1F]; out1[0x10*15] = b1[0x1F]; } /* * the call via dct64 is a trick to force GCC to use * (new) registers for the b1,b2 pointer to the bufs[xx] field */ static void dct64(real *a,real *b,real *c) { real bufs[0x40]; dct64_1(a,b,bufs,bufs+0x20,c); } void mp3lib_dct64(real *a,real *b,real *c) { real bufs[0x40]; dct64_1(a,b,bufs,bufs+0x20,c); }