view mp3lib/decode_mmx.c @ 31139:9a2e299dc256

Add final missing bits of CineForm HD support on Linux (via the Windows DirectShow codec). Required changes: - codecs.conf entry (of course). - Allow opening files with ".col" in the file name, just like "vp3" and ".fpf" already were allowed. (CineForm expects to be able to do this, presumably for some color management code.) - In registry.c, fake a few registry keys that the codec expects the installer to have written. Also, change a few magic numbers (0, 2) to the appropriate constants (ERROR_SUCCESS, ERROR_FILE_NOT_FOUND) where appropriate, so the code is easier to follow. SMP works fine, but seemingly performs suboptimally (e.g., on my dual-core laptop, CineForm performs better if I lie to it and tell it I have four cores). I don't know if this is inherent in the codec, or some inefficiency in the emulated synchronization primitives.
author sesse
date Sun, 23 May 2010 16:01:12 +0000
parents 0ad2da052b2e
children
line wrap: on
line source

/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
*/
#include "config.h"
#include "mangle.h"
#include "mpg123.h"

/*
 * 64-bit lane masks used by the inline assembly in synth_1to1_MMX() (they are
 * referenced by symbol name through MANGLE(), not from C code):
 *   null_one keeps the low 16 bits of each 32-bit half (the new samples),
 *   one_null keeps the high 16 bits of each 32-bit half (the shorts already
 *   in the output buffer that must be preserved).
 */
static const unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
static const unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
/*
 * Table of 31 constants (16 + 8 + 4 + 2 + 1 entries), 16-byte aligned and
 * with external linkage; it is not referenced anywhere in this file.
 *
 * NOTE(review): the values look like IEEE-754 single-precision bit patterns
 * stored as integers (e.g. 1056974725 == 0x3F002785 ~= 0.5006 and
 * 1060439283 == 0x3F3504F3 ~= 0.7071); presumably these are the cosine
 * coefficients consumed by the DCT64 assembly -- confirm against its users.
 */
const unsigned int __attribute__((aligned(16))) costab_mmx[] =
{
    1056974725,
    1057056395,
    1057223771,
    1057485416,
    1057855544,
    1058356026,
    1059019886,
    1059897405,
    1061067246,
    1062657950,
    1064892987,
    1066774581,
    1069414683,
    1073984175,
    1079645762,
    1092815430,
    1057005197,
    1057342072,
    1058087743,
    1059427869,
    1061799040,
    1065862217,
    1071413542,
    1084439708,
    1057128951,
    1058664893,
    1063675095,
    1076102863,
    1057655764,
    1067924853,
    1060439283,
};

/**
  This array of magic numbers was calculated by the pure function
  make_decode_tables_MMX(32768), which used to be implemented in
  tabinit_MMX.c (deleted since r23383).

  Layout: 1024 shorts, organised as 32 rows of 16 window coefficients where
  every row is stored twice back to back (32 shorts per row pair).
  synth_1to1_MMX() starts reading at an offset of 0..16 shorts into the
  table, so the duplication lets it fetch 16 consecutive coefficients from
  any such offset without wrapping.
  */
static const short __attribute__((aligned(8))) mp3lib_decwins[] =
{
        0,      7,     54,    114,    510,   1288,   1644,   9372,
    18760,  -9373,   1644,  -1289,    510,   -115,     54,     -8,
        0,      7,     54,    114,    510,   1288,   1644,   9372,
    18760,  -9373,   1644,  -1289,    510,   -115,     54,     -8,
        0,      7,     55,    129,    500,   1379,   1490,   9834,
    18748,  -8910,   1784,  -1197,    516,   -101,     52,     -7,
        0,      7,     55,    129,    500,   1379,   1490,   9834,
    18748,  -8910,   1784,  -1197,    516,   -101,     52,     -7,
        0,      8,     56,    145,    488,   1469,   1322,  10294,
    18714,  -8448,   1910,  -1107,    520,    -87,     51,     -6,
        0,      8,     56,    145,    488,   1469,   1322,  10294,
    18714,  -8448,   1910,  -1107,    520,    -87,     51,     -6,
        0,      9,     57,    161,    474,   1559,   1141,  10751,
    18658,  -7987,   2023,  -1016,    522,    -74,     49,     -6,
        0,      9,     57,    161,    474,   1559,   1141,  10751,
    18658,  -7987,   2023,  -1016,    522,    -74,     49,     -6,
        0,     10,     57,    177,    456,   1647,    944,  11205,
    18579,  -7528,   2123,   -927,    522,    -61,     48,     -5,
        0,     10,     57,    177,    456,   1647,    944,  11205,
    18579,  -7528,   2123,   -927,    522,    -61,     48,     -5,
        0,     11,     57,    194,    435,   1733,    734,  11654,
    18477,  -7073,   2210,   -838,    519,    -50,     46,     -5,
        0,     11,     57,    194,    435,   1733,    734,  11654,
    18477,  -7073,   2210,   -838,    519,    -50,     46,     -5,
        0,     12,     57,    212,    411,   1817,    510,  12097,
    18354,  -6621,   2285,   -751,    515,    -39,     44,     -4,
        0,     12,     57,    212,    411,   1817,    510,  12097,
    18354,  -6621,   2285,   -751,    515,    -39,     44,     -4,
        0,     13,     57,    229,    384,   1899,    271,  12534,
    18209,  -6174,   2348,   -666,    508,    -28,     43,     -4,
        0,     13,     57,    229,    384,   1899,    271,  12534,
    18209,  -6174,   2348,   -666,    508,    -28,     43,     -4,
        0,     14,     56,    247,    354,   1977,     18,  12963,
    18043,  -5733,   2398,   -583,    501,    -18,     41,     -4,
        0,     14,     56,    247,    354,   1977,     18,  12963,
    18043,  -5733,   2398,   -583,    501,    -18,     41,     -4,
        0,     15,     56,    266,    320,   2052,   -249,  13383,
    17855,  -5298,   2438,   -502,    491,     -9,     39,     -3,
        0,     15,     56,    266,    320,   2052,   -249,  13383,
    17855,  -5298,   2438,   -502,    491,     -9,     39,     -3,
        0,     17,     54,    284,    283,   2122,   -530,  13794,
    17648,  -4870,   2466,   -423,    480,     -1,     37,     -3,
        0,     17,     54,    284,    283,   2122,   -530,  13794,
    17648,  -4870,   2466,   -423,    480,     -1,     37,     -3,
        0,     18,     52,    302,    243,   2188,   -825,  14194,
    17420,  -4450,   2484,   -347,    468,      7,     35,     -3,
        0,     18,     52,    302,    243,   2188,   -825,  14194,
    17420,  -4450,   2484,   -347,    468,      7,     35,     -3,
        0,     19,     50,    320,    199,   2249,  -1133,  14583,
    17173,  -4039,   2492,   -274,    455,     14,     33,     -2,
        0,     19,     50,    320,    199,   2249,  -1133,  14583,
    17173,  -4039,   2492,   -274,    455,     14,     33,     -2,
       -1,     21,     48,    339,    152,   2304,  -1454,  14959,
    16908,  -3637,   2490,   -204,    440,     20,     32,     -2,
       -1,     21,     48,    339,    152,   2304,  -1454,  14959,
    16908,  -3637,   2490,   -204,    440,     20,     32,     -2,
       -1,     22,     45,    357,    101,   2354,  -1788,  15322,
    16624,  -3245,   2479,   -137,    425,     26,     30,     -2,
       -1,     22,     45,    357,    101,   2354,  -1788,  15322,
    16624,  -3245,   2479,   -137,    425,     26,     30,     -2,
       -1,     24,     41,    374,     47,   2396,  -2135,  15671,
    16323,  -2864,   2460,    -72,    409,     31,     28,     -2,
       -1,     24,     41,    374,     47,   2396,  -2135,  15671,
    16323,  -2864,   2460,    -72,    409,     31,     28,     -2,
       -1,     26,     37,    391,    -11,   2431,  -2493,  16004,
    16005,  -2494,   2432,    -12,    392,     36,     26,     -2,
       -1,     26,     37,    391,    -11,   2431,  -2493,  16004,
    16005,  -2494,   2432,    -12,    392,     36,     26,     -2,
       -2,    -28,     31,   -409,    -72,  -2460,  -2864, -16323,
    15671,   2135,   2396,    -47,    374,    -41,     24,      1,
       -2,    -28,     31,   -409,    -72,  -2460,  -2864, -16323,
    15671,   2135,   2396,    -47,    374,    -41,     24,      1,
       -2,    -30,     26,   -425,   -137,  -2479,  -3245, -16624,
    15322,   1788,   2354,   -101,    357,    -45,     22,      1,
       -2,    -30,     26,   -425,   -137,  -2479,  -3245, -16624,
    15322,   1788,   2354,   -101,    357,    -45,     22,      1,
       -2,    -32,     20,   -440,   -204,  -2490,  -3637, -16908,
    14959,   1454,   2304,   -152,    339,    -48,     21,      1,
       -2,    -32,     20,   -440,   -204,  -2490,  -3637, -16908,
    14959,   1454,   2304,   -152,    339,    -48,     21,      1,
       -2,    -33,     14,   -455,   -274,  -2492,  -4039, -17173,
    14583,   1133,   2249,   -199,    320,    -50,     19,      0,
       -2,    -33,     14,   -455,   -274,  -2492,  -4039, -17173,
    14583,   1133,   2249,   -199,    320,    -50,     19,      0,
       -3,    -35,      7,   -468,   -347,  -2484,  -4450, -17420,
    14194,    825,   2188,   -243,    302,    -52,     18,      0,
       -3,    -35,      7,   -468,   -347,  -2484,  -4450, -17420,
    14194,    825,   2188,   -243,    302,    -52,     18,      0,
       -3,    -37,     -1,   -480,   -423,  -2466,  -4870, -17648,
    13794,    530,   2122,   -283,    284,    -54,     17,      0,
       -3,    -37,     -1,   -480,   -423,  -2466,  -4870, -17648,
    13794,    530,   2122,   -283,    284,    -54,     17,      0,
       -3,    -39,     -9,   -491,   -502,  -2438,  -5298, -17855,
    13383,    249,   2052,   -320,    266,    -56,     15,      0,
       -3,    -39,     -9,   -491,   -502,  -2438,  -5298, -17855,
    13383,    249,   2052,   -320,    266,    -56,     15,      0,
       -4,    -41,    -18,   -501,   -583,  -2398,  -5733, -18043,
    12963,    -18,   1977,   -354,    247,    -56,     14,      0,
       -4,    -41,    -18,   -501,   -583,  -2398,  -5733, -18043,
    12963,    -18,   1977,   -354,    247,    -56,     14,      0,
       -4,    -43,    -28,   -508,   -666,  -2348,  -6174, -18209,
    12534,   -271,   1899,   -384,    229,    -57,     13,      0,
       -4,    -43,    -28,   -508,   -666,  -2348,  -6174, -18209,
    12534,   -271,   1899,   -384,    229,    -57,     13,      0,
       -4,    -44,    -39,   -515,   -751,  -2285,  -6621, -18354,
    12097,   -510,   1817,   -411,    212,    -57,     12,      0,
       -4,    -44,    -39,   -515,   -751,  -2285,  -6621, -18354,
    12097,   -510,   1817,   -411,    212,    -57,     12,      0,
       -5,    -46,    -50,   -519,   -838,  -2210,  -7073, -18477,
    11654,   -734,   1733,   -435,    194,    -57,     11,      0,
       -5,    -46,    -50,   -519,   -838,  -2210,  -7073, -18477,
    11654,   -734,   1733,   -435,    194,    -57,     11,      0,
       -5,    -48,    -61,   -522,   -927,  -2123,  -7528, -18579,
    11205,   -944,   1647,   -456,    177,    -57,     10,      0,
       -5,    -48,    -61,   -522,   -927,  -2123,  -7528, -18579,
    11205,   -944,   1647,   -456,    177,    -57,     10,      0,
       -6,    -49,    -74,   -522,  -1016,  -2023,  -7987, -18658,
    10751,  -1141,   1559,   -474,    161,    -57,      9,      0,
       -6,    -49,    -74,   -522,  -1016,  -2023,  -7987, -18658,
    10751,  -1141,   1559,   -474,    161,    -57,      9,      0,
       -6,    -51,    -87,   -520,  -1107,  -1910,  -8448, -18714,
    10294,  -1322,   1469,   -488,    145,    -56,      8,      0,
       -6,    -51,    -87,   -520,  -1107,  -1910,  -8448, -18714,
    10294,  -1322,   1469,   -488,    145,    -56,      8,      0,
       -7,    -52,   -101,   -516,  -1197,  -1784,  -8910, -18748,
     9834,  -1490,   1379,   -500,    129,    -55,      7,      0,
       -7,    -52,   -101,   -516,  -1197,  -1784,  -8910, -18748,
     9834,  -1490,   1379,   -500,    129,    -55,      7,      0,
};

/**
 * Produce 32 windowed 16-bit output samples for one channel using MMX.
 *
 * The function calls dct64_MMX_func() on bandPtr with two pointers into the
 * per-channel static history buffers, then computes 32 dot products of 16
 * history shorts with 16 coefficients from mp3lib_decwins. Each sum is
 * arithmetically shifted right by 13 and saturated to 16 bits; the second
 * half of the outputs (after the 17th) is additionally negated. Results are
 * written to every second short of samples; the shorts in between are left
 * unchanged (presumably they belong to the other channel of an interleaved
 * stereo buffer -- confirm against callers).
 *
 * State is kept in function-static variables (buffs, bo), so all callers
 * share it.
 *
 * @param bandPtr  input forwarded unchanged to dct64_MMX_func()
 * @param channel  0 selects buffs[1] and steps the shared offset bo; any
 *                 other value selects buffs[0] and starts writing at
 *                 samples[1] instead of samples[0]
 * @param samples  output buffer; 32 shorts are written at a stride of two
 *                 shorts, using 8-byte read-modify-write accesses
 * @return always 0
 */
int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
{
    /* History storage: [channel][buffer half][0x110 shorts]. */
    static short buffs[2][2][0x110] __attribute__((aligned(8)));
    /* Offset into the history buffers, kept in 0..15; shared by both channels. */
    static int bo = 1;
    short *b0, (*buf)[0x110], *a, *b;
    const short* window;
    /* i is the asm loop counter: the first asm loop runs 8 times. */
    int bo1, i = 8;

    if (channel == 0) {
        /* Only channel 0 moves the offset, decrementing it modulo 16. */
        bo = (bo - 1) & 0xf;
        buf = buffs[1];
    } else {
        /* Other channel: shift the output by one short. */
        samples++;
        buf = buffs[0];
    }

    /*
     * The parity of bo decides which half is read by the windowing code (b0)
     * and where the two dct64 destinations (a, b) point. bo1 is bo rounded
     * up to an even value and is used to offset the window table below.
     */
    if (bo & 1) {
        b0 = buf[1];
        bo1 = bo + 1;
        a = buf[0] + bo;
        b = buf[1] + ((bo + 1) & 0xf);
    } else {
        b0 = buf[0];
        bo1 = bo;
        b = buf[0] + bo;
        a = buf[1] + ((bo + 1) & 0xf);
    }

    /* Defined elsewhere; presumably writes the DCT output through a and b. */
    dct64_MMX_func(a, b, bandPtr);
    /* bo1 is in 0..16, so the window starts 0..16 shorts into the table. */
    window = mp3lib_decwins + 16 - bo1;
    //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);
/*
 * Asm operands: %0 = i (loop counter), %1 = window, %2 = b0, %3 = samples.
 * All four are updated in place by the code below.
 */
__asm__ volatile(
ASMALIGN(4)
/*
 * Loop 0 (8 iterations): two output samples per iteration.
 * Sample A = window[0..15] . b0[0..15], sample B = window[32..47] . b0[16..31]
 * (byte offsets 64 into the window and 32 into b0).
 */
"0:\n\t"
        "movq  (%1),%%mm0\n\t"
        "movq  64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd 32(%2),%%mm4\n\t"
        "movq  8(%1),%%mm1\n\t"
        "movq  72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd 40(%2),%%mm5\n\t"
        "movq  16(%1),%%mm2\n\t"
        "movq  80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd 48(%2),%%mm6\n\t"
        "movq  24(%1),%%mm3\n\t"
        "movq  88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd 56(%2),%%mm7\n\t"
        /* Sum the four partial products of each sample. */
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        /* Horizontal add: fold the high dword onto the low dword. */
        "movq  %%mm0,%%mm1\n\t"
        "movq  %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        /* Scale down by 2^13 and saturate to signed 16 bits. */
        "psrad $13,%%mm0\n\t"
        "psrad $13,%%mm4\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "packssdw %%mm4,%%mm4\n\t"

        /*
         * Read-modify-write 8 output bytes: keep shorts 1 and 3 already in
         * memory (one_null), insert samples A and B as shorts 0 and 2
         * (null_one).
         */
        "movq   (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand   "MANGLE(one_null)", %%mm1\n\t"
        "pand   "MANGLE(null_one)", %%mm0\n\t"
        "por    %%mm0, %%mm1\n\t"
        "movq   %%mm1,(%3)\n\t"

        /* Advance: b0 by 32 shorts, window by 64 shorts, output by 4 shorts. */
        "add $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"

        "decl %0\n\t"
        "jnz  0b\n\t"

        /* 17th sample: one 16-tap dot product, stored as a single short. */
        "movq  (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq  8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq  16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq  24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "psrad $13,%%mm0\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax, (%3)\n\t"
        /* From here on b0 is walked backwards while the window keeps going forward. */
        "sub $32,%2\n\t"
        "add $64,%1\n\t"
        "add $4,%3\n\t"

        "movl $7,%0\n\t"
ASMALIGN(4)
/*
 * Loop 1 (7 iterations): again two samples per iteration, but the second
 * sample reads b0 at negative offsets and both results are negated before
 * being stored.
 */
"1:\n\t"
        "movq  (%1),%%mm0\n\t"
        "movq  64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd -32(%2),%%mm4\n\t"
        "movq  8(%1),%%mm1\n\t"
        "movq  72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd -24(%2),%%mm5\n\t"
        "movq  16(%1),%%mm2\n\t"
        "movq  80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd -16(%2),%%mm6\n\t"
        "movq  24(%1),%%mm3\n\t"
        "movq  88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd -8(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "movq  %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "paddd %%mm4,%%mm5\n\t"
        "psrad $13,%%mm1\n\t"
        "psrad $13,%%mm5\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "packssdw %%mm5,%%mm5\n\t"
        /* Negate with saturation: zero the register, then 0 - value. */
        "psubd %%mm0,%%mm0\n\t"
        "psubd %%mm4,%%mm4\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "psubsw %%mm5,%%mm4\n\t"

        /* Same masked merge into the output buffer as in loop 0. */
        "movq   (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand   "MANGLE(one_null)", %%mm1\n\t"
        "pand   "MANGLE(null_one)", %%mm0\n\t"
        "por    %%mm0, %%mm1\n\t"
        "movq   %%mm1,(%3)\n\t"

        "sub $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz  1b\n\t"

        /* Final (32nd) sample: single negated dot product, stored as one short. */
        "movq  (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq  8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq  16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq  24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "psrad $13,%%mm1\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax,(%3)\n\t"
        /* Leave MMX state so that x87 floating point can be used again. */
        "emms\n\t"
        :"+r"(i), "+r"(window), "+r"(b0), "+r"(samples)
        :
        :"memory", "%eax");
    return 0;
}