view mp3lib/decode_mmx.c @ 26146:20a126aaa756

ve_vfw.c: #include aviheader.h instead of wine avifmt.h Compilation was broken after libmpdemux/muxer.h started including libmpdemux/aviheader.h. ve_vfw.c included both muxer.h and loader/wine/avifmt.h, and the latter has definitions that conflict with aviheader.h ones. Fix by removing the avifmt.h include. I did not carefully check that changing the includes doesn't break any ve_vfw.c code. However it at least fixes compilation, and if the avifmt.h versions differ in some significant way then the code is fundamentally broken anyway: ve_vfw cannot use different versions of the avi struct definitions when it also uses shared muxer.h types (those must use the standard definitions to keep the type compatible with what's used in other files).
author uau
date Thu, 06 Mar 2008 01:57:26 +0000
parents 2095f98cf0fa
children 08d18fe9da52
line wrap: on
line source

/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See the ChangeLog of mpg123-0.59s-pre.1 for details
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loop unrolling and removal of MOVW instructions from loops
*/
#include "config.h"
#include "mangle.h"
#define real float /* ugly - but only way */

/* Pointer to the DCT64 routine called by synth_1to1_MMX(); it is defined in
 * another translation unit.  The call site passes two short buffers followed
 * by the band data (a `real`, i.e. float, array). */
extern void (*dct64_MMX_func)(short*, short*, real*);
/* 64-bit masks used by the inline asm in synth_1to1_MMX() via MANGLE():
 *   null_one keeps the low 16-bit word of each 32-bit half,
 *   one_null keeps the high 16-bit word of each 32-bit half.
 * They are referenced only by symbol name inside asm text, never from C
 * code; attribute_used is presumably what stops the compiler from
 * discarding them -- confirm against the project's definition of the macro. */
static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
/*
 * 31-entry, 16-byte-aligned table with external linkage; it is not
 * referenced anywhere in this file.
 * NOTE(review): the values look like IEEE-754 single-precision bit patterns
 * stored as integers (e.g. 1056974725 == 0x3F002785, about 0.5006), grouped
 * as 16 + 8 + 4 + 2 + 1 entries -- presumably the cosine coefficients used
 * by the assembly dct64 implementations.  Confirm against the consumer
 * before relying on this.
 */
unsigned int __attribute__((aligned(16))) costab_mmx[] =
{
	1056974725,
	1057056395,
	1057223771,
	1057485416,
	1057855544,
	1058356026,
	1059019886,
	1059897405,
	1061067246,
	1062657950,
	1064892987,
	1066774581,
	1069414683,
	1073984175,
	1079645762,
	1092815430,
	1057005197,
	1057342072,
	1058087743,
	1059427869,
	1061799040,
	1065862217,
	1071413542,
	1084439708,
	1057128951,
	1058664893,
	1063675095,
	1076102863,
	1057655764,
	1067924853,
	1060439283,
};

/**
  This array of magic numbers was calculated by the pure function
  make_decode_tables_MMX(32768), which used to be implemented in
  tabinit_MMX.c (deleted since r23383).

  Layout: 1024 shorts, 8-byte aligned.  Every 16-value row is stored twice
  in a row, giving 32 groups of 32 shorts.  synth_1to1_MMX() starts reading
  at offset (16 - bo1), which lies in 0..16, and multiplies 16 consecutive
  shorts per output sample, stepping 32 shorts from one sample to the next;
  the duplication keeps each such 16-short read inside a single group.
  */
static short __attribute__((aligned(8))) mp3lib_decwins[] =
{
	     0,	     7,	    54,	   114,	   510,	  1288,	  1644,	  9372,
	 18760,	 -9373,	  1644,	 -1289,	   510,	  -115,	    54,	    -8,
	     0,	     7,	    54,	   114,	   510,	  1288,	  1644,	  9372,
	 18760,	 -9373,	  1644,	 -1289,	   510,	  -115,	    54,	    -8,
	     0,	     7,	    55,	   129,	   500,	  1379,	  1490,	  9834,
	 18748,	 -8910,	  1784,	 -1197,	   516,	  -101,	    52,	    -7,
	     0,	     7,	    55,	   129,	   500,	  1379,	  1490,	  9834,
	 18748,	 -8910,	  1784,	 -1197,	   516,	  -101,	    52,	    -7,
	     0,	     8,	    56,	   145,	   488,	  1469,	  1322,	 10294,
	 18714,	 -8448,	  1910,	 -1107,	   520,	   -87,	    51,	    -6,
	     0,	     8,	    56,	   145,	   488,	  1469,	  1322,	 10294,
	 18714,	 -8448,	  1910,	 -1107,	   520,	   -87,	    51,	    -6,
	     0,	     9,	    57,	   161,	   474,	  1559,	  1141,	 10751,
	 18658,	 -7987,	  2023,	 -1016,	   522,	   -74,	    49,	    -6,
	     0,	     9,	    57,	   161,	   474,	  1559,	  1141,	 10751,
	 18658,	 -7987,	  2023,	 -1016,	   522,	   -74,	    49,	    -6,
	     0,	    10,	    57,	   177,	   456,	  1647,	   944,	 11205,
	 18579,	 -7528,	  2123,	  -927,	   522,	   -61,	    48,	    -5,
	     0,	    10,	    57,	   177,	   456,	  1647,	   944,	 11205,
	 18579,	 -7528,	  2123,	  -927,	   522,	   -61,	    48,	    -5,
	     0,	    11,	    57,	   194,	   435,	  1733,	   734,	 11654,
	 18477,	 -7073,	  2210,	  -838,	   519,	   -50,	    46,	    -5,
	     0,	    11,	    57,	   194,	   435,	  1733,	   734,	 11654,
	 18477,	 -7073,	  2210,	  -838,	   519,	   -50,	    46,	    -5,
	     0,	    12,	    57,	   212,	   411,	  1817,	   510,	 12097,
	 18354,	 -6621,	  2285,	  -751,	   515,	   -39,	    44,	    -4,
	     0,	    12,	    57,	   212,	   411,	  1817,	   510,	 12097,
	 18354,	 -6621,	  2285,	  -751,	   515,	   -39,	    44,	    -4,
	     0,	    13,	    57,	   229,	   384,	  1899,	   271,	 12534,
	 18209,	 -6174,	  2348,	  -666,	   508,	   -28,	    43,	    -4,
	     0,	    13,	    57,	   229,	   384,	  1899,	   271,	 12534,
	 18209,	 -6174,	  2348,	  -666,	   508,	   -28,	    43,	    -4,
	     0,	    14,	    56,	   247,	   354,	  1977,	    18,	 12963,
	 18043,	 -5733,	  2398,	  -583,	   501,	   -18,	    41,	    -4,
	     0,	    14,	    56,	   247,	   354,	  1977,	    18,	 12963,
	 18043,	 -5733,	  2398,	  -583,	   501,	   -18,	    41,	    -4,
	     0,	    15,	    56,	   266,	   320,	  2052,	  -249,	 13383,
	 17855,	 -5298,	  2438,	  -502,	   491,	    -9,	    39,	    -3,
	     0,	    15,	    56,	   266,	   320,	  2052,	  -249,	 13383,
	 17855,	 -5298,	  2438,	  -502,	   491,	    -9,	    39,	    -3,
	     0,	    17,	    54,	   284,	   283,	  2122,	  -530,	 13794,
	 17648,	 -4870,	  2466,	  -423,	   480,	    -1,	    37,	    -3,
	     0,	    17,	    54,	   284,	   283,	  2122,	  -530,	 13794,
	 17648,	 -4870,	  2466,	  -423,	   480,	    -1,	    37,	    -3,
	     0,	    18,	    52,	   302,	   243,	  2188,	  -825,	 14194,
	 17420,	 -4450,	  2484,	  -347,	   468,	     7,	    35,	    -3,
	     0,	    18,	    52,	   302,	   243,	  2188,	  -825,	 14194,
	 17420,	 -4450,	  2484,	  -347,	   468,	     7,	    35,	    -3,
	     0,	    19,	    50,	   320,	   199,	  2249,	 -1133,	 14583,
	 17173,	 -4039,	  2492,	  -274,	   455,	    14,	    33,	    -2,
	     0,	    19,	    50,	   320,	   199,	  2249,	 -1133,	 14583,
	 17173,	 -4039,	  2492,	  -274,	   455,	    14,	    33,	    -2,
	    -1,	    21,	    48,	   339,	   152,	  2304,	 -1454,	 14959,
	 16908,	 -3637,	  2490,	  -204,	   440,	    20,	    32,	    -2,
	    -1,	    21,	    48,	   339,	   152,	  2304,	 -1454,	 14959,
	 16908,	 -3637,	  2490,	  -204,	   440,	    20,	    32,	    -2,
	    -1,	    22,	    45,	   357,	   101,	  2354,	 -1788,	 15322,
	 16624,	 -3245,	  2479,	  -137,	   425,	    26,	    30,	    -2,
	    -1,	    22,	    45,	   357,	   101,	  2354,	 -1788,	 15322,
	 16624,	 -3245,	  2479,	  -137,	   425,	    26,	    30,	    -2,
	    -1,	    24,	    41,	   374,	    47,	  2396,	 -2135,	 15671,
	 16323,	 -2864,	  2460,	   -72,	   409,	    31,	    28,	    -2,
	    -1,	    24,	    41,	   374,	    47,	  2396,	 -2135,	 15671,
	 16323,	 -2864,	  2460,	   -72,	   409,	    31,	    28,	    -2,
	    -1,	    26,	    37,	   391,	   -11,	  2431,	 -2493,	 16004,
	 16005,	 -2494,	  2432,	   -12,	   392,	    36,	    26,	    -2,
	    -1,	    26,	    37,	   391,	   -11,	  2431,	 -2493,	 16004,
	 16005,	 -2494,	  2432,	   -12,	   392,	    36,	    26,	    -2,
	    -2,	   -28,	    31,	  -409,	   -72,	 -2460,	 -2864,	-16323,
	 15671,	  2135,	  2396,	   -47,	   374,	   -41,	    24,	     1,
	    -2,	   -28,	    31,	  -409,	   -72,	 -2460,	 -2864,	-16323,
	 15671,	  2135,	  2396,	   -47,	   374,	   -41,	    24,	     1,
	    -2,	   -30,	    26,	  -425,	  -137,	 -2479,	 -3245,	-16624,
	 15322,	  1788,	  2354,	  -101,	   357,	   -45,	    22,	     1,
	    -2,	   -30,	    26,	  -425,	  -137,	 -2479,	 -3245,	-16624,
	 15322,	  1788,	  2354,	  -101,	   357,	   -45,	    22,	     1,
	    -2,	   -32,	    20,	  -440,	  -204,	 -2490,	 -3637,	-16908,
	 14959,	  1454,	  2304,	  -152,	   339,	   -48,	    21,	     1,
	    -2,	   -32,	    20,	  -440,	  -204,	 -2490,	 -3637,	-16908,
	 14959,	  1454,	  2304,	  -152,	   339,	   -48,	    21,	     1,
	    -2,	   -33,	    14,	  -455,	  -274,	 -2492,	 -4039,	-17173,
	 14583,	  1133,	  2249,	  -199,	   320,	   -50,	    19,	     0,
	    -2,	   -33,	    14,	  -455,	  -274,	 -2492,	 -4039,	-17173,
	 14583,	  1133,	  2249,	  -199,	   320,	   -50,	    19,	     0,
	    -3,	   -35,	     7,	  -468,	  -347,	 -2484,	 -4450,	-17420,
	 14194,	   825,	  2188,	  -243,	   302,	   -52,	    18,	     0,
	    -3,	   -35,	     7,	  -468,	  -347,	 -2484,	 -4450,	-17420,
	 14194,	   825,	  2188,	  -243,	   302,	   -52,	    18,	     0,
	    -3,	   -37,	    -1,	  -480,	  -423,	 -2466,	 -4870,	-17648,
	 13794,	   530,	  2122,	  -283,	   284,	   -54,	    17,	     0,
	    -3,	   -37,	    -1,	  -480,	  -423,	 -2466,	 -4870,	-17648,
	 13794,	   530,	  2122,	  -283,	   284,	   -54,	    17,	     0,
	    -3,	   -39,	    -9,	  -491,	  -502,	 -2438,	 -5298,	-17855,
	 13383,	   249,	  2052,	  -320,	   266,	   -56,	    15,	     0,
	    -3,	   -39,	    -9,	  -491,	  -502,	 -2438,	 -5298,	-17855,
	 13383,	   249,	  2052,	  -320,	   266,	   -56,	    15,	     0,
	    -4,	   -41,	   -18,	  -501,	  -583,	 -2398,	 -5733,	-18043,
	 12963,	   -18,	  1977,	  -354,	   247,	   -56,	    14,	     0,
	    -4,	   -41,	   -18,	  -501,	  -583,	 -2398,	 -5733,	-18043,
	 12963,	   -18,	  1977,	  -354,	   247,	   -56,	    14,	     0,
	    -4,	   -43,	   -28,	  -508,	  -666,	 -2348,	 -6174,	-18209,
	 12534,	  -271,	  1899,	  -384,	   229,	   -57,	    13,	     0,
	    -4,	   -43,	   -28,	  -508,	  -666,	 -2348,	 -6174,	-18209,
	 12534,	  -271,	  1899,	  -384,	   229,	   -57,	    13,	     0,
	    -4,	   -44,	   -39,	  -515,	  -751,	 -2285,	 -6621,	-18354,
	 12097,	  -510,	  1817,	  -411,	   212,	   -57,	    12,	     0,
	    -4,	   -44,	   -39,	  -515,	  -751,	 -2285,	 -6621,	-18354,
	 12097,	  -510,	  1817,	  -411,	   212,	   -57,	    12,	     0,
	    -5,	   -46,	   -50,	  -519,	  -838,	 -2210,	 -7073,	-18477,
	 11654,	  -734,	  1733,	  -435,	   194,	   -57,	    11,	     0,
	    -5,	   -46,	   -50,	  -519,	  -838,	 -2210,	 -7073,	-18477,
	 11654,	  -734,	  1733,	  -435,	   194,	   -57,	    11,	     0,
	    -5,	   -48,	   -61,	  -522,	  -927,	 -2123,	 -7528,	-18579,
	 11205,	  -944,	  1647,	  -456,	   177,	   -57,	    10,	     0,
	    -5,	   -48,	   -61,	  -522,	  -927,	 -2123,	 -7528,	-18579,
	 11205,	  -944,	  1647,	  -456,	   177,	   -57,	    10,	     0,
	    -6,	   -49,	   -74,	  -522,	 -1016,	 -2023,	 -7987,	-18658,
	 10751,	 -1141,	  1559,	  -474,	   161,	   -57,	     9,	     0,
	    -6,	   -49,	   -74,	  -522,	 -1016,	 -2023,	 -7987,	-18658,
	 10751,	 -1141,	  1559,	  -474,	   161,	   -57,	     9,	     0,
	    -6,	   -51,	   -87,	  -520,	 -1107,	 -1910,	 -8448,	-18714,
	 10294,	 -1322,	  1469,	  -488,	   145,	   -56,	     8,	     0,
	    -6,	   -51,	   -87,	  -520,	 -1107,	 -1910,	 -8448,	-18714,
	 10294,	 -1322,	  1469,	  -488,	   145,	   -56,	     8,	     0,
	    -7,	   -52,	  -101,	  -516,	 -1197,	 -1784,	 -8910,	-18748,
	  9834,	 -1490,	  1379,	  -500,	   129,	   -55,	     7,	     0,
	    -7,	   -52,	  -101,	  -516,	 -1197,	 -1784,	 -8910,	-18748,
	  9834,	 -1490,	  1379,	  -500,	   129,	   -55,	     7,	     0,
};

/**
 * Synthesize 32 16-bit output samples for one channel using MMX.
 *
 * The band data is first passed to dct64_MMX_func(), which fills the two
 * internal history buffers selected by the current ring offset.  The asm
 * block then forms each output sample as a 16-term dot product of the
 * window table and the history buffer (pmaddwd), arithmetic-shifted right
 * by 13 and saturated to 16 bits (packssdw).  The second half of the
 * samples walks the history buffer backwards and stores the saturating
 * negation of the dot product.
 *
 * Samples are written with a stride of two shorts: the paired stores keep
 * the neighbouring 16-bit words of the destination intact (one_null mask),
 * so the output buffer is interleaved.
 *
 * @param bandPtr  band data handed unchanged to dct64_MMX_func().
 * @param channel  0 selects one history buffer and steps the shared ring
 *                 offset; any other value selects the second buffer and
 *                 starts writing at samples[1].
 * @param samples  destination buffer (interleaved; see above).
 * @return always 0.
 *
 * The function keeps its state in static variables (buffs, bo), so it is
 * not reentrant.
 */
int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
{
    static short buffs[2][2][0x110] __attribute__((aligned(8)));
    static int bo = 1;
    short *b0, (*buf)[0x110], *a, *b;
    short *window;
    int bo1, i = 8;     /* i: asm loop counter, 8 passes of 2 samples each */

    if (channel == 0) {
        /* Step the ring offset once per channel-0 call (wraps at 16). */
        bo = (bo - 1) & 0xf;
        buf = buffs[1];
    } else {
        /* Other channel: write into the odd slots of the output. */
        samples++;
        buf = buffs[0];
    }

    /* The parity of bo decides which half-buffer the windowing pass reads
     * (b0) and where dct64 deposits its two outputs (a, b). */
    if (bo & 1) {
        b0 = buf[1];
        bo1 = bo + 1;
        a = buf[0] + bo;
        b = buf[1] + ((bo + 1) & 0xf);
    } else {
        b0 = buf[0];
        bo1 = bo;
        b = buf[0] + bo;
        a = buf[1] + ((bo + 1) & 0xf);
    }

    dct64_MMX_func(a, b, bandPtr);
    window = mp3lib_decwins + 16 - bo1;
    //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);

    /*
     * Operands: %0 = i (loop counter), %1 = window, %2 = b0, %3 = samples.
     *
     * Numeric local labels (1:, 2:) are used instead of fixed names such as
     * ".L03": a named label is emitted once per copy of the asm statement,
     * so any duplication of this statement by the compiler would produce
     * duplicate-symbol errors, and ".L*" names live in the compiler's own
     * label namespace.
     */
__asm __volatile(
ASMALIGN(4)
"1:\n\t"
        /* Two dot products per pass: mm0..mm3 -> sample n, mm4..mm7 -> n+1 */
        "movq  (%1),%%mm0\n\t"
        "movq  64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd 32(%2),%%mm4\n\t"
        "movq  8(%1),%%mm1\n\t"
        "movq  72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd 40(%2),%%mm5\n\t"
        "movq  16(%1),%%mm2\n\t"
        "movq  80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd 48(%2),%%mm6\n\t"
        "movq  24(%1),%%mm3\n\t"
        "movq  88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd 56(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        /* Horizontal add of the two 32-bit partial sums, scale, saturate */
        "movq  %%mm0,%%mm1\n\t"
        "movq  %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "psrad $13,%%mm0\n\t"
        "psrad $13,%%mm4\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "packssdw %%mm4,%%mm4\n\t"

        /* Merge both results into words 0 and 2 of the 8 output bytes,
         * keeping words 1 and 3 as they were. */
        "movq	(%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand   "MANGLE(one_null)", %%mm1\n\t"
        "pand   "MANGLE(null_one)", %%mm0\n\t"
        "por    %%mm0, %%mm1\n\t"
        "movq   %%mm1,(%3)\n\t"

        "add $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"

        "decl %0\n\t"
        "jnz  1b\n\t"

        /* Single middle sample, stored with a 16-bit write */
        "movq  (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq  8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq  16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq  24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "psrad $13,%%mm0\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax, (%3)\n\t"
        "sub $32,%2\n\t"
        "add $64,%1\n\t"
        "add $4,%3\n\t"

        /* Second half: 7 passes walking the history buffer backwards */
        "movl $7,%0\n\t"
ASMALIGN(4)
"2:\n\t"
        "movq  (%1),%%mm0\n\t"
        "movq  64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd -32(%2),%%mm4\n\t"
        "movq  8(%1),%%mm1\n\t"
        "movq  72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd -24(%2),%%mm5\n\t"
        "movq  16(%1),%%mm2\n\t"
        "movq  80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd -16(%2),%%mm6\n\t"
        "movq  24(%1),%%mm3\n\t"
        "movq  88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd -8(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "movq  %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "paddd %%mm4,%%mm5\n\t"
        "psrad $13,%%mm1\n\t"
        "psrad $13,%%mm5\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "packssdw %%mm5,%%mm5\n\t"
        /* Negate with saturation: result = 0 - value */
        "psubd %%mm0,%%mm0\n\t"
        "psubd %%mm4,%%mm4\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "psubsw %%mm5,%%mm4\n\t"

        "movq	(%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand   "MANGLE(one_null)", %%mm1\n\t"
        "pand   "MANGLE(null_one)", %%mm0\n\t"
        "por    %%mm0, %%mm1\n\t"
        "movq   %%mm1,(%3)\n\t"

        "sub $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz  2b\n\t"

        /* Final single sample, negated like the rest of the second half */
        "movq  (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq  8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq  16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq  24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq  %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "psrad $13,%%mm1\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax,(%3)\n\t"
        /* Leave MMX state so later x87 floating-point code works */
        "emms\n\t"
        :"+r"(i), "+r"(window), "+r"(b0), "+r"(samples)
        :
        :"memory", "%eax");
    return 0;
}