Mercurial > mplayer.hg
view mp3lib/decode_mmx.c @ 30884:53901d222e8a
Announce SMP support for Win32.
Don't hardcode dwNumberOfProcessors=1 for Win32 anymore; the mutex/event code
is still far from perfect, but now good enough that I can't find any codecs
that break with this (tested on a quad with various codecs). This tells
codecs they can use more than one core if they want to (some already did, by
launching multiple threads even when told there was only a single core).
author | sesse |
---|---|
date | Wed, 17 Mar 2010 23:33:26 +0000 |
parents | fbb18bfa2dfe |
children | 0ad2da052b2e |
line wrap: on
line source
/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
 */

#include "config.h"
#include "mangle.h"
#include "mpg123.h"

/*
 * 64-bit word masks used by the inline asm in synth_1to1_MMX() when merging
 * freshly computed samples into the output buffer:
 *   null_one keeps 16-bit words 0 and 2 of a quadword,
 *   one_null keeps 16-bit words 1 and 3.
 * They are referenced only from asm text (through MANGLE()), hence
 * attribute_used.
 */
static const unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
static const unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;

/*
 * 31-entry constant table, not referenced anywhere in this file.
 * NOTE(review): the values look like IEEE-754 single-precision bit patterns
 * (e.g. 1056974725 == 0x3F002785, about 0.5006), presumably cosine
 * coefficients consumed by the MMX DCT64 code - confirm against its user.
 */
const unsigned int __attribute__((aligned(16))) costab_mmx[] =
{
    1056974725,
    1057056395,
    1057223771,
    1057485416,
    1057855544,
    1058356026,
    1059019886,
    1059897405,
    1061067246,
    1062657950,
    1064892987,
    1066774581,
    1069414683,
    1073984175,
    1079645762,
    1092815430,
    1057005197,
    1057342072,
    1058087743,
    1059427869,
    1061799040,
    1065862217,
    1071413542,
    1084439708,
    1057128951,
    1058664893,
    1063675095,
    1076102863,
    1057655764,
    1067924853,
    1060439283,
};

/**
 * This array of magic numbers was calculated by the pure function
 * make_decode_tables_MMX(32768), which had been implemented in
 * (deleted since r23383) tabinit_MMX.c.
 *
 * Layout: 32 distinct rows of 16 coefficients, each row stored twice back to
 * back. synth_1to1_MMX() starts reading at mp3lib_decwins + 16 - bo1 (an
 * offset of 0..16 entries), so every 16-entry read stays inside one doubled
 * 32-entry row.
 */
static const short __attribute__((aligned(8))) mp3lib_decwins[] =
{
     0, 7, 54, 114, 510, 1288, 1644, 9372, 18760, -9373, 1644, -1289, 510, -115, 54, -8,
     0, 7, 54, 114, 510, 1288, 1644, 9372, 18760, -9373, 1644, -1289, 510, -115, 54, -8,
     0, 7, 55, 129, 500, 1379, 1490, 9834, 18748, -8910, 1784, -1197, 516, -101, 52, -7,
     0, 7, 55, 129, 500, 1379, 1490, 9834, 18748, -8910, 1784, -1197, 516, -101, 52, -7,
     0, 8, 56, 145, 488, 1469, 1322, 10294, 18714, -8448, 1910, -1107, 520, -87, 51, -6,
     0, 8, 56, 145, 488, 1469, 1322, 10294, 18714, -8448, 1910, -1107, 520, -87, 51, -6,
     0, 9, 57, 161, 474, 1559, 1141, 10751, 18658, -7987, 2023, -1016, 522, -74, 49, -6,
     0, 9, 57, 161, 474, 1559, 1141, 10751, 18658, -7987, 2023, -1016, 522, -74, 49, -6,
     0, 10, 57, 177, 456, 1647, 944, 11205, 18579, -7528, 2123, -927, 522, -61, 48, -5,
     0, 10, 57, 177, 456, 1647, 944, 11205, 18579, -7528, 2123, -927, 522, -61, 48, -5,
     0, 11, 57, 194, 435, 1733, 734, 11654, 18477, -7073, 2210, -838, 519, -50, 46, -5,
     0, 11, 57, 194, 435, 1733, 734, 11654, 18477, -7073, 2210, -838, 519, -50, 46, -5,
     0, 12, 57, 212, 411, 1817, 510, 12097, 18354, -6621, 2285, -751, 515, -39, 44, -4,
     0, 12, 57, 212, 411, 1817, 510, 12097, 18354, -6621, 2285, -751, 515, -39, 44, -4,
     0, 13, 57, 229, 384, 1899, 271, 12534, 18209, -6174, 2348, -666, 508, -28, 43, -4,
     0, 13, 57, 229, 384, 1899, 271, 12534, 18209, -6174, 2348, -666, 508, -28, 43, -4,
     0, 14, 56, 247, 354, 1977, 18, 12963, 18043, -5733, 2398, -583, 501, -18, 41, -4,
     0, 14, 56, 247, 354, 1977, 18, 12963, 18043, -5733, 2398, -583, 501, -18, 41, -4,
     0, 15, 56, 266, 320, 2052, -249, 13383, 17855, -5298, 2438, -502, 491, -9, 39, -3,
     0, 15, 56, 266, 320, 2052, -249, 13383, 17855, -5298, 2438, -502, 491, -9, 39, -3,
     0, 17, 54, 284, 283, 2122, -530, 13794, 17648, -4870, 2466, -423, 480, -1, 37, -3,
     0, 17, 54, 284, 283, 2122, -530, 13794, 17648, -4870, 2466, -423, 480, -1, 37, -3,
     0, 18, 52, 302, 243, 2188, -825, 14194, 17420, -4450, 2484, -347, 468, 7, 35, -3,
     0, 18, 52, 302, 243, 2188, -825, 14194, 17420, -4450, 2484, -347, 468, 7, 35, -3,
     0, 19, 50, 320, 199, 2249, -1133, 14583, 17173, -4039, 2492, -274, 455, 14, 33, -2,
     0, 19, 50, 320, 199, 2249, -1133, 14583, 17173, -4039, 2492, -274, 455, 14, 33, -2,
    -1, 21, 48, 339, 152, 2304, -1454, 14959, 16908, -3637, 2490, -204, 440, 20, 32, -2,
    -1, 21, 48, 339, 152, 2304, -1454, 14959, 16908, -3637, 2490, -204, 440, 20, 32, -2,
    -1, 22, 45, 357, 101, 2354, -1788, 15322, 16624, -3245, 2479, -137, 425, 26, 30, -2,
    -1, 22, 45, 357, 101, 2354, -1788, 15322, 16624, -3245, 2479, -137, 425, 26, 30, -2,
    -1, 24, 41, 374, 47, 2396, -2135, 15671, 16323, -2864, 2460, -72, 409, 31, 28, -2,
    -1, 24, 41, 374, 47, 2396, -2135, 15671, 16323, -2864, 2460, -72, 409, 31, 28, -2,
    -1, 26, 37, 391, -11, 2431, -2493, 16004, 16005, -2494, 2432, -12, 392, 36, 26, -2,
    -1, 26, 37, 391, -11, 2431, -2493, 16004, 16005, -2494, 2432, -12, 392, 36, 26, -2,
    -2, -28, 31, -409, -72, -2460, -2864, -16323, 15671, 2135, 2396, -47, 374, -41, 24, 1,
    -2, -28, 31, -409, -72, -2460, -2864, -16323, 15671, 2135, 2396, -47, 374, -41, 24, 1,
    -2, -30, 26, -425, -137, -2479, -3245, -16624, 15322, 1788, 2354, -101, 357, -45, 22, 1,
    -2, -30, 26, -425, -137, -2479, -3245, -16624, 15322, 1788, 2354, -101, 357, -45, 22, 1,
    -2, -32, 20, -440, -204, -2490, -3637, -16908, 14959, 1454, 2304, -152, 339, -48, 21, 1,
    -2, -32, 20, -440, -204, -2490, -3637, -16908, 14959, 1454, 2304, -152, 339, -48, 21, 1,
    -2, -33, 14, -455, -274, -2492, -4039, -17173, 14583, 1133, 2249, -199, 320, -50, 19, 0,
    -2, -33, 14, -455, -274, -2492, -4039, -17173, 14583, 1133, 2249, -199, 320, -50, 19, 0,
    -3, -35, 7, -468, -347, -2484, -4450, -17420, 14194, 825, 2188, -243, 302, -52, 18, 0,
    -3, -35, 7, -468, -347, -2484, -4450, -17420, 14194, 825, 2188, -243, 302, -52, 18, 0,
    -3, -37, -1, -480, -423, -2466, -4870, -17648, 13794, 530, 2122, -283, 284, -54, 17, 0,
    -3, -37, -1, -480, -423, -2466, -4870, -17648, 13794, 530, 2122, -283, 284, -54, 17, 0,
    -3, -39, -9, -491, -502, -2438, -5298, -17855, 13383, 249, 2052, -320, 266, -56, 15, 0,
    -3, -39, -9, -491, -502, -2438, -5298, -17855, 13383, 249, 2052, -320, 266, -56, 15, 0,
    -4, -41, -18, -501, -583, -2398, -5733, -18043, 12963, -18, 1977, -354, 247, -56, 14, 0,
    -4, -41, -18, -501, -583, -2398, -5733, -18043, 12963, -18, 1977, -354, 247, -56, 14, 0,
    -4, -43, -28, -508, -666, -2348, -6174, -18209, 12534, -271, 1899, -384, 229, -57, 13, 0,
    -4, -43, -28, -508, -666, -2348, -6174, -18209, 12534, -271, 1899, -384, 229, -57, 13, 0,
    -4, -44, -39, -515, -751, -2285, -6621, -18354, 12097, -510, 1817, -411, 212, -57, 12, 0,
    -4, -44, -39, -515, -751, -2285, -6621, -18354, 12097, -510, 1817, -411, 212, -57, 12, 0,
    -5, -46, -50, -519, -838, -2210, -7073, -18477, 11654, -734, 1733, -435, 194, -57, 11, 0,
    -5, -46, -50, -519, -838, -2210, -7073, -18477, 11654, -734, 1733, -435, 194, -57, 11, 0,
    -5, -48, -61, -522, -927, -2123, -7528, -18579, 11205, -944, 1647, -456, 177, -57, 10, 0,
    -5, -48, -61, -522, -927, -2123, -7528, -18579, 11205, -944, 1647, -456, 177, -57, 10, 0,
    -6, -49, -74, -522, -1016, -2023, -7987, -18658, 10751, -1141, 1559, -474, 161, -57, 9, 0,
    -6, -49, -74, -522, -1016, -2023, -7987, -18658, 10751, -1141, 1559, -474, 161, -57, 9, 0,
    -6, -51, -87, -520, -1107, -1910, -8448, -18714, 10294, -1322, 1469, -488, 145, -56, 8, 0,
    -6, -51, -87, -520, -1107, -1910, -8448, -18714, 10294, -1322, 1469, -488, 145, -56, 8, 0,
    -7, -52, -101, -516, -1197, -1784, -8910, -18748, 9834, -1490, 1379, -500, 129, -55, 7, 0,
    -7, -52, -101, -516, -1197, -1784, -8910, -18748, 9834, -1490, 1379, -500, 129, -55, 7, 0,
};

/**
 * Windowing stage of the MMX synthesis filter: runs dct64_MMX_func() on the
 * input and then produces 32 signed 16-bit output samples for one channel.
 *
 * Every output sample is the dot product of 16 window coefficients from
 * mp3lib_decwins with 16 consecutive shorts of the history buffer,
 * arithmetically shifted right by 13 and saturated to 16 bits. The second
 * half of the outputs (15 of them) is additionally negated with saturation.
 *
 * @param bandPtr  input handed unchanged to dct64_MMX_func()
 *                 (presumably the subband samples of one granule step -
 *                 confirm against mpg123.h)
 * @param channel  0 writes to samples[0], samples[2], ... and advances the
 *                 shared ring offset; any other value writes to samples[1],
 *                 samples[3], ... and reuses the current offset
 * @param samples  interleaved 16-bit output buffer; 32 values are written at
 *                 a stride of two shorts, the slots in between keep their
 *                 previous contents
 * @return always 0
 *
 * The function keeps its history buffers and ring offset in function-local
 * statics, so the state is shared by all callers.
 */
int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
{
    /* history: [channel slot][bank][0x110 shorts] */
    static short buffs[2][2][0x110] __attribute__((aligned(8)));
    /* ring offset into a bank, always kept in 0..15 */
    static int bo = 1;
    short *b0, (*buf)[0x110], *a, *b;
    const short* window;
    int bo1, i = 8;   /* i: iteration count of the first asm loop */

    if (channel == 0) {
        /* only channel 0 moves the ring offset */
        bo = (bo - 1) & 0xf;
        buf = buffs[1];
    } else {
        /* other channel: second slot of each interleaved output frame */
        samples++;
        buf = buffs[0];
    }

    /*
     * The two banks swap roles depending on the parity of bo: one bank is
     * read back by the windowing code (b0), and the two dct64 outputs a/b
     * are directed into the banks at offsets derived from bo.
     */
    if (bo & 1) {
        b0 = buf[1];
        bo1 = bo + 1;
        a = buf[0] + bo;
        b = buf[1] + ((bo + 1) & 0xf);
    } else {
        b0 = buf[0];
        bo1 = bo;
        b = buf[0] + bo;
        a = buf[1] + ((bo + 1) & 0xf);
    }

    dct64_MMX_func(a, b, bandPtr);

    /* bo1 is even and in 0..16, so the start offset is 0..16 entries */
    window = mp3lib_decwins + 16 - bo1;
    /* debugging aid, kept disabled:
       printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1); */

    /*
     * Operands: %0 = i (loop counter), %1 = window, %2 = b0, %3 = samples.
     * All four are read-write ("+r"); eax is used as scratch for the two
     * single-sample stores.
     */
    __asm__ volatile(
        /*
         * Loop 0, 8 iterations, two outputs per iteration (16 outputs):
         *   mm0 = sum(window[0..15]  * b0[0..15])
         *   mm4 = sum(window[32..47] * b0[16..31])
         * then >>13, saturate, and merge both results into words 0 and 2 of
         * the quadword at (%3) while words 1 and 3 are written back as read.
         */
        ASMALIGN(4)
        "0:\n\t"
        "movq (%1),%%mm0\n\t"
        "movq 64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd 32(%2),%%mm4\n\t"
        "movq 8(%1),%%mm1\n\t"
        "movq 72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd 40(%2),%%mm5\n\t"
        "movq 16(%1),%%mm2\n\t"
        "movq 80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd 48(%2),%%mm6\n\t"
        "movq 24(%1),%%mm3\n\t"
        "movq 88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd 56(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        /* horizontal add: low dword += high dword */
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "psrad $13,%%mm0\n\t"
        "psrad $13,%%mm4\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "packssdw %%mm4,%%mm4\n\t"
        /* merge into the interleaved output without touching words 1 and 3 */
        "movq (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand "MANGLE(one_null)", %%mm1\n\t"
        "pand "MANGLE(null_one)", %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%3)\n\t"
        "add $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz 0b\n\t"

        /* Single output #17: same dot product, stored as one 16-bit word. */
        "movq (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq 8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq 16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq 24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "psrad $13,%%mm0\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax, (%3)\n\t"
        /* from here on the history pointer walks backwards */
        "sub $32,%2\n\t"
        "add $64,%1\n\t"
        "add $4,%3\n\t"
        "movl $7,%0\n\t"

        /*
         * Loop 1, 7 iterations, two outputs per iteration (14 outputs):
         * like loop 0, but the second dot product reads the 16 shorts
         * preceding %2, %2 decreases by 64 bytes per iteration, and each
         * result is negated (0 - x with signed saturation) before the merge.
         */
        ASMALIGN(4)
        "1:\n\t"
        "movq (%1),%%mm0\n\t"
        "movq 64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd -32(%2),%%mm4\n\t"
        "movq 8(%1),%%mm1\n\t"
        "movq 72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd -24(%2),%%mm5\n\t"
        "movq 16(%1),%%mm2\n\t"
        "movq 80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd -16(%2),%%mm6\n\t"
        "movq 24(%1),%%mm3\n\t"
        "movq 88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd -8(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "paddd %%mm4,%%mm5\n\t"
        "psrad $13,%%mm1\n\t"
        "psrad $13,%%mm5\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "packssdw %%mm5,%%mm5\n\t"
        /* mm0/mm4 = 0 - result, saturating */
        "psubd %%mm0,%%mm0\n\t"
        "psubd %%mm4,%%mm4\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "psubsw %%mm5,%%mm4\n\t"
        "movq (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand "MANGLE(one_null)", %%mm1\n\t"
        "pand "MANGLE(null_one)", %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%3)\n\t"
        "sub $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz 1b\n\t"

        /* Final (32nd) output: negated dot product, single 16-bit store. */
        "movq (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq 8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq 16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq 24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "psrad $13,%%mm1\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax,(%3)\n\t"
        /* leave MMX state so that x87 code can run afterwards */
        "emms\n\t"
        :"+r"(i), "+r"(window), "+r"(b0), "+r"(samples)
        :
        :"memory", "%eax");
    return 0;
}