Mercurial > mplayer.hg
view mp3lib/decode_MMX.c @ 24892:80180dc13565
Change decode_audio() interface
Rewrite decode_audio to better deal with filters that handle input in
large blocks. It now always places output in sh_audio->a_out_buffer
(which was always given as a parameter before) and reallocates the
buffer if needed. After the changes filters can return arbitrarily
large blocks of data without some of it being lost. The new version
also allows simplifying some code.
author | uau |
---|---|
date | Thu, 01 Nov 2007 06:52:19 +0000 |
parents | d986b47f1451 |
children |
line wrap: on
line source
/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
 */
#include "config.h"
#include "mangle.h"
#define real float /* ugly - but only way */

/* DCT routine selected elsewhere; called here as dct64_MMX_func(a, b, bandPtr). */
extern void (*dct64_MMX_func)(short*, short*, real*);

/*
 * 64-bit masks referenced by name from the inline asm below (hence
 * attribute_used: the compiler sees no C-level use).
 * null_one keeps the low 16 bits of each 32-bit half,
 * one_null keeps the high 16 bits of each 32-bit half.
 */
static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;

/* Not referenced in this file; non-static, so presumably used by the MMX DCT code. */
unsigned int __attribute__((aligned(16))) costab_mmx[] =
{
    1056974725, 1057056395, 1057223771, 1057485416,
    1057855544, 1058356026, 1059019886, 1059897405,
    1061067246, 1062657950, 1064892987, 1066774581,
    1069414683, 1073984175, 1079645762, 1092815430,
    1057005197, 1057342072, 1058087743, 1059427869,
    1061799040, 1065862217, 1071413542, 1084439708,
    1057128951, 1058664893, 1063675095, 1076102863,
    1057655764, 1067924853, 1060439283,
};

/**
 * This array of magic numbers was calculated by the pure function
 * make_decode_tables_MMX(32768), which had been implemented in
 * (deleted since r23383) tabinit_MMX.c.
 *
 * Every group of 16 coefficients is stored twice in a row, so a window
 * pointer offset by up to 16 entries (see synth_1to1_MMX) still reads 16
 * consecutive coefficients.
 */
static short __attribute__((aligned(8))) mp3lib_decwins[] =
{
     0,   7,  54,  114,   510,  1288,  1644,   9372, 18760, -9373, 1644, -1289, 510, -115, 54, -8,
     0,   7,  54,  114,   510,  1288,  1644,   9372, 18760, -9373, 1644, -1289, 510, -115, 54, -8,
     0,   7,  55,  129,   500,  1379,  1490,   9834, 18748, -8910, 1784, -1197, 516, -101, 52, -7,
     0,   7,  55,  129,   500,  1379,  1490,   9834, 18748, -8910, 1784, -1197, 516, -101, 52, -7,
     0,   8,  56,  145,   488,  1469,  1322,  10294, 18714, -8448, 1910, -1107, 520, -87, 51, -6,
     0,   8,  56,  145,   488,  1469,  1322,  10294, 18714, -8448, 1910, -1107, 520, -87, 51, -6,
     0,   9,  57,  161,   474,  1559,  1141,  10751, 18658, -7987, 2023, -1016, 522, -74, 49, -6,
     0,   9,  57,  161,   474,  1559,  1141,  10751, 18658, -7987, 2023, -1016, 522, -74, 49, -6,
     0,  10,  57,  177,   456,  1647,   944,  11205, 18579, -7528, 2123, -927, 522, -61, 48, -5,
     0,  10,  57,  177,   456,  1647,   944,  11205, 18579, -7528, 2123, -927, 522, -61, 48, -5,
     0,  11,  57,  194,   435,  1733,   734,  11654, 18477, -7073, 2210, -838, 519, -50, 46, -5,
     0,  11,  57,  194,   435,  1733,   734,  11654, 18477, -7073, 2210, -838, 519, -50, 46, -5,
     0,  12,  57,  212,   411,  1817,   510,  12097, 18354, -6621, 2285, -751, 515, -39, 44, -4,
     0,  12,  57,  212,   411,  1817,   510,  12097, 18354, -6621, 2285, -751, 515, -39, 44, -4,
     0,  13,  57,  229,   384,  1899,   271,  12534, 18209, -6174, 2348, -666, 508, -28, 43, -4,
     0,  13,  57,  229,   384,  1899,   271,  12534, 18209, -6174, 2348, -666, 508, -28, 43, -4,
     0,  14,  56,  247,   354,  1977,    18,  12963, 18043, -5733, 2398, -583, 501, -18, 41, -4,
     0,  14,  56,  247,   354,  1977,    18,  12963, 18043, -5733, 2398, -583, 501, -18, 41, -4,
     0,  15,  56,  266,   320,  2052,  -249,  13383, 17855, -5298, 2438, -502, 491, -9, 39, -3,
     0,  15,  56,  266,   320,  2052,  -249,  13383, 17855, -5298, 2438, -502, 491, -9, 39, -3,
     0,  17,  54,  284,   283,  2122,  -530,  13794, 17648, -4870, 2466, -423, 480, -1, 37, -3,
     0,  17,  54,  284,   283,  2122,  -530,  13794, 17648, -4870, 2466, -423, 480, -1, 37, -3,
     0,  18,  52,  302,   243,  2188,  -825,  14194, 17420, -4450, 2484, -347, 468, 7, 35, -3,
     0,  18,  52,  302,   243,  2188,  -825,  14194, 17420, -4450, 2484, -347, 468, 7, 35, -3,
     0,  19,  50,  320,   199,  2249, -1133,  14583, 17173, -4039, 2492, -274, 455, 14, 33, -2,
     0,  19,  50,  320,   199,  2249, -1133,  14583, 17173, -4039, 2492, -274, 455, 14, 33, -2,
    -1,  21,  48,  339,   152,  2304, -1454,  14959, 16908, -3637, 2490, -204, 440, 20, 32, -2,
    -1,  21,  48,  339,   152,  2304, -1454,  14959, 16908, -3637, 2490, -204, 440, 20, 32, -2,
    -1,  22,  45,  357,   101,  2354, -1788,  15322, 16624, -3245, 2479, -137, 425, 26, 30, -2,
    -1,  22,  45,  357,   101,  2354, -1788,  15322, 16624, -3245, 2479, -137, 425, 26, 30, -2,
    -1,  24,  41,  374,    47,  2396, -2135,  15671, 16323, -2864, 2460, -72, 409, 31, 28, -2,
    -1,  24,  41,  374,    47,  2396, -2135,  15671, 16323, -2864, 2460, -72, 409, 31, 28, -2,
    -1,  26,  37,  391,   -11,  2431, -2493,  16004, 16005, -2494, 2432, -12, 392, 36, 26, -2,
    -1,  26,  37,  391,   -11,  2431, -2493,  16004, 16005, -2494, 2432, -12, 392, 36, 26, -2,
    -2, -28,  31, -409,   -72, -2460, -2864, -16323, 15671, 2135, 2396, -47, 374, -41, 24, 1,
    -2, -28,  31, -409,   -72, -2460, -2864, -16323, 15671, 2135, 2396, -47, 374, -41, 24, 1,
    -2, -30,  26, -425,  -137, -2479, -3245, -16624, 15322, 1788, 2354, -101, 357, -45, 22, 1,
    -2, -30,  26, -425,  -137, -2479, -3245, -16624, 15322, 1788, 2354, -101, 357, -45, 22, 1,
    -2, -32,  20, -440,  -204, -2490, -3637, -16908, 14959, 1454, 2304, -152, 339, -48, 21, 1,
    -2, -32,  20, -440,  -204, -2490, -3637, -16908, 14959, 1454, 2304, -152, 339, -48, 21, 1,
    -2, -33,  14, -455,  -274, -2492, -4039, -17173, 14583, 1133, 2249, -199, 320, -50, 19, 0,
    -2, -33,  14, -455,  -274, -2492, -4039, -17173, 14583, 1133, 2249, -199, 320, -50, 19, 0,
    -3, -35,   7, -468,  -347, -2484, -4450, -17420, 14194, 825, 2188, -243, 302, -52, 18, 0,
    -3, -35,   7, -468,  -347, -2484, -4450, -17420, 14194, 825, 2188, -243, 302, -52, 18, 0,
    -3, -37,  -1, -480,  -423, -2466, -4870, -17648, 13794, 530, 2122, -283, 284, -54, 17, 0,
    -3, -37,  -1, -480,  -423, -2466, -4870, -17648, 13794, 530, 2122, -283, 284, -54, 17, 0,
    -3, -39,  -9, -491,  -502, -2438, -5298, -17855, 13383, 249, 2052, -320, 266, -56, 15, 0,
    -3, -39,  -9, -491,  -502, -2438, -5298, -17855, 13383, 249, 2052, -320, 266, -56, 15, 0,
    -4, -41, -18, -501,  -583, -2398, -5733, -18043, 12963, -18, 1977, -354, 247, -56, 14, 0,
    -4, -41, -18, -501,  -583, -2398, -5733, -18043, 12963, -18, 1977, -354, 247, -56, 14, 0,
    -4, -43, -28, -508,  -666, -2348, -6174, -18209, 12534, -271, 1899, -384, 229, -57, 13, 0,
    -4, -43, -28, -508,  -666, -2348, -6174, -18209, 12534, -271, 1899, -384, 229, -57, 13, 0,
    -4, -44, -39, -515,  -751, -2285, -6621, -18354, 12097, -510, 1817, -411, 212, -57, 12, 0,
    -4, -44, -39, -515,  -751, -2285, -6621, -18354, 12097, -510, 1817, -411, 212, -57, 12, 0,
    -5, -46, -50, -519,  -838, -2210, -7073, -18477, 11654, -734, 1733, -435, 194, -57, 11, 0,
    -5, -46, -50, -519,  -838, -2210, -7073, -18477, 11654, -734, 1733, -435, 194, -57, 11, 0,
    -5, -48, -61, -522,  -927, -2123, -7528, -18579, 11205, -944, 1647, -456, 177, -57, 10, 0,
    -5, -48, -61, -522,  -927, -2123, -7528, -18579, 11205, -944, 1647, -456, 177, -57, 10, 0,
    -6, -49, -74, -522, -1016, -2023, -7987, -18658, 10751, -1141, 1559, -474, 161, -57, 9, 0,
    -6, -49, -74, -522, -1016, -2023, -7987, -18658, 10751, -1141, 1559, -474, 161, -57, 9, 0,
    -6, -51, -87, -520, -1107, -1910, -8448, -18714, 10294, -1322, 1469, -488, 145, -56, 8, 0,
    -6, -51, -87, -520, -1107, -1910, -8448, -18714, 10294, -1322, 1469, -488, 145, -56, 8, 0,
    -7, -52, -101, -516, -1197, -1784, -8910, -18748, 9834, -1490, 1379, -500, 129, -55, 7, 0,
    -7, -52, -101, -516, -1197, -1784, -8910, -18748, 9834, -1490, 1379, -500, 129, -55, 7, 0,
};

/**
 * MMX synthesis step: runs the DCT on bandPtr into the internal history
 * buffers, then windows the result with mp3lib_decwins and stores 16-bit
 * samples.
 *
 * Output is written to every second short starting at samples (channel 0)
 * or samples + 1 (any other channel); the shorts in between are read back
 * and preserved, so two calls can fill one interleaved buffer.
 *
 * The function keeps static state (buffs, bo) and is therefore not
 * reentrant. The rolling offset bo only advances on channel == 0 calls.
 *
 * @param bandPtr  input passed unchanged to dct64_MMX_func
 * @param channel  0 selects buffs[1] and advances bo; non-zero selects buffs[0]
 * @param samples  output buffer; 32 shorts at stride 2 are written
 * @return always 0
 */
int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
{
    static short buffs[2][2][0x110] __attribute__((aligned(8)));
    static int bo = 1;
    short *b0, (*buf)[0x110], *a, *b;
    short *window;
    int bo1, i = 8; /* i: iteration count of the first asm loop */

    if (channel == 0) {
        bo = (bo - 1) & 0xf;
        buf = buffs[1];
    } else {
        samples++;
        buf = buffs[0];
    }

    /* Alternate which half-buffer receives which DCT output, based on bo parity. */
    if (bo & 1) {
        b0 = buf[1];
        bo1 = bo + 1;
        a = buf[0] + bo;
        b = buf[1] + ((bo + 1) & 0xf);
    } else {
        b0 = buf[0];
        bo1 = bo;
        b = buf[0] + bo;
        a = buf[1] + ((bo + 1) & 0xf);
    }

    dct64_MMX_func(a, b, bandPtr);
    window = mp3lib_decwins + 16 - bo1;
    //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);

    /*
     * Operands: %0 = i (loop counter), %1 = window, %2 = b0, %3 = samples.
     *
     * Numeric local labels (1:, 2:) are used instead of fixed names such as
     * .L03/.L04: GCC may emit an asm statement more than once (inlining,
     * cloning), and fixed label names would then be defined twice.
     */
    __asm __volatile(
        /* Loop 1: 8 iterations, two output samples each; %2 moves forward. */
        ASMALIGN(4)
        "1:\n\t"
        "movq (%1),%%mm0\n\t"
        "movq 64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd 32(%2),%%mm4\n\t"
        "movq 8(%1),%%mm1\n\t"
        "movq 72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd 40(%2),%%mm5\n\t"
        "movq 16(%1),%%mm2\n\t"
        "movq 80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd 48(%2),%%mm6\n\t"
        "movq 24(%1),%%mm3\n\t"
        "movq 88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd 56(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "psrad $13,%%mm0\n\t"
        "psrad $13,%%mm4\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "packssdw %%mm4,%%mm4\n\t"
        /* Merge the two new samples into the existing 4 shorts at (%3). */
        "movq (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand "MANGLE(one_null)", %%mm1\n\t"
        "pand "MANGLE(null_one)", %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%3)\n\t"
        "add $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz 1b\n\t"

        /* Single middle sample, stored as one 16-bit word. */
        "movq (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq 8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq 16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq 24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "psrad $13,%%mm0\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax, (%3)\n\t"
        "sub $32,%2\n\t"
        "add $64,%1\n\t"
        "add $4,%3\n\t"
        "movl $7,%0\n\t"

        /* Loop 2: 7 iterations, two negated samples each; %2 moves backward. */
        ASMALIGN(4)
        "2:\n\t"
        "movq (%1),%%mm0\n\t"
        "movq 64(%1),%%mm4\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "pmaddwd -32(%2),%%mm4\n\t"
        "movq 8(%1),%%mm1\n\t"
        "movq 72(%1),%%mm5\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "pmaddwd -24(%2),%%mm5\n\t"
        "movq 16(%1),%%mm2\n\t"
        "movq 80(%1),%%mm6\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "pmaddwd -16(%2),%%mm6\n\t"
        "movq 24(%1),%%mm3\n\t"
        "movq 88(%1),%%mm7\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "pmaddwd -8(%2),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "paddd %%mm4,%%mm5\n\t"
        "psrad $13,%%mm1\n\t"
        "psrad $13,%%mm5\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "packssdw %%mm5,%%mm5\n\t"
        /* Negate with saturation: 0 - value. */
        "psubd %%mm0,%%mm0\n\t"
        "psubd %%mm4,%%mm4\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "psubsw %%mm5,%%mm4\n\t"
        "movq (%3), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand "MANGLE(one_null)", %%mm1\n\t"
        "pand "MANGLE(null_one)", %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%3)\n\t"
        "sub $64,%2\n\t"
        "add $128,%1\n\t"
        "add $8,%3\n\t"
        "decl %0\n\t"
        "jnz 2b\n\t"

        /* Final negated sample, stored as one 16-bit word. */
        "movq (%1),%%mm0\n\t"
        "pmaddwd (%2),%%mm0\n\t"
        "movq 8(%1),%%mm1\n\t"
        "pmaddwd 8(%2),%%mm1\n\t"
        "movq 16(%1),%%mm2\n\t"
        "pmaddwd 16(%2),%%mm2\n\t"
        "movq 24(%1),%%mm3\n\t"
        "pmaddwd 24(%2),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "psrad $13,%%mm1\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax,(%3)\n\t"
        /* Leave MMX state so later x87 floating-point code works. */
        "emms\n\t"
        :"+r"(i), "+r"(window), "+r"(b0), "+r"(samples)
        :
        :"memory", "%eax");
    return 0;
}