annotate liba52/imdct.c @ 29690:b5e23411a51e

Make -nosub work to disable subtitles even if -slang and dvd streams are used. This still needs some additional checking that subtitle selection via dvdnav works.
author reimar
date Tue, 29 Sep 2009 06:44:10 +0000
parents cd3ae709054f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1 /*
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
2 * imdct.c
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
5 *
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
6 * The ifft algorithms in this file have been largely inspired by Dan
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
7 * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
8 *
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
9 * This file is part of a52dec, a free ATSC A-52 stream decoder.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
10 * See http://liba52.sourceforge.net/ for updates.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
11 *
14991
07f1e7669772 Mark modified files as such to comply more closely with GPL §2a.
diego
parents: 12303
diff changeset
12 * Modified for use with MPlayer, changes contained in liba52_changes.diff.
18783
0783dd397f74 CVS --> Subversion in copyright notices
diego
parents: 18721
diff changeset
13 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
14991
07f1e7669772 Mark modified files as such to comply more closely with GPL §2a.
diego
parents: 12303
diff changeset
14 * $Id$
07f1e7669772 Mark modified files as such to comply more closely with GPL §2a.
diego
parents: 12303
diff changeset
15 *
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
16 * a52dec is free software; you can redistribute it and/or modify
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
17 * it under the terms of the GNU General Public License as published by
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
18 * the Free Software Foundation; either version 2 of the License, or
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
19 * (at your option) any later version.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
20 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
21 * a52dec is distributed in the hope that it will be useful,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
24 * GNU General Public License for more details.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
25 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
26 * You should have received a copy of the GNU General Public License
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
27 * along with this program; if not, write to the Free Software
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
29 *
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
30 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
31 * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru>
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
32 * michael did port them from libac3 (untested, perhaps totally broken)
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
33 * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
34 */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
35
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
36 #include "config.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
37
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
38 #include <math.h>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
39 #include <stdio.h>
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
40 #ifdef LIBA52_DJBFFT
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
41 #include <fftc4.h>
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
42 #endif
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
43 #ifndef M_PI
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
44 #define M_PI 3.1415926535897932384626433832795029
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
45 #endif
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
46 #include <inttypes.h>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
47
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
48 #include "a52.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
49 #include "a52_internal.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
50 #include "mm_accel.h"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3908
diff changeset
51 #include "mangle.h"
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
52
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
53 void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
54
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28361
diff changeset
55 #if CONFIG_RUNTIME_CPUDETECT
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
56 #undef HAVE_AMD3DNOWEXT
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
57 #define HAVE_AMD3DNOWEXT 0
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
58 #endif
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
59
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
60 typedef struct complex_s {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
61 sample_t real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
62 sample_t imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
63 } complex_t;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
64
12303
f881c918739b attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents: 9122
diff changeset
65 static const int pm128[128] attribute_used __attribute__((aligned(16))) =
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
66 {
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
67 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
68 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
69 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
70 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
71 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
72 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
73 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
74 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
75 };
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
76
12303
f881c918739b attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents: 9122
diff changeset
77 static uint8_t attribute_used bit_reverse_512[] = {
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
78 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
79 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
80 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
81 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
82 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
83 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
84 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
85 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
86 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
87 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
88 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
89 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
90 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
91 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
92 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
93 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
94
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
95 static uint8_t fftorder[] = {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
96 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
97 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
98 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
99 252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
100 2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
101 10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
102 254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
103 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
104 };
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
105
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
106 static complex_t __attribute__((aligned(16))) buf[128];
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
107
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
108 /* Twiddle factor LUT */
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
109 static complex_t __attribute__((aligned(16))) w_1[1];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
110 static complex_t __attribute__((aligned(16))) w_2[2];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
111 static complex_t __attribute__((aligned(16))) w_4[4];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
112 static complex_t __attribute__((aligned(16))) w_8[8];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
113 static complex_t __attribute__((aligned(16))) w_16[16];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
114 static complex_t __attribute__((aligned(16))) w_32[32];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
115 static complex_t __attribute__((aligned(16))) w_64[64];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
116 static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
117
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
118 /* Twiddle factors for IMDCT */
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
119 static sample_t __attribute__((aligned(16))) xcos1[128];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
120 static sample_t __attribute__((aligned(16))) xsin1[128];
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
121
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
122 #if ARCH_X86 || ARCH_X86_64
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
123 // NOTE: SSE needs 16byte alignment or it will segfault
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
124 //
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
125 static float __attribute__((aligned(16))) sseSinCos1c[256];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
126 static float __attribute__((aligned(16))) sseSinCos1d[256];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
127 static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
128 //static float __attribute__((aligned(16))) sseW0[4];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
129 static float __attribute__((aligned(16))) sseW1[8];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
130 static float __attribute__((aligned(16))) sseW2[16];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
131 static float __attribute__((aligned(16))) sseW3[32];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
132 static float __attribute__((aligned(16))) sseW4[64];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
133 static float __attribute__((aligned(16))) sseW5[128];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
134 static float __attribute__((aligned(16))) sseW6[256];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
135 static float __attribute__((aligned(16))) *sseW[7]=
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
136 {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
137 static float __attribute__((aligned(16))) sseWindow[512];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
138 #endif
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
139
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
140 /* Root values for IFFT */
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
141 static sample_t roots16[3];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
142 static sample_t roots32[7];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
143 static sample_t roots64[15];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
144 static sample_t roots128[31];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
145
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
146 /* Twiddle factors for IMDCT */
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
147 static complex_t pre1[128];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
148 static complex_t post1[64];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
149 static complex_t pre2[64];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
150 static complex_t post2[32];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
151
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
152 static sample_t a52_imdct_window[256];
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
153
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
154 static void (* ifft128) (complex_t * buf);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
155 static void (* ifft64) (complex_t * buf);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
156
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
157 static inline void ifft2 (complex_t * buf)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
158 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
159 double r, i;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
160
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
161 r = buf[0].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
162 i = buf[0].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
163 buf[0].real += buf[1].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
164 buf[0].imag += buf[1].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
165 buf[1].real = r - buf[1].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
166 buf[1].imag = i - buf[1].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
167 }
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
168
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
169 static inline void ifft4 (complex_t * buf)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
170 {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
171 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
172
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
173 tmp1 = buf[0].real + buf[1].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
174 tmp2 = buf[3].real + buf[2].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
175 tmp3 = buf[0].imag + buf[1].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
176 tmp4 = buf[2].imag + buf[3].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
177 tmp5 = buf[0].real - buf[1].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
178 tmp6 = buf[0].imag - buf[1].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
179 tmp7 = buf[2].imag - buf[3].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
180 tmp8 = buf[3].real - buf[2].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
181
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
182 buf[0].real = tmp1 + tmp2;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
183 buf[0].imag = tmp3 + tmp4;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
184 buf[2].real = tmp1 - tmp2;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
185 buf[2].imag = tmp3 - tmp4;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
186 buf[1].real = tmp5 + tmp7;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
187 buf[1].imag = tmp6 + tmp8;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
188 buf[3].real = tmp5 - tmp7;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
189 buf[3].imag = tmp6 - tmp8;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
190 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
191
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
192 /* the basic split-radix ifft butterfly */
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
193
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
194 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
195 tmp5 = a2.real * wr + a2.imag * wi; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
196 tmp6 = a2.imag * wr - a2.real * wi; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
197 tmp7 = a3.real * wr - a3.imag * wi; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
198 tmp8 = a3.imag * wr + a3.real * wi; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
199 tmp1 = tmp5 + tmp7; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
200 tmp2 = tmp6 + tmp8; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
201 tmp3 = tmp6 - tmp8; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
202 tmp4 = tmp7 - tmp5; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
203 a2.real = a0.real - tmp1; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
204 a2.imag = a0.imag - tmp2; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
205 a3.real = a1.real - tmp3; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
206 a3.imag = a1.imag - tmp4; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
207 a0.real += tmp1; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
208 a0.imag += tmp2; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
209 a1.real += tmp3; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
210 a1.imag += tmp4; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
211 } while (0)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
212
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
213 /* split-radix ifft butterfly, specialized for wr=1 wi=0 */
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
214
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
215 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
216 tmp1 = a2.real + a3.real; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
217 tmp2 = a2.imag + a3.imag; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
218 tmp3 = a2.imag - a3.imag; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
219 tmp4 = a3.real - a2.real; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
220 a2.real = a0.real - tmp1; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
221 a2.imag = a0.imag - tmp2; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
222 a3.real = a1.real - tmp3; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
223 a3.imag = a1.imag - tmp4; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
224 a0.real += tmp1; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
225 a0.imag += tmp2; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
226 a1.real += tmp3; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
227 a1.imag += tmp4; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
228 } while (0)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
229
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
230 /* split-radix ifft butterfly, specialized for wr=wi */
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
231
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
232 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
233 tmp5 = (a2.real + a2.imag) * w; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
234 tmp6 = (a2.imag - a2.real) * w; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
235 tmp7 = (a3.real - a3.imag) * w; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
236 tmp8 = (a3.imag + a3.real) * w; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
237 tmp1 = tmp5 + tmp7; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
238 tmp2 = tmp6 + tmp8; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
239 tmp3 = tmp6 - tmp8; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
240 tmp4 = tmp7 - tmp5; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
241 a2.real = a0.real - tmp1; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
242 a2.imag = a0.imag - tmp2; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
243 a3.real = a1.real - tmp3; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
244 a3.imag = a1.imag - tmp4; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
245 a0.real += tmp1; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
246 a0.imag += tmp2; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
247 a1.real += tmp3; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
248 a1.imag += tmp4; \
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
249 } while (0)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
250
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
251 static inline void ifft8 (complex_t * buf)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
252 {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
253 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
254
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
255 ifft4 (buf);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
256 ifft2 (buf + 4);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
257 ifft2 (buf + 6);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
258 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
259 BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
260 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
261
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
262 static void ifft_pass (complex_t * buf, sample_t * weight, int n)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
263 {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
264 complex_t * buf1;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
265 complex_t * buf2;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
266 complex_t * buf3;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
267 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
8254
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 4497
diff changeset
268 int i;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
269
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
270 buf++;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
271 buf1 = buf + n;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
272 buf2 = buf + 2 * n;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
273 buf3 = buf + 3 * n;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
274
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
275 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
276
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
277 i = n - 1;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
278
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
279 do {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
280 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], weight[n], weight[2*i]);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
281 buf++;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
282 buf1++;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
283 buf2++;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
284 buf3++;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
285 weight++;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
286 } while (--i);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
287 }
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
288
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
289 static void ifft16 (complex_t * buf)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
290 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
291 ifft8 (buf);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
292 ifft4 (buf + 8);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
293 ifft4 (buf + 12);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
294 ifft_pass (buf, roots16 - 4, 4);
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
295 }
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
296
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
297 static void ifft32 (complex_t * buf)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
298 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
299 ifft16 (buf);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
300 ifft8 (buf + 16);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
301 ifft8 (buf + 24);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
302 ifft_pass (buf, roots32 - 8, 8);
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
303 }
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
304
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
305 static void ifft64_c (complex_t * buf)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
306 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
307 ifft32 (buf);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
308 ifft16 (buf + 32);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
309 ifft16 (buf + 48);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
310 ifft_pass (buf, roots64 - 16, 16);
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
311 }
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
312
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
313 static void ifft128_c (complex_t * buf)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
314 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
315 ifft32 (buf);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
316 ifft16 (buf + 32);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
317 ifft16 (buf + 48);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
318 ifft_pass (buf, roots64 - 16, 16);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
319
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
320 ifft32 (buf + 64);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
321 ifft32 (buf + 96);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
322 ifft_pass (buf, roots128 - 32, 32);
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
323 }
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
324
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
325 void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
326 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
327 int i, k;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
328 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
329 const sample_t * window = a52_imdct_window;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
330 complex_t buf[128];
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
331
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
332 for (i = 0; i < 128; i++) {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
333 k = fftorder[i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
334 t_r = pre1[i].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
335 t_i = pre1[i].imag;
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
336
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
337 buf[i].real = t_i * data[255-k] + t_r * data[k];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
338 buf[i].imag = t_r * data[255-k] - t_i * data[k];
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
339 }
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
340
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
341 ifft128 (buf);
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
342
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
343 /* Post IFFT complex multiply plus IFFT complex conjugate*/
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
344 /* Window and convert to real valued signal */
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
345 for (i = 0; i < 64; i++) {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
346 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
347 t_r = post1[i].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
348 t_i = post1[i].imag;
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
349
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
350 a_r = t_r * buf[i].real + t_i * buf[i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
351 a_i = t_i * buf[i].real - t_r * buf[i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
352 b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
353 b_i = t_r * buf[127-i].real - t_i * buf[127-i].imag;
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
354
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
355 w_1 = window[2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
356 w_2 = window[255-2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
357 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
358 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
359 delay[2*i] = a_i;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
360
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
361 w_1 = window[2*i+1];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
362 w_2 = window[254-2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
363 data[2*i+1] = delay[2*i+1] * w_2 + b_r * w_1 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
364 data[254-2*i] = delay[2*i+1] * w_1 - b_r * w_2 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
365 delay[2*i+1] = b_i;
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
366 }
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
367 }
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
368
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
369 #if HAVE_ALTIVEC
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
370
25328
6f0309e575e0 There is a check for altivec.h in configure so use the preprocessor directive
diego
parents: 25327
diff changeset
371 #ifdef HAVE_ALTIVEC_H
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9001
diff changeset
372 #include <altivec.h>
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9001
diff changeset
373 #endif
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9001
diff changeset
374
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
375 // used to build register permutation vectors (vcprm)
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
376 // the 's' are for words in the _s_econd vector
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
377 #define WORD_0 0x00,0x01,0x02,0x03
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
378 #define WORD_1 0x04,0x05,0x06,0x07
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
379 #define WORD_2 0x08,0x09,0x0a,0x0b
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
380 #define WORD_3 0x0c,0x0d,0x0e,0x0f
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
381 #define WORD_s0 0x10,0x11,0x12,0x13
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
382 #define WORD_s1 0x14,0x15,0x16,0x17
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
383 #define WORD_s2 0x18,0x19,0x1a,0x1b
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
384 #define WORD_s3 0x1c,0x1d,0x1e,0x1f
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
385
27318
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
386 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
387 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
388
27318
bb5ed9aa34fc Remove AltiVec vector declaration compiler compatibility macros.
diego
parents: 26895
diff changeset
389 #define FOUROF(a) {a,a,a,a}
25995
236ab58453f7 Refactor AltiVec macros as done for FFmpeg.
diego
parents: 25328
diff changeset
390
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
391 // vcprmle is used to keep the same index as in the SSE version.
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
392 // it's the same as vcprm, with the index order reversed
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
393 // ('le' is Little Endian)
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
394 #define vcprmle(a,b,c,d) vcprm(d,c,b,a)
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
395
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
396 // used to build inverse/identity vectors (vcii)
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
397 // n is _n_egative, p is _p_ositive
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
398 #define FLOAT_n -1.
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
399 #define FLOAT_p 1.
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
400
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
401
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
402 void
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
403 imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
404 {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
405 int i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
406 int k;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
407 int p,q;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
408 int m;
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
409 long two_m;
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
410 long two_m_plus_one;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
411
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
412 sample_t tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
413 sample_t tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
414 sample_t tmp_a_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
415 sample_t tmp_a_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
416
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
417 sample_t *data_ptr;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
418 sample_t *delay_ptr;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
419 sample_t *window_ptr;
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
420
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
421 /* 512 IMDCT with source and dest data in 'data' */
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
422
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
423 /* Pre IFFT complex multiply plus IFFT complex conjugate & reordering */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
424 for( i=0; i < 128; i++) {
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
425 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
426 int j= bit_reverse_512[i];
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
427 buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
428 buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j]));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
429 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
430
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
431 /* 1. iteration */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
432 for(i = 0; i < 128; i += 2) {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
433 #if 0
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
434 tmp_a_r = buf[i].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
435 tmp_a_i = buf[i].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
436 tmp_b_r = buf[i+1].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
437 tmp_b_i = buf[i+1].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
438 buf[i].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
439 buf[i].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
440 buf[i+1].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
441 buf[i+1].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
442 #else
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
443 vector float temp, bufv;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
444
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
445 bufv = vec_ld(i << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
446 temp = vec_perm(bufv, bufv, vcprm(2,3,0,1));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
447 bufv = vec_madd(bufv, vcii(p,p,n,n), temp);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
448 vec_st(bufv, i << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
449 #endif
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
450 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
451
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
452 /* 2. iteration */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
453 // Note w[1]={{1,0}, {0,-1}}
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
454 for(i = 0; i < 128; i += 4) {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
455 #if 0
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
456 tmp_a_r = buf[i].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
457 tmp_a_i = buf[i].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
458 tmp_b_r = buf[i+2].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
459 tmp_b_i = buf[i+2].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
460 buf[i].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
461 buf[i].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
462 buf[i+2].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
463 buf[i+2].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
464 tmp_a_r = buf[i+1].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
465 tmp_a_i = buf[i+1].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
466 /* WARNING: im <-> re here ! */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
467 tmp_b_r = buf[i+3].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
468 tmp_b_i = buf[i+3].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
469 buf[i+1].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
470 buf[i+1].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
471 buf[i+3].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
472 buf[i+3].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
473 #else
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
474 vector float buf01, buf23, temp1, temp2;
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
475
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
476 buf01 = vec_ld((i + 0) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
477 buf23 = vec_ld((i + 2) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
478 buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
479
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
480 temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
481 temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
482
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
483 vec_st(temp1, (i + 0) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
484 vec_st(temp2, (i + 2) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
485 #endif
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
486 }
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
487
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
488 /* 3. iteration */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
489 for(i = 0; i < 128; i += 8) {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
490 #if 0
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
491 tmp_a_r = buf[i].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
492 tmp_a_i = buf[i].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
493 tmp_b_r = buf[i+4].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
494 tmp_b_i = buf[i+4].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
495 buf[i].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
496 buf[i].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
497 buf[i+4].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
498 buf[i+4].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
499 tmp_a_r = buf[1+i].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
500 tmp_a_i = buf[1+i].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
501 tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
502 tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
503 buf[1+i].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
504 buf[1+i].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
505 buf[i+5].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
506 buf[i+5].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
507 tmp_a_r = buf[i+2].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
508 tmp_a_i = buf[i+2].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
509 /* WARNING re <-> im & sign */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
510 tmp_b_r = buf[i+6].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
511 tmp_b_i = - buf[i+6].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
512 buf[i+2].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
513 buf[i+2].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
514 buf[i+6].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
515 buf[i+6].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
516 tmp_a_r = buf[i+3].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
517 tmp_a_i = buf[i+3].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
518 tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
519 tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
520 buf[i+3].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
521 buf[i+3].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
522 buf[i+7].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
523 buf[i+7].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
524 #else
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
525 vector float buf01, buf23, buf45, buf67;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
526
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
527 buf01 = vec_ld((i + 0) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
528 buf23 = vec_ld((i + 2) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
529
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
530 tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
531 tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
532 buf[i+5].real = tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
533 buf[i+5].imag = tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
534 tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
535 tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
536 buf[i+7].real = tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
537 buf[i+7].imag = tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
538
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
539 buf23 = vec_ld((i + 2) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
540 buf45 = vec_ld((i + 4) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
541 buf67 = vec_ld((i + 6) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
542 buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3));
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
543
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
544 vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
545 vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
546 vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
547 vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
548 #endif
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
549 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
550
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
551 /* 4-7. iterations */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
552 for (m=3; m < 7; m++) {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
553 two_m = (1 << m);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
554
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
555 two_m_plus_one = two_m<<1;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
556
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
557 for(i = 0; i < 128; i += two_m_plus_one) {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
558 for(k = 0; k < two_m; k+=2) {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
559 #if 0
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
560 int p = k + i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
561 int q = p + two_m;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
562 tmp_a_r = buf[p].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
563 tmp_a_i = buf[p].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
564 tmp_b_r =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
565 buf[q].real * w[m][k].real -
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
566 buf[q].imag * w[m][k].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
567 tmp_b_i =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
568 buf[q].imag * w[m][k].real +
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
569 buf[q].real * w[m][k].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
570 buf[p].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
571 buf[p].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
572 buf[q].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
573 buf[q].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
574
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
575 tmp_a_r = buf[(p + 1)].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
576 tmp_a_i = buf[(p + 1)].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
577 tmp_b_r =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
578 buf[(q + 1)].real * w[m][(k + 1)].real -
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
579 buf[(q + 1)].imag * w[m][(k + 1)].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
580 tmp_b_i =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
581 buf[(q + 1)].imag * w[m][(k + 1)].real +
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
582 buf[(q + 1)].real * w[m][(k + 1)].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
583 buf[(p + 1)].real = tmp_a_r + tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
584 buf[(p + 1)].imag = tmp_a_i + tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
585 buf[(q + 1)].real = tmp_a_r - tmp_b_r;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
586 buf[(q + 1)].imag = tmp_a_i - tmp_b_i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
587 #else
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
588 int p = k + i;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
589 int q = p + two_m;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
590 vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4;
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9001
diff changeset
591 const vector float vczero = (const vector float)FOUROF(0.);
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
592 // first compute buf[q] and buf[q+1]
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
593 vecq = vec_ld(q << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
594 vecw = vec_ld(0, (float*)&(w[m][k]));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
595 temp1 = vec_madd(vecq, vecw, vczero);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
596 temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
597 temp2 = vec_madd(temp2, vecw, vczero);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
598 temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
599 temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
600 vecq = vec_madd(temp4, vcii(n,p,n,p), temp3);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
601 // then butterfly with buf[p] and buf[p+1]
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
602 vecp = vec_ld(p << 3, (float*)buf);
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
603
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
604 temp1 = vec_add(vecp, vecq);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
605 temp2 = vec_sub(vecp, vecq);
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
606
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
607 vec_st(temp1, p << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
608 vec_st(temp2, q << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
609 #endif
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
610 }
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
611 }
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
612 }
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
613
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
614 /* Post IFFT complex multiply plus IFFT complex conjugate*/
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
615 for( i=0; i < 128; i+=4) {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
616 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
617 #if 0
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
618 tmp_a_r = buf[(i + 0)].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
619 tmp_a_i = -1.0 * buf[(i + 0)].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
620 buf[(i + 0)].real =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
621 (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
622 buf[(i + 0)].imag =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
623 (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
624
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
625 tmp_a_r = buf[(i + 1)].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
626 tmp_a_i = -1.0 * buf[(i + 1)].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
627 buf[(i + 1)].real =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
628 (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
629 buf[(i + 1)].imag =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
630 (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
631
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
632 tmp_a_r = buf[(i + 2)].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
633 tmp_a_i = -1.0 * buf[(i + 2)].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
634 buf[(i + 2)].real =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
635 (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
636 buf[(i + 2)].imag =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
637 (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
638
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
639 tmp_a_r = buf[(i + 3)].real;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
640 tmp_a_i = -1.0 * buf[(i + 3)].imag;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
641 buf[(i + 3)].real =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
642 (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
643 buf[(i + 3)].imag =
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
644 (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
645 #else
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
646 vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
647 vector float temp0022, temp1133, tempCS01;
9122
5ba896a38d75 The two attached patches *should* allow for proper
arpi
parents: 9001
diff changeset
648 const vector float vczero = (const vector float)FOUROF(0.);
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
649
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
650 bufv_0 = vec_ld((i + 0) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
651 bufv_2 = vec_ld((i + 2) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
652
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
653 cosv = vec_ld(i << 2, xcos1);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
654 sinv = vec_ld(i << 2, xsin1);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
655
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
656 temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
657 temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
658 tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
659 temp1 = vec_madd(temp0022, tempCS01, vczero);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
660 tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
661 temp2 = vec_madd(temp1133, tempCS01, vczero);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
662 bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1);
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
663
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
664 vec_st(bufv_0, (i + 0) << 3, (float*)buf);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
665
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
666 /* idem with bufv_2 and high-order cosv/sinv */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
667
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
668 temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
669 temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
670 tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
671 temp1 = vec_madd(temp0022, tempCS01, vczero);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
672 tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3));
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
673 temp2 = vec_madd(temp1133, tempCS01, vczero);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
674 bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1);
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
675
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
676 vec_st(bufv_2, (i + 2) << 3, (float*)buf);
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
677
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
678 #endif
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
679 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
680
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
681 data_ptr = data;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
682 delay_ptr = delay;
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
683 window_ptr = a52_imdct_window;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
684
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
685 /* Window and convert to real valued signal */
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
686 for(i=0; i< 64; i++) {
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
687 *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
688 *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
689 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
690
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
691 for(i=0; i< 64; i++) {
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
692 *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
693 *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
694 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
695
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
696 /* The trailing edge of the window goes into the delay line */
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
697 delay_ptr = delay;
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
698
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
699 for(i=0; i< 64; i++) {
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
700 *delay_ptr++ = -buf[64+i].real * *--window_ptr;
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
701 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
702 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
703
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
704 for(i=0; i<64; i++) {
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
705 *delay_ptr++ = buf[i].imag * *--window_ptr;
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
706 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
707 }
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
708 }
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
709 #endif
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
710
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
711
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
712 // Stuff below this line is borrowed from libac3
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
713 #include "srfftp.h"
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
714 #if ARCH_X86 || ARCH_X86_64
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
715 #undef HAVE_AMD3DNOW
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
716 #define HAVE_AMD3DNOW 1
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
717 #include "srfftp_3dnow.h"
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
718
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
719 const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }};
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
720 const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }};
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
721 const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
722
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
723 #undef HAVE_AMD3DNOWEXT
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
724 #define HAVE_AMD3DNOWEXT 0
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
725 #include "imdct_3dnow.h"
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
726 #undef HAVE_AMD3DNOWEXT
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28326
diff changeset
727 #define HAVE_AMD3DNOWEXT 1
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
728 #include "imdct_3dnow.h"
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
729
29601
cd3ae709054f Disable liba52 SSE imdct for x86_64+PIC (e.g. OSX 64 bit) since it will
reimar
parents: 29264
diff changeset
730 #if !ARCH_X86_64 || !defined(PIC)
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
731 void
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
732 imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
733 {
8254
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 4497
diff changeset
734 /* int i,k;
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 4497
diff changeset
735 int p,q;*/
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
736 int m;
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
737 long two_m;
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
738 long two_m_plus_one;
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
739 long two_m_plus_one_shl3;
15617
130dd060f723 one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents: 14991
diff changeset
740 complex_t *buf_offset;
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
741
8254
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 4497
diff changeset
742 /* sample_t tmp_a_i;
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
743 sample_t tmp_a_r;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
744 sample_t tmp_b_i;
8254
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 4497
diff changeset
745 sample_t tmp_b_r;*/
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
746
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
747 sample_t *data_ptr;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
748 sample_t *delay_ptr;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
749 sample_t *window_ptr;
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
750
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
751 /* 512 IMDCT with source and dest data in 'data' */
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
752 /* see the C version (dct_do_512()), it is almost identical, just in C */
3623
3f1c2c06d0d8 adding some comments
michael
parents: 3584
diff changeset
753
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
754 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
755 /* Bit reversed shuffling */
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
756 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
757 "xor %%"REG_S", %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
758 "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
759 "mov $1008, %%"REG_D" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
760 "push %%"REG_BP" \n\t" //use ebp without telling gcc
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
761 ASMALIGN(4)
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
762 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
763 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
764 "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
765 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
766 "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi
3584
7c4046c04be3 removing unnecessary sse sin/cos LUT
michael
parents: 3581
diff changeset
767 "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
768 "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t"
3584
7c4046c04be3 removing unnecessary sse sin/cos LUT
michael
parents: 3581
diff changeset
769 "mulps %%xmm0, %%xmm2 \n\t"
7c4046c04be3 removing unnecessary sse sin/cos LUT
michael
parents: 3581
diff changeset
770 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
771 "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t"
3584
7c4046c04be3 removing unnecessary sse sin/cos LUT
michael
parents: 3581
diff changeset
772 "subps %%xmm0, %%xmm2 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
773 "movzb (%%"REG_a"), %%"REG_d" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
774 "movzb 1(%%"REG_a"), %%"REG_BP" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
775 "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
776 "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
777 "add $16, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
778 "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
779 "sub $16, %%"REG_D" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
780 "jnc 1b \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
781 "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g*
16189
72764c0dad8a Fixes segfault on IA-32 machines caused by the ASM patch for AMD-64 for a52.
gpoirier
parents: 16173
diff changeset
782 :: "b" (data), "c" (buf)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
783 : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
784 );
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
785
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
786
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
787 /* FFT Merge */
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
788 /* unoptimized variant
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
789 for (m=1; m < 7; m++) {
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
790 if(m)
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
791 two_m = (1 << m);
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
792 else
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
793 two_m = 1;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
794
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
795 two_m_plus_one = (1 << (m+1));
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
796
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
797 for(i = 0; i < 128; i += two_m_plus_one) {
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
798 for(k = 0; k < two_m; k++) {
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
799 p = k + i;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
800 q = p + two_m;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
801 tmp_a_r = buf[p].real;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
802 tmp_a_i = buf[p].imag;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
803 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
804 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
805 buf[p].real = tmp_a_r + tmp_b_r;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
806 buf[p].imag = tmp_a_i + tmp_b_i;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
807 buf[q].real = tmp_a_r - tmp_b_r;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
808 buf[q].imag = tmp_a_i - tmp_b_i;
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
809 }
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
810 }
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
811 }
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
812 */
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
813
3623
3f1c2c06d0d8 adding some comments
michael
parents: 3584
diff changeset
814 /* 1. iteration */
3549
2e21accd86a8 cleanup
michael
parents: 3546
diff changeset
815 // Note w[0][0]={1,0}
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
816 __asm__ volatile(
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
817 "xorps %%xmm1, %%xmm1 \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
818 "xorps %%xmm2, %%xmm2 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
819 "mov %0, %%"REG_S" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
820 ASMALIGN(4)
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
821 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
822 "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
823 "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
824 "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
825 "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q]
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
826 "addps %%xmm1, %%xmm0 \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
827 "subps %%xmm2, %%xmm0 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
828 "movaps %%xmm0, (%%"REG_S")\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
829 "add $16, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
830 "cmp %1, %%"REG_S" \n\t"
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
831 " jb 1b \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
832 :: "g" (buf), "r" (buf + 128)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
833 : "%"REG_S
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
834 );
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
835
3623
3f1c2c06d0d8 adding some comments
michael
parents: 3584
diff changeset
836 /* 2. iteration */
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
837 // Note w[1]={{1,0}, {0,-1}}
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
838 __asm__ volatile(
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3908
diff changeset
839 "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
840 "mov %0, %%"REG_S" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
841 ASMALIGN(4)
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
842 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
843 "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
844 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
845 "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
846 "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
847 "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
848 "addps %%xmm2, %%xmm0 \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
849 "subps %%xmm2, %%xmm1 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
850 "movaps %%xmm0, (%%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
851 "movaps %%xmm1, 16(%%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
852 "add $32, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
853 "cmp %1, %%"REG_S" \n\t"
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
854 " jb 1b \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
855 :: "g" (buf), "r" (buf + 128)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
856 : "%"REG_S
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
857 );
3549
2e21accd86a8 cleanup
michael
parents: 3546
diff changeset
858
3623
3f1c2c06d0d8 adding some comments
michael
parents: 3584
diff changeset
859 /* 3. iteration */
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
860 /*
3483390a902b sse opt
michael
parents: 3529
diff changeset
861 Note sseW2+0={1,1,sqrt(2),sqrt(2)}
3483390a902b sse opt
michael
parents: 3529
diff changeset
862 Note sseW2+16={0,0,sqrt(2),-sqrt(2)}
3483390a902b sse opt
michael
parents: 3529
diff changeset
863 Note sseW2+32={0,0,-sqrt(2),-sqrt(2)}
3483390a902b sse opt
michael
parents: 3529
diff changeset
864 Note sseW2+48={1,-1,sqrt(2),-sqrt(2)}
3483390a902b sse opt
michael
parents: 3529
diff changeset
865 */
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
866 __asm__ volatile(
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
867 "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
868 "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
869 "xorps %%xmm5, %%xmm5 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
870 "xorps %%xmm2, %%xmm2 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
871 "mov %0, %%"REG_S" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
872 ASMALIGN(4)
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
873 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
874 "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
875 "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3908
diff changeset
876 "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3908
diff changeset
877 "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7
3537
d7e5a32643c9 C optimizations
michael
parents: 3534
diff changeset
878 "mulps %%xmm2, %%xmm4 \n\t"
d7e5a32643c9 C optimizations
michael
parents: 3534
diff changeset
879 "mulps %%xmm3, %%xmm5 \n\t"
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
880 "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5
3483390a902b sse opt
michael
parents: 3529
diff changeset
881 "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7
3537
d7e5a32643c9 C optimizations
michael
parents: 3534
diff changeset
882 "mulps %%xmm6, %%xmm3 \n\t"
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
883 "mulps %%xmm7, %%xmm2 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
884 "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
885 "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
886 "addps %%xmm4, %%xmm2 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
887 "addps %%xmm5, %%xmm3 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
888 "movaps %%xmm2, %%xmm4 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
889 "movaps %%xmm3, %%xmm5 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
890 "addps %%xmm0, %%xmm2 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
891 "addps %%xmm1, %%xmm3 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
892 "subps %%xmm4, %%xmm0 \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
893 "subps %%xmm5, %%xmm1 \n\t"
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
894 "movaps %%xmm2, (%%"REG_S") \n\t"
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
895 "movaps %%xmm3, 16(%%"REG_S") \n\t"
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
896 "movaps %%xmm0, 32(%%"REG_S") \n\t"
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
897 "movaps %%xmm1, 48(%%"REG_S") \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
898 "add $64, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
899 "cmp %1, %%"REG_S" \n\t"
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
900 " jb 1b \n\t"
3483390a902b sse opt
michael
parents: 3529
diff changeset
901 :: "g" (buf), "r" (buf + 128)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
902 : "%"REG_S
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
903 );
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
904
3623
3f1c2c06d0d8 adding some comments
michael
parents: 3584
diff changeset
905 /* 4-7. iterations */
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
906 for (m=3; m < 7; m++) {
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
907 two_m = (1 << m);
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
908 two_m_plus_one = two_m<<1;
15617
130dd060f723 one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents: 14991
diff changeset
909 two_m_plus_one_shl3 = (two_m_plus_one<<3);
130dd060f723 one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents: 14991
diff changeset
910 buf_offset = buf+128;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
911 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
912 "mov %0, %%"REG_S" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
913 ASMALIGN(4)
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
914 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
915 "xor %%"REG_D", %%"REG_D" \n\t" // k
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
916 "lea (%%"REG_S", %3), %%"REG_d" \n\t"
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
917 "2: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
918 "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
919 "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t"
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
920 "mulps %%xmm1, %%xmm2 \n\t"
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
921 "shufps $0xB1, %%xmm1, %%xmm1 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
922 "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
923 "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t"
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
924 "addps %%xmm2, %%xmm1 \n\t"
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
925 "movaps %%xmm1, %%xmm2 \n\t"
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
926 "addps %%xmm0, %%xmm1 \n\t"
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
927 "subps %%xmm2, %%xmm0 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
928 "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
929 "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
930 "add $16, %%"REG_D" \n\t"
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
931 "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
932 "jb 2b \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
933 "add %2, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
934 "cmp %1, %%"REG_S" \n\t"
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
935 " jb 1b \n\t"
15617
130dd060f723 one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents: 14991
diff changeset
936 :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3),
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
937 "r" (sseW[m])
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
938 : "%"REG_S, "%"REG_D, "%"REG_d
3546
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
939 );
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
940 }
4e772a3c6b62 sse opt
michael
parents: 3537
diff changeset
941
3623
3f1c2c06d0d8 adding some comments
michael
parents: 3584
diff changeset
942 /* Post IFFT complex multiply plus IFFT complex conjugate*/
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
943 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
944 "mov $-1024, %%"REG_S" \n\t"
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
945 ASMALIGN(4)
3581
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
946 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
947 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
948 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
3581
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
949 "shufps $0xB1, %%xmm0, %%xmm0 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
950 "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
951 "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t"
3581
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
952 "addps %%xmm1, %%xmm0 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
953 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
954 "add $16, %%"REG_S" \n\t"
3581
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
955 " jnz 1b \n\t"
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
956 :: "r" (buf+128)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
957 : "%"REG_S
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
958 );
3581
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
959
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
960
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
961 data_ptr = data;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
962 delay_ptr = delay;
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
963 window_ptr = a52_imdct_window;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
964
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
965 /* Window and convert to real valued signal */
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
966 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
967 "xor %%"REG_D", %%"REG_D" \n\t" // 0
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
968 "xor %%"REG_S", %%"REG_S" \n\t" // 0
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
969 "movss %3, %%xmm2 \n\t" // bias
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
970 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
971 ASMALIGN(4)
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
972 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
973 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
974 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
975 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
976 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
977 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
978 "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
979 "addps (%2, %%"REG_S"), %%xmm0 \n\t"
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
980 "addps %%xmm2, %%xmm0 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
981 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
982 "add $16, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
983 "sub $16, %%"REG_D" \n\t"
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
984 "cmp $512, %%"REG_S" \n\t"
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
985 " jb 1b \n\t"
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
986 :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
987 : "%"REG_S, "%"REG_D
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
988 );
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
989 data_ptr+=128;
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
990 delay_ptr+=128;
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
991 // window_ptr+=128;
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
992
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
993 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
994 "mov $1024, %%"REG_D" \n\t" // 1024
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
995 "xor %%"REG_S", %%"REG_S" \n\t" // 0
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
996 "movss %3, %%xmm2 \n\t" // bias
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
997 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
998 ASMALIGN(4)
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
999 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1000 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1001 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1002 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1003 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1004 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1005 "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1006 "addps (%2, %%"REG_S"), %%xmm0 \n\t"
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1007 "addps %%xmm2, %%xmm0 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1008 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1009 "add $16, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1010 "sub $16, %%"REG_D" \n\t"
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1011 "cmp $512, %%"REG_S" \n\t"
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1012 " jb 1b \n\t"
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1013 :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1014 : "%"REG_S, "%"REG_D
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1015 );
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1016 data_ptr+=128;
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1017 // window_ptr+=128;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1018
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1019 /* The trailing edge of the window goes into the delay line */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1020 delay_ptr = delay;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1021
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
1022 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1023 "xor %%"REG_D", %%"REG_D" \n\t" // 0
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1024 "xor %%"REG_S", %%"REG_S" \n\t" // 0
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
1025 ASMALIGN(4)
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1026 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1027 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1028 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1029 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1030 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1031 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1032 "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1033 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1034 "add $16, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1035 "sub $16, %%"REG_D" \n\t"
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1036 "cmp $512, %%"REG_S" \n\t"
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1037 " jb 1b \n\t"
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1038 :: "r" (buf+64), "r" (delay_ptr)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1039 : "%"REG_S, "%"REG_D
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1040 );
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1041 delay_ptr+=128;
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1042 // window_ptr-=128;
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1043
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 27318
diff changeset
1044 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1045 "mov $1024, %%"REG_D" \n\t" // 1024
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1046 "xor %%"REG_S", %%"REG_S" \n\t" // 0
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18783
diff changeset
1047 ASMALIGN(4)
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1048 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1049 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1050 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1051 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1052 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1053 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1054 "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1055 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1056 "add $16, %%"REG_S" \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1057 "sub $16, %%"REG_D" \n\t"
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1058 "cmp $512, %%"REG_S" \n\t"
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1059 " jb 1b \n\t"
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1060 :: "r" (buf), "r" (delay_ptr)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1061 : "%"REG_S, "%"REG_D
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1062 );
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1063 }
29601
cd3ae709054f Disable liba52 SSE imdct for x86_64+PIC (e.g. OSX 64 bit) since it will
reimar
parents: 29264
diff changeset
1064 #endif
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1065 #endif // ARCH_X86 || ARCH_X86_64
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1066
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1067 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1068 {
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1069 int i, k;
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1070 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1071 const sample_t * window = a52_imdct_window;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1072 complex_t buf1[64], buf2[64];
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1073
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1074 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1075 for (i = 0; i < 64; i++) {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1076 k = fftorder[i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1077 t_r = pre2[i].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1078 t_i = pre2[i].imag;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1079
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1080 buf1[i].real = t_i * data[254-k] + t_r * data[k];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1081 buf1[i].imag = t_r * data[254-k] - t_i * data[k];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1082
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1083 buf2[i].real = t_i * data[255-k] + t_r * data[k+1];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1084 buf2[i].imag = t_r * data[255-k] - t_i * data[k+1];
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1085 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1086
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1087 ifft64 (buf1);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1088 ifft64 (buf2);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1089
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1090 /* Post IFFT complex multiply */
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1091 /* Window and convert to real valued signal */
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1092 for (i = 0; i < 32; i++) {
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1093 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1094 t_r = post2[i].real;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1095 t_i = post2[i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1096
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1097 a_r = t_r * buf1[i].real + t_i * buf1[i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1098 a_i = t_i * buf1[i].real - t_r * buf1[i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1099 b_r = t_i * buf1[63-i].real + t_r * buf1[63-i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1100 b_i = t_r * buf1[63-i].real - t_i * buf1[63-i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1101
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1102 c_r = t_r * buf2[i].real + t_i * buf2[i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1103 c_i = t_i * buf2[i].real - t_r * buf2[i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1104 d_r = t_i * buf2[63-i].real + t_r * buf2[63-i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1105 d_i = t_r * buf2[63-i].real - t_i * buf2[63-i].imag;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1106
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1107 w_1 = window[2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1108 w_2 = window[255-2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1109 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1110 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1111 delay[2*i] = c_i;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1112
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1113 w_1 = window[128+2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1114 w_2 = window[127-2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1115 data[128+2*i] = delay[127-2*i] * w_2 + a_i * w_1 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1116 data[127-2*i] = delay[127-2*i] * w_1 - a_i * w_2 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1117 delay[127-2*i] = c_r;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1118
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1119 w_1 = window[2*i+1];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1120 w_2 = window[254-2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1121 data[2*i+1] = delay[2*i+1] * w_2 - b_i * w_1 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1122 data[254-2*i] = delay[2*i+1] * w_1 + b_i * w_2 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1123 delay[2*i+1] = d_r;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1124
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1125 w_1 = window[129+2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1126 w_2 = window[126-2*i];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1127 data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1128 data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1129 delay[126-2*i] = d_i;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1130 }
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1131 }
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1132
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1133 static double besselI0 (double x)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1134 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1135 double bessel = 1;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1136 int i = 100;
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1137
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1138 do
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1139 bessel = bessel * x / (i * i) + 1;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1140 while (--i);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1141 return bessel;
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1142 }
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1143
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1144 void a52_imdct_init (uint32_t mm_accel)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1145 {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1146 int i, j, k;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1147 double sum;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1148
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1149 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1150 sum = 0;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1151 for (i = 0; i < 256; i++) {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1152 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1153 a52_imdct_window[i] = sum;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1154 }
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1155 sum++;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1156 for (i = 0; i < 256; i++)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1157 a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum);
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1158
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1159 for (i = 0; i < 3; i++)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1160 roots16[i] = cos ((M_PI / 8) * (i + 1));
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1161
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1162 for (i = 0; i < 7; i++)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1163 roots32[i] = cos ((M_PI / 16) * (i + 1));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1164
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1165 for (i = 0; i < 15; i++)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1166 roots64[i] = cos ((M_PI / 32) * (i + 1));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1167
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1168 for (i = 0; i < 31; i++)
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1169 roots128[i] = cos ((M_PI / 64) * (i + 1));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1170
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1171 for (i = 0; i < 64; i++) {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1172 k = fftorder[i] / 2 + 64;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1173 pre1[i].real = cos ((M_PI / 256) * (k - 0.25));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1174 pre1[i].imag = sin ((M_PI / 256) * (k - 0.25));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1175 }
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1176
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1177 for (i = 64; i < 128; i++) {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1178 k = fftorder[i] / 2 + 64;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1179 pre1[i].real = -cos ((M_PI / 256) * (k - 0.25));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1180 pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25));
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1181 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1182
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1183 for (i = 0; i < 64; i++) {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1184 post1[i].real = cos ((M_PI / 256) * (i + 0.5));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1185 post1[i].imag = sin ((M_PI / 256) * (i + 0.5));
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1186 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1187
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1188 for (i = 0; i < 64; i++) {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1189 k = fftorder[i] / 4;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1190 pre2[i].real = cos ((M_PI / 128) * (k - 0.25));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1191 pre2[i].imag = sin ((M_PI / 128) * (k - 0.25));
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1192 }
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1193
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1194 for (i = 0; i < 32; i++) {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1195 post2[i].real = cos ((M_PI / 128) * (i + 0.5));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1196 post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1197 }
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1198 for (i = 0; i < 128; i++) {
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1199 xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1200 xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1201 }
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1202 for (i = 0; i < 7; i++) {
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1203 j = 1 << i;
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1204 for (k = 0; k < j; k++) {
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1205 w[i][k].real = cos (-M_PI * k / j);
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1206 w[i][k].imag = sin (-M_PI * k / j);
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1207 }
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1208 }
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
1209 #if ARCH_X86 || ARCH_X86_64
3527
5a88b21cfe8a sse opt
michael
parents: 3512
diff changeset
1210 for (i = 0; i < 128; i++) {
3581
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
1211 sseSinCos1c[2*i+0]= xcos1[i];
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
1212 sseSinCos1c[2*i+1]= -xcos1[i];
8ddf654c4871 sse opt
michael
parents: 3579
diff changeset
1213 sseSinCos1d[2*i+0]= xsin1[i];
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1214 sseSinCos1d[2*i+1]= xsin1[i];
3527
5a88b21cfe8a sse opt
michael
parents: 3512
diff changeset
1215 }
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
1216 for (i = 1; i < 7; i++) {
3483390a902b sse opt
michael
parents: 3529
diff changeset
1217 j = 1 << i;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1218 for (k = 0; k < j; k+=2) {
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1219
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
1220 sseW[i][4*k + 0] = w[i][k+0].real;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1221 sseW[i][4*k + 1] = w[i][k+0].real;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1222 sseW[i][4*k + 2] = w[i][k+1].real;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1223 sseW[i][4*k + 3] = w[i][k+1].real;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1224
3483390a902b sse opt
michael
parents: 3529
diff changeset
1225 sseW[i][4*k + 4] = -w[i][k+0].imag;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1226 sseW[i][4*k + 5] = w[i][k+0].imag;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1227 sseW[i][4*k + 6] = -w[i][k+1].imag;
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1228 sseW[i][4*k + 7] = w[i][k+1].imag;
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1229
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
1230 //we multiply more or less uninitalized numbers so we need to use exactly 0.0
3483390a902b sse opt
michael
parents: 3529
diff changeset
1231 if(k==0)
3483390a902b sse opt
michael
parents: 3529
diff changeset
1232 {
3483390a902b sse opt
michael
parents: 3529
diff changeset
1233 // sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1234 sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1235 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1236
3534
3483390a902b sse opt
michael
parents: 3529
diff changeset
1237 if(2*k == j)
3483390a902b sse opt
michael
parents: 3529
diff changeset
1238 {
3483390a902b sse opt
michael
parents: 3529
diff changeset
1239 sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0;
3483390a902b sse opt
michael
parents: 3529
diff changeset
1240 // sseW[i][4*k + 4]= -(sseW[i][4*k + 5]= -1.0);
3483390a902b sse opt
michael
parents: 3529
diff changeset
1241 }
3483390a902b sse opt
michael
parents: 3529
diff changeset
1242 }
3483390a902b sse opt
michael
parents: 3529
diff changeset
1243 }
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1244
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1245 for(i=0; i<128; i++)
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1246 {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1247 sseWindow[2*i+0]= -a52_imdct_window[2*i+0];
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1248 sseWindow[2*i+1]= a52_imdct_window[2*i+1];
3552
9ff2e3801027 sse opt
michael
parents: 3549
diff changeset
1249 }
29264
e83eef58b30a Remove all kind of trailing whitespaces from all MPlayer's files.
bircoph
parents: 29114
diff changeset
1250
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1251 for(i=0; i<64; i++)
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1252 {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1253 sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1254 sseWindow[256 + 2*i+1]= a52_imdct_window[254 - 2*i+0];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1255 sseWindow[384 + 2*i+0]= a52_imdct_window[126 - 2*i+1];
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1256 sseWindow[384 + 2*i+1]= -a52_imdct_window[126 - 2*i+0];
3553
a501627fc6db sse opt
michael
parents: 3552
diff changeset
1257 }
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1258 #endif
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1259 a52_imdct_512 = imdct_do_512;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1260 ifft128 = ifft128_c;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1261 ifft64 = ifft64_c;
3579
831860fada69 runtime cpu detection for the idct
michael
parents: 3553
diff changeset
1262
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
1263 #if ARCH_X86 || ARCH_X86_64
29601
cd3ae709054f Disable liba52 SSE imdct for x86_64+PIC (e.g. OSX 64 bit) since it will
reimar
parents: 29264
diff changeset
1264 #if !ARCH_X86_64 || !defined(PIC)
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1265 if(mm_accel & MM_ACCEL_X86_SSE)
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1266 {
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1267 fprintf (stderr, "Using SSE optimized IMDCT transform\n");
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1268 a52_imdct_512 = imdct_do_512_sse;
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1269 }
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1270 else
29601
cd3ae709054f Disable liba52 SSE imdct for x86_64+PIC (e.g. OSX 64 bit) since it will
reimar
parents: 29264
diff changeset
1271 #endif
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1272 if(mm_accel & MM_ACCEL_X86_3DNOWEXT)
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1273 {
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1274 fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n");
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1275 a52_imdct_512 = imdct_do_512_3dnowex;
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1276 }
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1277 else
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1278 if(mm_accel & MM_ACCEL_X86_3DNOW)
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1279 {
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1280 fprintf (stderr, "Using 3DNow optimized IMDCT transform\n");
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1281 a52_imdct_512 = imdct_do_512_3dnow;
4497
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1282 }
d3aedd7db02c Restore K7 support
nick
parents: 4247
diff changeset
1283 else
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 15617
diff changeset
1284 #endif // ARCH_X86 || ARCH_X86_64
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
1285 #if HAVE_ALTIVEC
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
1286 if (mm_accel & MM_ACCEL_PPC_ALTIVEC)
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
1287 {
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
1288 fprintf(stderr, "Using AltiVec optimized IMDCT transform\n");
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1289 a52_imdct_512 = imdct_do_512_altivec;
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
1290 }
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1291 else
9001
01a9cf43074c An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents: 8451
diff changeset
1292 #endif
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
1293
28361
68723bca30ee Revert mistaken #ifdef --> #if change.
diego
parents: 28335
diff changeset
1294 #ifdef LIBA52_DJBFFT
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1295 if (mm_accel & MM_ACCEL_DJBFFT) {
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1296 fprintf (stderr, "Using djbfft for IMDCT transform\n");
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1297 ifft128 = (void (*) (complex_t *)) fftc4_un128;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1298 ifft64 = (void (*) (complex_t *)) fftc4_un64;
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1299 } else
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1300 #endif
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1301 {
18720
4bad7f00556e sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18104
diff changeset
1302 fprintf (stderr, "No accelerated IMDCT transform found\n");
18721
722ac20fac5f sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents: 18720
diff changeset
1303 }
3884
0410677eda4a ported 3dnow(ex) optimizations from libac3
michael
parents: 3720
diff changeset
1304 }