Mercurial > mplayer.hg
annotate liba52/imdct.c @ 29214:a1abd8d51b81
Change VOFW for x86 to 5120, it allows larger images to be scaled and was
not slower. Other archs are not changed as the larger VOFW was slower on PPC.
author | michael |
---|---|
date | Tue, 05 May 2009 01:34:16 +0000 |
parents | 06540eb5ef6a |
children | e83eef58b30a |
rev | line source |
---|---|
3394 | 1 /* |
2 * imdct.c | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> |
3394 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
6 * The ifft algorithms in this file have been largely inspired by Dan |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
7 * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
8 * |
3394 | 9 * This file is part of a52dec, a free ATSC A-52 stream decoder. |
10 * See http://liba52.sourceforge.net/ for updates. | |
11 * | |
14991
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12303
diff
changeset
|
12 * Modified for use with MPlayer, changes contained in liba52_changes.diff. |
18783 | 13 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/ |
14991
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12303
diff
changeset
|
14 * $Id$ |
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12303
diff
changeset
|
15 * |
3394 | 16 * a52dec is free software; you can redistribute it and/or modify |
17 * it under the terms of the GNU General Public License as published by | |
18 * the Free Software Foundation; either version 2 of the License, or | |
19 * (at your option) any later version. | |
20 * | |
21 * a52dec is distributed in the hope that it will be useful, | |
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
24 * GNU General Public License for more details. | |
25 * | |
26 * You should have received a copy of the GNU General Public License | |
27 * along with this program; if not, write to the Free Software | |
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3579 | 29 * |
30 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
3884 | 31 * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru> |
32 * michael did port them from libac3 (untested, perhaps totally broken) | |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
33 * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) |
3394 | 34 */ |
35 | |
36 #include "config.h" | |
37 | |
38 #include <math.h> | |
39 #include <stdio.h> | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
40 #ifdef LIBA52_DJBFFT |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
41 #include <fftc4.h> |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
42 #endif |
3394 | 43 #ifndef M_PI |
44 #define M_PI 3.1415926535897932384626433832795029 | |
45 #endif | |
46 #include <inttypes.h> | |
47 | |
48 #include "a52.h" | |
49 #include "a52_internal.h" | |
50 #include "mm_accel.h" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3908
diff
changeset
|
51 #include "mangle.h" |
3394 | 52 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
53 void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
54 |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28361
diff
changeset
|
55 #if CONFIG_RUNTIME_CPUDETECT |
28335 | 56 #undef HAVE_AMD3DNOWEXT |
57 #define HAVE_AMD3DNOWEXT 0 | |
3884 | 58 #endif |
59 | |
3394 | 60 typedef struct complex_s { |
61 sample_t real; | |
62 sample_t imag; | |
63 } complex_t; | |
64 | |
12303
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
9122
diff
changeset
|
65 static const int pm128[128] attribute_used __attribute__((aligned(16))) = |
3884 | 66 { |
67 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, | |
68 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, | |
69 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, | |
70 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, | |
71 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, | |
72 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, | |
73 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, | |
74 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 | |
75 }; | |
3394 | 76 |
12303
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
9122
diff
changeset
|
77 static uint8_t attribute_used bit_reverse_512[] = { |
3394 | 78 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, |
79 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, | |
80 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, | |
81 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, | |
82 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, | |
83 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, | |
84 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, | |
85 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, | |
86 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, | |
87 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, | |
88 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, | |
89 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, | |
90 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, | |
91 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, | |
92 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, | |
93 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; | |
94 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
/*
 * 128-entry ordering table.  The entries are a permutation of the even
 * numbers 0..254; each row of 16 holds one residue class modulo 16.
 */
static uint8_t fftorder[] = {
      0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
      8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
      4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
    252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
      2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
     10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
    254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
      6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
};
3394 | 105 |
3508 | 106 static complex_t __attribute__((aligned(16))) buf[128]; |
3394 | 107 |
108 /* Twiddle factor LUT */ | |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
109 static complex_t __attribute__((aligned(16))) w_1[1]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
110 static complex_t __attribute__((aligned(16))) w_2[2]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
111 static complex_t __attribute__((aligned(16))) w_4[4]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
112 static complex_t __attribute__((aligned(16))) w_8[8]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
113 static complex_t __attribute__((aligned(16))) w_16[16]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
114 static complex_t __attribute__((aligned(16))) w_32[32]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
115 static complex_t __attribute__((aligned(16))) w_64[64]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
116 static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64}; |
3394 | 117 |
118 /* Twiddle factors for IMDCT */ | |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
119 static sample_t __attribute__((aligned(16))) xcos1[128]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
120 static sample_t __attribute__((aligned(16))) xsin1[128]; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
121 |
#if ARCH_X86 || ARCH_X86_64
// NOTE: SSE needs 16byte alignment or it will segfault
//
static float __attribute__((aligned(16))) sseSinCos1c[256];
static float __attribute__((aligned(16))) sseSinCos1d[256];
static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
//static float __attribute__((aligned(16))) sseW0[4];
static float __attribute__((aligned(16))) sseW1[8];
static float __attribute__((aligned(16))) sseW2[16];
static float __attribute__((aligned(16))) sseW3[32];
static float __attribute__((aligned(16))) sseW4[64];
static float __attribute__((aligned(16))) sseW5[128];
static float __attribute__((aligned(16))) sseW6[256];
/* Table of the sseW1..sseW6 arrays; slot 0 is NULL because sseW0 is
   commented out above. */
static float __attribute__((aligned(16))) *sseW[7]=
	{NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
static float __attribute__((aligned(16))) sseWindow[512];
#endif
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
139 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
140 /* Root values for IFFT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
141 static sample_t roots16[3]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
142 static sample_t roots32[7]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
143 static sample_t roots64[15]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
144 static sample_t roots128[31]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
145 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
146 /* Twiddle factors for IMDCT */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
147 static complex_t pre1[128]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
148 static complex_t post1[64]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
149 static complex_t pre2[64]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
150 static complex_t post2[32]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
151 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
152 static sample_t a52_imdct_window[256]; |
3394 | 153 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
154 static void (* ifft128) (complex_t * buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
155 static void (* ifft64) (complex_t * buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
156 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
157 static inline void ifft2 (complex_t * buf) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
158 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
159 double r, i; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
160 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
161 r = buf[0].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
162 i = buf[0].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
163 buf[0].real += buf[1].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
164 buf[0].imag += buf[1].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
165 buf[1].real = r - buf[1].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
166 buf[1].imag = i - buf[1].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
167 } |
3394 | 168 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
169 static inline void ifft4 (complex_t * buf) |
3394 | 170 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
171 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
3394 | 172 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
173 tmp1 = buf[0].real + buf[1].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
174 tmp2 = buf[3].real + buf[2].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
175 tmp3 = buf[0].imag + buf[1].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
176 tmp4 = buf[2].imag + buf[3].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
177 tmp5 = buf[0].real - buf[1].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
178 tmp6 = buf[0].imag - buf[1].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
179 tmp7 = buf[2].imag - buf[3].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
180 tmp8 = buf[3].real - buf[2].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
181 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
182 buf[0].real = tmp1 + tmp2; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
183 buf[0].imag = tmp3 + tmp4; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
184 buf[2].real = tmp1 - tmp2; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
185 buf[2].imag = tmp3 - tmp4; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
186 buf[1].real = tmp5 + tmp7; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
187 buf[1].imag = tmp6 + tmp8; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
188 buf[3].real = tmp5 - tmp7; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
189 buf[3].imag = tmp6 - tmp8; |
3394 | 190 } |
191 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
/*
 * the basic split-radix ifft butterfly
 *
 * a0..a3 are complex lvalues (anything with .real/.imag members) that are
 * updated in place; (wr, wi) is the weight applied to a2 and a3.
 * The expanding scope must declare tmp1..tmp8.
 * Every parameter is parenthesized so that arguments such as *p or x + y
 * expand as intended.  Arguments are evaluated more than once, so they
 * must be free of side effects.
 */
#define BUTTERFLY(a0,a1,a2,a3,wr,wi) do {               \
    tmp5 = (a2).real * (wr) + (a2).imag * (wi);         \
    tmp6 = (a2).imag * (wr) - (a2).real * (wi);         \
    tmp7 = (a3).real * (wr) - (a3).imag * (wi);         \
    tmp8 = (a3).imag * (wr) + (a3).real * (wi);         \
    tmp1 = tmp5 + tmp7;                                 \
    tmp2 = tmp6 + tmp8;                                 \
    tmp3 = tmp6 - tmp8;                                 \
    tmp4 = tmp7 - tmp5;                                 \
    (a2).real = (a0).real - tmp1;                       \
    (a2).imag = (a0).imag - tmp2;                       \
    (a3).real = (a1).real - tmp3;                       \
    (a3).imag = (a1).imag - tmp4;                       \
    (a0).real += tmp1;                                  \
    (a0).imag += tmp2;                                  \
    (a1).real += tmp3;                                  \
    (a1).imag += tmp4;                                  \
} while (0)
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
212 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
/*
 * split-radix ifft butterfly, specialized for wr=1 wi=0
 *
 * a0..a3 are complex lvalues (anything with .real/.imag members) that are
 * updated in place.  The expanding scope must declare tmp1..tmp4.
 * Every parameter is parenthesized so that arguments such as *p expand as
 * intended.  Arguments are evaluated more than once, so they must be free
 * of side effects.
 */
#define BUTTERFLY_ZERO(a0,a1,a2,a3) do {                \
    tmp1 = (a2).real + (a3).real;                       \
    tmp2 = (a2).imag + (a3).imag;                       \
    tmp3 = (a2).imag - (a3).imag;                       \
    tmp4 = (a3).real - (a2).real;                       \
    (a2).real = (a0).real - tmp1;                       \
    (a2).imag = (a0).imag - tmp2;                       \
    (a3).real = (a1).real - tmp3;                       \
    (a3).imag = (a1).imag - tmp4;                       \
    (a0).real += tmp1;                                  \
    (a0).imag += tmp2;                                  \
    (a1).real += tmp3;                                  \
    (a1).imag += tmp4;                                  \
} while (0)
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
229 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
/*
 * split-radix ifft butterfly, specialized for wr=wi
 *
 * a0..a3 are complex lvalues (anything with .real/.imag members) that are
 * updated in place; w is the common value of wr and wi.
 * The expanding scope must declare tmp1..tmp8.
 * Every parameter is parenthesized so that arguments such as *p or x + y
 * expand as intended.  Arguments are evaluated more than once, so they
 * must be free of side effects.
 */
#define BUTTERFLY_HALF(a0,a1,a2,a3,w) do {              \
    tmp5 = ((a2).real + (a2).imag) * (w);               \
    tmp6 = ((a2).imag - (a2).real) * (w);               \
    tmp7 = ((a3).real - (a3).imag) * (w);               \
    tmp8 = ((a3).imag + (a3).real) * (w);               \
    tmp1 = tmp5 + tmp7;                                 \
    tmp2 = tmp6 + tmp8;                                 \
    tmp3 = tmp6 - tmp8;                                 \
    tmp4 = tmp7 - tmp5;                                 \
    (a2).real = (a0).real - tmp1;                       \
    (a2).imag = (a0).imag - tmp2;                       \
    (a3).real = (a1).real - tmp3;                       \
    (a3).imag = (a1).imag - tmp4;                       \
    (a0).real += tmp1;                                  \
    (a0).imag += tmp2;                                  \
    (a1).real += tmp3;                                  \
    (a1).imag += tmp4;                                  \
} while (0)
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
250 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
251 static inline void ifft8 (complex_t * buf) |
3394 | 252 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
253 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
3394 | 254 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
255 ifft4 (buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
256 ifft2 (buf + 4); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
257 ifft2 (buf + 6); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
258 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
259 BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); |
3394 | 260 } |
261 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
262 static void ifft_pass (complex_t * buf, sample_t * weight, int n) |
3394 | 263 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
264 complex_t * buf1; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
265 complex_t * buf2; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
266 complex_t * buf3; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
267 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
8254
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
4497
diff
changeset
|
268 int i; |
3394 | 269 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
270 buf++; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
271 buf1 = buf + n; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
272 buf2 = buf + 2 * n; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
273 buf3 = buf + 3 * n; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
274 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
275 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
276 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
277 i = n - 1; |
3394 | 278 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
279 do { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
280 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], weight[n], weight[2*i]); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
281 buf++; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
282 buf1++; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
283 buf2++; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
284 buf3++; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
285 weight++; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
286 } while (--i); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
287 } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
288 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
289 static void ifft16 (complex_t * buf) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
290 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
291 ifft8 (buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
292 ifft4 (buf + 8); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
293 ifft4 (buf + 12); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
294 ifft_pass (buf, roots16 - 4, 4); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
295 } |
3394 | 296 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
297 static void ifft32 (complex_t * buf) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
298 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
299 ifft16 (buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
300 ifft8 (buf + 16); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
301 ifft8 (buf + 24); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
302 ifft_pass (buf, roots32 - 8, 8); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
303 } |
3579 | 304 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
305 static void ifft64_c (complex_t * buf) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
306 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
307 ifft32 (buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
308 ifft16 (buf + 32); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
309 ifft16 (buf + 48); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
310 ifft_pass (buf, roots64 - 16, 16); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
311 } |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
312 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
313 static void ifft128_c (complex_t * buf) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
314 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
315 ifft32 (buf); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
316 ifft16 (buf + 32); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
317 ifft16 (buf + 48); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
318 ifft_pass (buf, roots64 - 16, 16); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
319 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
320 ifft32 (buf + 64); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
321 ifft32 (buf + 96); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
322 ifft_pass (buf, roots128 - 32, 32); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
323 } |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
324 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
325 void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
326 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
327 int i, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
328 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
329 const sample_t * window = a52_imdct_window; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
330 complex_t buf[128]; |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
331 |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
332 for (i = 0; i < 128; i++) { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
333 k = fftorder[i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
334 t_r = pre1[i].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
335 t_i = pre1[i].imag; |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
336 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
337 buf[i].real = t_i * data[255-k] + t_r * data[k]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
338 buf[i].imag = t_r * data[255-k] - t_i * data[k]; |
3579 | 339 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
340 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
341 ifft128 (buf); |
3579 | 342 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
343 /* Post IFFT complex multiply plus IFFT complex conjugate*/ |
3579 | 344 /* Window and convert to real valued signal */ |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
345 for (i = 0; i < 64; i++) { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
346 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
347 t_r = post1[i].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
348 t_i = post1[i].imag; |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
349 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
350 a_r = t_r * buf[i].real + t_i * buf[i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
351 a_i = t_i * buf[i].real - t_r * buf[i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
352 b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
353 b_i = t_r * buf[127-i].real - t_i * buf[127-i].imag; |
3579 | 354 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
355 w_1 = window[2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
356 w_2 = window[255-2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
357 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
358 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
359 delay[2*i] = a_i; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
360 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
361 w_1 = window[2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
362 w_2 = window[254-2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
363 data[2*i+1] = delay[2*i+1] * w_2 + b_r * w_1 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
364 data[254-2*i] = delay[2*i+1] * w_1 - b_r * w_2 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
365 delay[2*i+1] = b_i; |
3579 | 366 } |
367 } | |
368 | |
28290 | 369 #if HAVE_ALTIVEC |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
370 |
25328
6f0309e575e0
There is a check for altivec.h in configure so use the preprocessor directive
diego
parents:
25327
diff
changeset
|
371 #ifdef HAVE_ALTIVEC_H |
9122 | 372 #include <altivec.h> |
373 #endif | |
374 | |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
375 // used to build registers permutation vectors (vcprm) |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
376 // the 's' are for words in the _s_econd vector |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
377 #define WORD_0 0x00,0x01,0x02,0x03 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
378 #define WORD_1 0x04,0x05,0x06,0x07 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
379 #define WORD_2 0x08,0x09,0x0a,0x0b |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
380 #define WORD_3 0x0c,0x0d,0x0e,0x0f |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
381 #define WORD_s0 0x10,0x11,0x12,0x13 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
382 #define WORD_s1 0x14,0x15,0x16,0x17 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
383 #define WORD_s2 0x18,0x19,0x1a,0x1b |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
384 #define WORD_s3 0x1c,0x1d,0x1e,0x1f |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
385 |
27318
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
386 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} |
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
387 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
388 |
27318
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
389 #define FOUROF(a) {a,a,a,a} |
25995 | 390 |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
391 // vcprmle is used to keep the same index as in the SSE version. |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
392 // it's the same as vcprm, with the index order reversed |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
393 // ('le' is Little Endian) |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
394 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
395 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
396 // used to build inverse/identity vectors (vcii) |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
397 // n is _n_egative, p is _p_ositive |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
398 #define FLOAT_n -1. |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
399 #define FLOAT_p 1. |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
400 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
401 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
402 void |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
403 imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias) |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
404 { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
405 int i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
406 int k; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
407 int p,q; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
408 int m; |
16173 | 409 long two_m; |
410 long two_m_plus_one; | |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
411 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
412 sample_t tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
413 sample_t tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
414 sample_t tmp_a_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
415 sample_t tmp_a_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
416 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
417 sample_t *data_ptr; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
418 sample_t *delay_ptr; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
419 sample_t *window_ptr; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
420 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
421 /* 512 IMDCT with source and dest data in 'data' */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
422 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
423 /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
424 for( i=0; i < 128; i++) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
425 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
426 int j= bit_reverse_512[i]; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
427 buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
428 buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
429 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
430 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
431 /* 1. iteration */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
432 for(i = 0; i < 128; i += 2) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
433 #if 0 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
434 tmp_a_r = buf[i].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
435 tmp_a_i = buf[i].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
436 tmp_b_r = buf[i+1].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
437 tmp_b_i = buf[i+1].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
438 buf[i].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
439 buf[i].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
440 buf[i+1].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
441 buf[i+1].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
442 #else |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
443 vector float temp, bufv; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
444 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
445 bufv = vec_ld(i << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
446 temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
447 bufv = vec_madd(bufv, vcii(p,p,n,n), temp); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
448 vec_st(bufv, i << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
449 #endif |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
450 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
451 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
452 /* 2. iteration */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
453 // Note w[1]={{1,0}, {0,-1}} |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
454 for(i = 0; i < 128; i += 4) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
455 #if 0 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
456 tmp_a_r = buf[i].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
457 tmp_a_i = buf[i].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
458 tmp_b_r = buf[i+2].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
459 tmp_b_i = buf[i+2].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
460 buf[i].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
461 buf[i].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
462 buf[i+2].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
463 buf[i+2].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
464 tmp_a_r = buf[i+1].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
465 tmp_a_i = buf[i+1].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
466 /* WARNING: im <-> re here ! */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
467 tmp_b_r = buf[i+3].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
468 tmp_b_i = buf[i+3].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
469 buf[i+1].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
470 buf[i+1].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
471 buf[i+3].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
472 buf[i+3].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
473 #else |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
474 vector float buf01, buf23, temp1, temp2; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
475 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
476 buf01 = vec_ld((i + 0) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
477 buf23 = vec_ld((i + 2) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
478 buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
479 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
480 temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
481 temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
482 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
483 vec_st(temp1, (i + 0) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
484 vec_st(temp2, (i + 2) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
485 #endif |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
486 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
487 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
488 /* 3. iteration */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
489 for(i = 0; i < 128; i += 8) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
490 #if 0 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
491 tmp_a_r = buf[i].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
492 tmp_a_i = buf[i].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
493 tmp_b_r = buf[i+4].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
494 tmp_b_i = buf[i+4].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
495 buf[i].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
496 buf[i].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
497 buf[i+4].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
498 buf[i+4].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
499 tmp_a_r = buf[1+i].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
500 tmp_a_i = buf[1+i].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
501 tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
502 tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
503 buf[1+i].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
504 buf[1+i].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
505 buf[i+5].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
506 buf[i+5].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
507 tmp_a_r = buf[i+2].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
508 tmp_a_i = buf[i+2].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
509 /* WARNING re <-> im & sign */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
510 tmp_b_r = buf[i+6].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
511 tmp_b_i = - buf[i+6].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
512 buf[i+2].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
513 buf[i+2].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
514 buf[i+6].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
515 buf[i+6].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
516 tmp_a_r = buf[i+3].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
517 tmp_a_i = buf[i+3].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
518 tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
519 tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
520 buf[i+3].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
521 buf[i+3].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
522 buf[i+7].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
523 buf[i+7].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
524 #else |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
525 vector float buf01, buf23, buf45, buf67; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
526 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
527 buf01 = vec_ld((i + 0) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
528 buf23 = vec_ld((i + 2) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
529 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
530 tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
531 tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
532 buf[i+5].real = tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
533 buf[i+5].imag = tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
534 tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
535 tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
536 buf[i+7].real = tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
537 buf[i+7].imag = tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
538 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
539 buf23 = vec_ld((i + 2) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
540 buf45 = vec_ld((i + 4) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
541 buf67 = vec_ld((i + 6) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
542 buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
543 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
544 vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
545 vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
546 vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
547 vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
548 #endif |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
549 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
550 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
551 /* 4-7. iterations */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
552 for (m=3; m < 7; m++) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
553 two_m = (1 << m); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
554 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
555 two_m_plus_one = two_m<<1; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
556 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
557 for(i = 0; i < 128; i += two_m_plus_one) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
558 for(k = 0; k < two_m; k+=2) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
559 #if 0 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
560 int p = k + i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
561 int q = p + two_m; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
562 tmp_a_r = buf[p].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
563 tmp_a_i = buf[p].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
564 tmp_b_r = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
565 buf[q].real * w[m][k].real - |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
566 buf[q].imag * w[m][k].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
567 tmp_b_i = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
568 buf[q].imag * w[m][k].real + |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
569 buf[q].real * w[m][k].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
570 buf[p].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
571 buf[p].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
572 buf[q].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
573 buf[q].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
574 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
575 tmp_a_r = buf[(p + 1)].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
576 tmp_a_i = buf[(p + 1)].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
577 tmp_b_r = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
578 buf[(q + 1)].real * w[m][(k + 1)].real - |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
579 buf[(q + 1)].imag * w[m][(k + 1)].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
580 tmp_b_i = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
581 buf[(q + 1)].imag * w[m][(k + 1)].real + |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
582 buf[(q + 1)].real * w[m][(k + 1)].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
583 buf[(p + 1)].real = tmp_a_r + tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
584 buf[(p + 1)].imag = tmp_a_i + tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
585 buf[(q + 1)].real = tmp_a_r - tmp_b_r; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
586 buf[(q + 1)].imag = tmp_a_i - tmp_b_i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
587 #else |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
588 int p = k + i; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
589 int q = p + two_m; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
590 vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4; |
9122 | 591 const vector float vczero = (const vector float)FOUROF(0.); |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
592 // first compute buf[q] and buf[q+1] |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
593 vecq = vec_ld(q << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
594 vecw = vec_ld(0, (float*)&(w[m][k])); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
595 temp1 = vec_madd(vecq, vecw, vczero); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
596 temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
597 temp2 = vec_madd(temp2, vecw, vczero); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
598 temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
599 temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
600 vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
601 // then butterfly with buf[p] and buf[p+1] |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
602 vecp = vec_ld(p << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
603 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
604 temp1 = vec_add(vecp, vecq); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
605 temp2 = vec_sub(vecp, vecq); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
606 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
607 vec_st(temp1, p << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
608 vec_st(temp2, q << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
609 #endif |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
610 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
611 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
612 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
613 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
614 /* Post IFFT complex multiply plus IFFT complex conjugate*/ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
615 for( i=0; i < 128; i+=4) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
616 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
617 #if 0 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
618 tmp_a_r = buf[(i + 0)].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
619 tmp_a_i = -1.0 * buf[(i + 0)].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
620 buf[(i + 0)].real = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
621 (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
622 buf[(i + 0)].imag = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
623 (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
624 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
625 tmp_a_r = buf[(i + 1)].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
626 tmp_a_i = -1.0 * buf[(i + 1)].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
627 buf[(i + 1)].real = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
628 (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
629 buf[(i + 1)].imag = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
630 (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
631 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
632 tmp_a_r = buf[(i + 2)].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
633 tmp_a_i = -1.0 * buf[(i + 2)].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
634 buf[(i + 2)].real = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
635 (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
636 buf[(i + 2)].imag = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
637 (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
638 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
639 tmp_a_r = buf[(i + 3)].real; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
640 tmp_a_i = -1.0 * buf[(i + 3)].imag; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
641 buf[(i + 3)].real = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
642 (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
643 buf[(i + 3)].imag = |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
644 (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
645 #else |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
646 vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
647 vector float temp0022, temp1133, tempCS01; |
9122 | 648 const vector float vczero = (const vector float)FOUROF(0.); |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
649 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
650 bufv_0 = vec_ld((i + 0) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
651 bufv_2 = vec_ld((i + 2) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
652 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
653 cosv = vec_ld(i << 2, xcos1); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
654 sinv = vec_ld(i << 2, xsin1); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
655 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
656 temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
657 temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
658 tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
659 temp1 = vec_madd(temp0022, tempCS01, vczero); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
660 tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
661 temp2 = vec_madd(temp1133, tempCS01, vczero); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
662 bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
663 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
664 vec_st(bufv_0, (i + 0) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
665 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
666 /* idem with bufv_2 and high-order cosv/sinv */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
667 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
668 temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
669 temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
670 tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
671 temp1 = vec_madd(temp0022, tempCS01, vczero); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
672 tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3)); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
673 temp2 = vec_madd(temp1133, tempCS01, vczero); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
674 bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
675 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
676 vec_st(bufv_2, (i + 2) << 3, (float*)buf); |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
677 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
678 #endif |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
679 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
680 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
681 data_ptr = data; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
682 delay_ptr = delay; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
683 window_ptr = a52_imdct_window; |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
684 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
685 /* Window and convert to real valued signal */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
686 for(i=0; i< 64; i++) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
687 *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
688 *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
689 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
690 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
691 for(i=0; i< 64; i++) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
692 *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
693 *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
694 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
695 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
696 /* The trailing edge of the window goes into the delay line */ |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
697 delay_ptr = delay; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
698 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
699 for(i=0; i< 64; i++) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
700 *delay_ptr++ = -buf[64+i].real * *--window_ptr; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
701 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
702 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
703 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
704 for(i=0; i<64; i++) { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
705 *delay_ptr++ = buf[i].imag * *--window_ptr; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
706 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
707 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
708 } |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
709 #endif |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
710 |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
711 |
4497 | 712 // Stuff below this line is borrowed from libac3 |
713 #include "srfftp.h" | |
28290 | 714 #if ARCH_X86 || ARCH_X86_64 |
28335 | 715 #undef HAVE_AMD3DNOW |
716 #define HAVE_AMD3DNOW 1 | |
3884 | 717 #include "srfftp_3dnow.h" |
718 | |
8451 | 719 const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; |
720 const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; | |
3884 | 721 const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 }; |
722 | |
28335 | 723 #undef HAVE_AMD3DNOWEXT |
724 #define HAVE_AMD3DNOWEXT 0 | |
4497 | 725 #include "imdct_3dnow.h" |
28335 | 726 #undef HAVE_AMD3DNOWEXT |
727 #define HAVE_AMD3DNOWEXT 1 | |
4497 | 728 #include "imdct_3dnow.h" |
3884 | 729 |
3579 | 730 void |
731 imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) | |
732 { | |
8254
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
4497
diff
changeset
|
733 /* int i,k; |
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
4497
diff
changeset
|
734 int p,q;*/ |
3579 | 735 int m; |
16173 | 736 long two_m; |
737 long two_m_plus_one; | |
738 long two_m_plus_one_shl3; | |
15617
130dd060f723
one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents:
14991
diff
changeset
|
739 complex_t *buf_offset; |
3579 | 740 |
8254
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
4497
diff
changeset
|
741 /* sample_t tmp_a_i; |
3579 | 742 sample_t tmp_a_r; |
743 sample_t tmp_b_i; | |
8254
772d6d27fd66
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents:
4497
diff
changeset
|
744 sample_t tmp_b_r;*/ |
3579 | 745 |
746 sample_t *data_ptr; | |
747 sample_t *delay_ptr; | |
748 sample_t *window_ptr; | |
749 | |
750 /* 512 IMDCT with source and dest data in 'data' */ | |
3623 | 751 /* see the c version (dct_do_512()), it's almost identical, just in C */ |
752 | |
3579 | 753 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ |
754 /* Bit reversed shuffling */ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
755 __asm__ volatile( |
16173 | 756 "xor %%"REG_S", %%"REG_S" \n\t" |
757 "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" | |
758 "mov $1008, %%"REG_D" \n\t" | |
759 "push %%"REG_BP" \n\t" //use ebp without telling gcc | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
760 ASMALIGN(4) |
3579 | 761 "1: \n\t" |
16173 | 762 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI |
763 "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI | |
764 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi | |
765 "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi | |
3584 | 766 "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR |
16173 | 767 "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t" |
3584 | 768 "mulps %%xmm0, %%xmm2 \n\t" |
769 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI | |
16173 | 770 "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" |
3584 | 771 "subps %%xmm0, %%xmm2 \n\t" |
16173 | 772 "movzb (%%"REG_a"), %%"REG_d" \n\t" |
773 "movzb 1(%%"REG_a"), %%"REG_BP" \n\t" | |
774 "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" | |
775 "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" | |
776 "add $16, %%"REG_S" \n\t" | |
777 "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap | |
778 "sub $16, %%"REG_D" \n\t" | |
779 "jnc 1b \n\t" | |
780 "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* | |
16189
72764c0dad8a
Fixes segfault on IA-32 machines caused by the ASM patch for AMD-64 for a52.
gpoirier
parents:
16173
diff
changeset
|
781 :: "b" (data), "c" (buf) |
16173 | 782 : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d |
3579 | 783 ); |
784 | |
785 | |
786 /* FFT Merge */ | |
787 /* unoptimized variant | |
788 for (m=1; m < 7; m++) { | |
789 if(m) | |
790 two_m = (1 << m); | |
791 else | |
792 two_m = 1; | |
793 | |
794 two_m_plus_one = (1 << (m+1)); | |
795 | |
796 for(i = 0; i < 128; i += two_m_plus_one) { | |
797 for(k = 0; k < two_m; k++) { | |
798 p = k + i; | |
799 q = p + two_m; | |
800 tmp_a_r = buf[p].real; | |
801 tmp_a_i = buf[p].imag; | |
802 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; | |
803 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; | |
804 buf[p].real = tmp_a_r + tmp_b_r; | |
805 buf[p].imag = tmp_a_i + tmp_b_i; | |
806 buf[q].real = tmp_a_r - tmp_b_r; | |
807 buf[q].imag = tmp_a_i - tmp_b_i; | |
808 } | |
809 } | |
810 } | |
811 */ | |
812 | |
3623 | 813 /* 1. iteration */ |
3549 | 814 // Note w[0][0]={1,0} |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
815 __asm__ volatile( |
3508 | 816 "xorps %%xmm1, %%xmm1 \n\t" |
817 "xorps %%xmm2, %%xmm2 \n\t" | |
16173 | 818 "mov %0, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
819 ASMALIGN(4) |
3508 | 820 "1: \n\t" |
16173 | 821 "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] |
822 "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] | |
823 "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] | |
824 "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] | |
3508 | 825 "addps %%xmm1, %%xmm0 \n\t" |
826 "subps %%xmm2, %%xmm0 \n\t" | |
16173 | 827 "movaps %%xmm0, (%%"REG_S")\n\t" |
828 "add $16, %%"REG_S" \n\t" | |
829 "cmp %1, %%"REG_S" \n\t" | |
3508 | 830 " jb 1b \n\t" |
831 :: "g" (buf), "r" (buf + 128) | |
16173 | 832 : "%"REG_S |
3508 | 833 ); |
3549 | 834 |
3623 | 835 /* 2. iteration */ |
3512 | 836 // Note w[1]={{1,0}, {0,-1}} |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
837 __asm__ volatile( |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3908
diff
changeset
|
838 "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 |
16173 | 839 "mov %0, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
840 ASMALIGN(4) |
3512 | 841 "1: \n\t" |
16173 | 842 "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 |
3512 | 843 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 |
844 "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 | |
16173 | 845 "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 |
846 "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1 | |
3512 | 847 "addps %%xmm2, %%xmm0 \n\t" |
848 "subps %%xmm2, %%xmm1 \n\t" | |
16173 | 849 "movaps %%xmm0, (%%"REG_S") \n\t" |
850 "movaps %%xmm1, 16(%%"REG_S") \n\t" | |
851 "add $32, %%"REG_S" \n\t" | |
852 "cmp %1, %%"REG_S" \n\t" | |
3512 | 853 " jb 1b \n\t" |
854 :: "g" (buf), "r" (buf + 128) | |
16173 | 855 : "%"REG_S |
3512 | 856 ); |
3549 | 857 |
3623 | 858 /* 3. iteration */ |
3534 | 859 /* |
860 Note sseW2+0={1,1,sqrt(2),sqrt(2)} | |
861 Note sseW2+16={0,0,sqrt(2),-sqrt(2)} | |
862 Note sseW2+32={0,0,-sqrt(2),-sqrt(2)} | |
863 Note sseW2+48={1,-1,sqrt(2),-sqrt(2)} | |
864 */ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
865 __asm__ volatile( |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3908
diff
changeset
|
866 "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3908
diff
changeset
|
867 "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" |
3534 | 868 "xorps %%xmm5, %%xmm5 \n\t" |
869 "xorps %%xmm2, %%xmm2 \n\t" | |
16173 | 870 "mov %0, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
871 ASMALIGN(4) |
3534 | 872 "1: \n\t" |
16173 | 873 "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 |
874 "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3908
diff
changeset
|
875 "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3908
diff
changeset
|
876 "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 |
3537 | 877 "mulps %%xmm2, %%xmm4 \n\t" |
878 "mulps %%xmm3, %%xmm5 \n\t" | |
3534 | 879 "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5 |
880 "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 | |
3537 | 881 "mulps %%xmm6, %%xmm3 \n\t" |
3534 | 882 "mulps %%xmm7, %%xmm2 \n\t" |
16173 | 883 "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 |
884 "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3 | |
3534 | 885 "addps %%xmm4, %%xmm2 \n\t" |
886 "addps %%xmm5, %%xmm3 \n\t" | |
887 "movaps %%xmm2, %%xmm4 \n\t" | |
888 "movaps %%xmm3, %%xmm5 \n\t" | |
889 "addps %%xmm0, %%xmm2 \n\t" | |
890 "addps %%xmm1, %%xmm3 \n\t" | |
891 "subps %%xmm4, %%xmm0 \n\t" | |
892 "subps %%xmm5, %%xmm1 \n\t" | |
16173 | 893 "movaps %%xmm2, (%%"REG_S") \n\t" |
894 "movaps %%xmm3, 16(%%"REG_S") \n\t" | |
895 "movaps %%xmm0, 32(%%"REG_S") \n\t" | |
896 "movaps %%xmm1, 48(%%"REG_S") \n\t" | |
897 "add $64, %%"REG_S" \n\t" | |
898 "cmp %1, %%"REG_S" \n\t" | |
3534 | 899 " jb 1b \n\t" |
900 :: "g" (buf), "r" (buf + 128) | |
16173 | 901 : "%"REG_S |
3534 | 902 ); |
3508 | 903 |
3623 | 904 /* 4-7. iterations */ |
3546 | 905 for (m=3; m < 7; m++) { |
906 two_m = (1 << m); | |
907 two_m_plus_one = two_m<<1; | |
15617
130dd060f723
one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents:
14991
diff
changeset
|
908 two_m_plus_one_shl3 = (two_m_plus_one<<3); |
130dd060f723
one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents:
14991
diff
changeset
|
909 buf_offset = buf+128; |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
910 __asm__ volatile( |
16173 | 911 "mov %0, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
912 ASMALIGN(4) |
3546 | 913 "1: \n\t" |
16173 | 914 "xor %%"REG_D", %%"REG_D" \n\t" // k |
915 "lea (%%"REG_S", %3), %%"REG_d" \n\t" | |
3546 | 916 "2: \n\t" |
16173 | 917 "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" |
918 "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t" | |
3546 | 919 "mulps %%xmm1, %%xmm2 \n\t" |
920 "shufps $0xB1, %%xmm1, %%xmm1 \n\t" | |
16173 | 921 "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t" |
922 "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t" | |
3546 | 923 "addps %%xmm2, %%xmm1 \n\t" |
924 "movaps %%xmm1, %%xmm2 \n\t" | |
925 "addps %%xmm0, %%xmm1 \n\t" | |
926 "subps %%xmm2, %%xmm0 \n\t" | |
16173 | 927 "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" |
928 "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" | |
929 "add $16, %%"REG_D" \n\t" | |
930 "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 | |
931 "jb 2b \n\t" | |
932 "add %2, %%"REG_S" \n\t" | |
933 "cmp %1, %%"REG_S" \n\t" | |
3546 | 934 " jb 1b \n\t" |
15617
130dd060f723
one bugfix and a few gcc4 bug workaorunds by (Gianluigi Tiesi: mplayer, netfarm it)
michael
parents:
14991
diff
changeset
|
935 :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), |
3546 | 936 "r" (sseW[m]) |
16173 | 937 : "%"REG_S, "%"REG_D, "%"REG_d |
3546 | 938 ); |
939 } | |
940 | |
3623 | 941 /* Post IFFT complex multiply plus IFFT complex conjugate*/ |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
942 __asm__ volatile( |
16173 | 943 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
944 ASMALIGN(4) |
3581 | 945 "1: \n\t" |
16173 | 946 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
947 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
3581 | 948 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" |
16173 | 949 "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" |
950 "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" | |
3581 | 951 "addps %%xmm1, %%xmm0 \n\t" |
16173 | 952 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
953 "add $16, %%"REG_S" \n\t" | |
3581 | 954 " jnz 1b \n\t" |
955 :: "r" (buf+128) | |
16173 | 956 : "%"REG_S |
3581 | 957 ); |
958 | |
3394 | 959 |
960 data_ptr = data; | |
961 delay_ptr = delay; | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
962 window_ptr = a52_imdct_window; |
3394 | 963 |
964 /* Window and convert to real valued signal */ | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
965 __asm__ volatile( |
16173 | 966 "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
967 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | |
3552 | 968 "movss %3, %%xmm2 \n\t" // bias |
969 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
970 ASMALIGN(4) |
3552 | 971 "1: \n\t" |
16173 | 972 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
973 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? | |
974 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? | |
975 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? | |
3552 | 976 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
16173 | 977 "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
978 "addps (%2, %%"REG_S"), %%xmm0 \n\t" | |
3552 | 979 "addps %%xmm2, %%xmm0 \n\t" |
16173 | 980 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
981 "add $16, %%"REG_S" \n\t" | |
982 "sub $16, %%"REG_D" \n\t" | |
983 "cmp $512, %%"REG_S" \n\t" | |
3552 | 984 " jb 1b \n\t" |
985 :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) | |
16173 | 986 : "%"REG_S, "%"REG_D |
3552 | 987 ); |
988 data_ptr+=128; | |
989 delay_ptr+=128; | |
3553 | 990 // window_ptr+=128; |
3579 | 991 |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
992 __asm__ volatile( |
16173 | 993 "mov $1024, %%"REG_D" \n\t" // 1024 |
994 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | |
3552 | 995 "movss %3, %%xmm2 \n\t" // bias |
996 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
997 ASMALIGN(4) |
3552 | 998 "1: \n\t" |
16173 | 999 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
1000 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C | |
1001 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C | |
1002 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A | |
3552 | 1003 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
16173 | 1004 "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
1005 "addps (%2, %%"REG_S"), %%xmm0 \n\t" | |
3552 | 1006 "addps %%xmm2, %%xmm0 \n\t" |
16173 | 1007 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
1008 "add $16, %%"REG_S" \n\t" | |
1009 "sub $16, %%"REG_D" \n\t" | |
1010 "cmp $512, %%"REG_S" \n\t" | |
3552 | 1011 " jb 1b \n\t" |
1012 :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) | |
16173 | 1013 : "%"REG_S, "%"REG_D |
3552 | 1014 ); |
1015 data_ptr+=128; | |
3553 | 1016 // window_ptr+=128; |
3394 | 1017 |
1018 /* The trailing edge of the window goes into the delay line */ | |
1019 delay_ptr = delay; | |
1020 | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
1021 __asm__ volatile( |
16173 | 1022 "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
1023 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1024 ASMALIGN(4) |
3553 | 1025 "1: \n\t" |
16173 | 1026 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
1027 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C | |
1028 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C | |
1029 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A | |
3553 | 1030 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
16173 | 1031 "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
1032 "movaps %%xmm0, (%1, %%"REG_S") \n\t" | |
1033 "add $16, %%"REG_S" \n\t" | |
1034 "sub $16, %%"REG_D" \n\t" | |
1035 "cmp $512, %%"REG_S" \n\t" | |
3553 | 1036 " jb 1b \n\t" |
1037 :: "r" (buf+64), "r" (delay_ptr) | |
16173 | 1038 : "%"REG_S, "%"REG_D |
3553 | 1039 ); |
1040 delay_ptr+=128; | |
1041 // window_ptr-=128; | |
3579 | 1042 |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27318
diff
changeset
|
1043 __asm__ volatile( |
16173 | 1044 "mov $1024, %%"REG_D" \n\t" // 1024 |
1045 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1046 ASMALIGN(4) |
3553 | 1047 "1: \n\t" |
16173 | 1048 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
1049 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? | |
1050 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? | |
1051 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? | |
3553 | 1052 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
16173 | 1053 "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" |
1054 "movaps %%xmm0, (%1, %%"REG_S") \n\t" | |
1055 "add $16, %%"REG_S" \n\t" | |
1056 "sub $16, %%"REG_D" \n\t" | |
1057 "cmp $512, %%"REG_S" \n\t" | |
3553 | 1058 " jb 1b \n\t" |
1059 :: "r" (buf), "r" (delay_ptr) | |
16173 | 1060 : "%"REG_S, "%"REG_D |
3553 | 1061 ); |
3394 | 1062 } |
16173 | 1063 #endif // ARCH_X86 || ARCH_X86_64 |
3394 | 1064 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1065 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) |
3394 | 1066 { |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1067 int i, k; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1068 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1069 const sample_t * window = a52_imdct_window; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1070 complex_t buf1[64], buf2[64]; |
3394 | 1071 |
1072 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1073 for (i = 0; i < 64; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1074 k = fftorder[i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1075 t_r = pre2[i].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1076 t_i = pre2[i].imag; |
3394 | 1077 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1078 buf1[i].real = t_i * data[254-k] + t_r * data[k]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1079 buf1[i].imag = t_r * data[254-k] - t_i * data[k]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1080 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1081 buf2[i].real = t_i * data[255-k] + t_r * data[k+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1082 buf2[i].imag = t_r * data[255-k] - t_i * data[k+1]; |
3394 | 1083 } |
1084 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1085 ifft64 (buf1); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1086 ifft64 (buf2); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1087 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1088 /* Post IFFT complex multiply */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1089 /* Window and convert to real valued signal */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1090 for (i = 0; i < 32; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1091 /* y1[n] = z1[n] * (xcos2[n] + j * xsin2[n]) ; */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1092 t_r = post2[i].real; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1093 t_i = post2[i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1094 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1095 a_r = t_r * buf1[i].real + t_i * buf1[i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1096 a_i = t_i * buf1[i].real - t_r * buf1[i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1097 b_r = t_i * buf1[63-i].real + t_r * buf1[63-i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1098 b_i = t_r * buf1[63-i].real - t_i * buf1[63-i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1099 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1100 c_r = t_r * buf2[i].real + t_i * buf2[i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1101 c_i = t_i * buf2[i].real - t_r * buf2[i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1102 d_r = t_i * buf2[63-i].real + t_r * buf2[63-i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1103 d_i = t_r * buf2[63-i].real - t_i * buf2[63-i].imag; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1104 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1105 w_1 = window[2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1106 w_2 = window[255-2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1107 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1108 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1109 delay[2*i] = c_i; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1110 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1111 w_1 = window[128+2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1112 w_2 = window[127-2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1113 data[128+2*i] = delay[127-2*i] * w_2 + a_i * w_1 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1114 data[127-2*i] = delay[127-2*i] * w_1 - a_i * w_2 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1115 delay[127-2*i] = c_r; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1116 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1117 w_1 = window[2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1118 w_2 = window[254-2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1119 data[2*i+1] = delay[2*i+1] * w_2 - b_i * w_1 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1120 data[254-2*i] = delay[2*i+1] * w_1 + b_i * w_2 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1121 delay[2*i+1] = d_r; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1122 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1123 w_1 = window[129+2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1124 w_2 = window[126-2*i]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1125 data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1126 data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1127 delay[126-2*i] = d_i; |
3394 | 1128 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1129 } |
3394 | 1130 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1131 static double besselI0 (double x) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1132 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1133 double bessel = 1; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1134 int i = 100; |
3394 | 1135 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1136 do |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1137 bessel = bessel * x / (i * i) + 1; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1138 while (--i); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1139 return bessel; |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1140 } |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1141 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1142 void a52_imdct_init (uint32_t mm_accel) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1143 { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1144 int i, j, k; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1145 double sum; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1146 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1147 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1148 sum = 0; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1149 for (i = 0; i < 256; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1150 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1151 a52_imdct_window[i] = sum; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1152 } |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1153 sum++; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1154 for (i = 0; i < 256; i++) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1155 a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1156 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1157 for (i = 0; i < 3; i++) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1158 roots16[i] = cos ((M_PI / 8) * (i + 1)); |
3394 | 1159 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1160 for (i = 0; i < 7; i++) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1161 roots32[i] = cos ((M_PI / 16) * (i + 1)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1162 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1163 for (i = 0; i < 15; i++) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1164 roots64[i] = cos ((M_PI / 32) * (i + 1)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1165 |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1166 for (i = 0; i < 31; i++) |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1167 roots128[i] = cos ((M_PI / 64) * (i + 1)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1168 |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1169 for (i = 0; i < 64; i++) { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1170 k = fftorder[i] / 2 + 64; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1171 pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1172 pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1173 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1174 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1175 for (i = 64; i < 128; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1176 k = fftorder[i] / 2 + 64; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1177 pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1178 pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); |
3394 | 1179 } |
1180 | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1181 for (i = 0; i < 64; i++) { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1182 post1[i].real = cos ((M_PI / 256) * (i + 0.5)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1183 post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); |
3394 | 1184 } |
1185 | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1186 for (i = 0; i < 64; i++) { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1187 k = fftorder[i] / 4; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1188 pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1189 pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1190 } |
3394 | 1191 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1192 for (i = 0; i < 32; i++) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1193 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1194 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1195 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1196 for (i = 0; i < 128; i++) { |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1197 xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1198 xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1199 } |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1200 for (i = 0; i < 7; i++) { |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1201 j = 1 << i; |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1202 for (k = 0; k < j; k++) { |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1203 w[i][k].real = cos (-M_PI * k / j); |
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1204 w[i][k].imag = sin (-M_PI * k / j); |
3394 | 1205 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1206 } |
28290 | 1207 #if ARCH_X86 || ARCH_X86_64 |
3527 | 1208 for (i = 0; i < 128; i++) { |
3581 | 1209 sseSinCos1c[2*i+0]= xcos1[i]; |
1210 sseSinCos1c[2*i+1]= -xcos1[i]; | |
1211 sseSinCos1d[2*i+0]= xsin1[i]; | |
1212 sseSinCos1d[2*i+1]= xsin1[i]; | |
3527 | 1213 } |
3534 | 1214 for (i = 1; i < 7; i++) { |
1215 j = 1 << i; | |
1216 for (k = 0; k < j; k+=2) { | |
1217 | |
1218 sseW[i][4*k + 0] = w[i][k+0].real; | |
1219 sseW[i][4*k + 1] = w[i][k+0].real; | |
1220 sseW[i][4*k + 2] = w[i][k+1].real; | |
1221 sseW[i][4*k + 3] = w[i][k+1].real; | |
1222 | |
1223 sseW[i][4*k + 4] = -w[i][k+0].imag; | |
1224 sseW[i][4*k + 5] = w[i][k+0].imag; | |
1225 sseW[i][4*k + 6] = -w[i][k+1].imag; | |
1226 sseW[i][4*k + 7] = w[i][k+1].imag; | |
1227 | |
1228 //we multiply more or less uninitalized numbers so we need to use exactly 0.0 | |
1229 if(k==0) | |
1230 { | |
1231 // sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0; | |
1232 sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0; | |
1233 } | |
1234 | |
1235 if(2*k == j) | |
1236 { | |
1237 sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0; | |
1238 // sseW[i][4*k + 4]= -(sseW[i][4*k + 5]= -1.0); | |
1239 } | |
1240 } | |
1241 } | |
3552 | 1242 |
1243 for(i=0; i<128; i++) | |
1244 { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1245 sseWindow[2*i+0]= -a52_imdct_window[2*i+0]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1246 sseWindow[2*i+1]= a52_imdct_window[2*i+1]; |
3552 | 1247 } |
3553 | 1248 |
1249 for(i=0; i<64; i++) | |
1250 { | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1251 sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1252 sseWindow[256 + 2*i+1]= a52_imdct_window[254 - 2*i+0]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1253 sseWindow[384 + 2*i+0]= a52_imdct_window[126 - 2*i+1]; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1254 sseWindow[384 + 2*i+1]= -a52_imdct_window[126 - 2*i+0]; |
3553 | 1255 } |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1256 #endif |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1257 a52_imdct_512 = imdct_do_512; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1258 ifft128 = ifft128_c; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1259 ifft64 = ifft64_c; |
3579 | 1260 |
28290 | 1261 #if ARCH_X86 || ARCH_X86_64 |
4497 | 1262 if(mm_accel & MM_ACCEL_X86_SSE) |
1263 { | |
1264 fprintf (stderr, "Using SSE optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1265 a52_imdct_512 = imdct_do_512_sse; |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1266 } |
4497 | 1267 else |
1268 if(mm_accel & MM_ACCEL_X86_3DNOWEXT) | |
1269 { | |
1270 fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1271 a52_imdct_512 = imdct_do_512_3dnowex; |
4497 | 1272 } |
1273 else | |
1274 if(mm_accel & MM_ACCEL_X86_3DNOW) | |
1275 { | |
1276 fprintf (stderr, "Using 3DNow optimized IMDCT transform\n"); | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1277 a52_imdct_512 = imdct_do_512_3dnow; |
4497 | 1278 } |
1279 else | |
16173 | 1280 #endif // ARCH_X86 || ARCH_X86_64 |
28290 | 1281 #if HAVE_ALTIVEC |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
1282 if (mm_accel & MM_ACCEL_PPC_ALTIVEC) |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
1283 { |
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
1284 fprintf(stderr, "Using AltiVec optimized IMDCT transform\n"); |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1285 a52_imdct_512 = imdct_do_512_altivec; |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
1286 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1287 else |
9001
01a9cf43074c
An AltiVec-enhanced IMDCT for liba52 (liba52/imdct.c)
arpi
parents:
8451
diff
changeset
|
1288 #endif |
3884 | 1289 |
28361 | 1290 #ifdef LIBA52_DJBFFT |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1291 if (mm_accel & MM_ACCEL_DJBFFT) { |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1292 fprintf (stderr, "Using djbfft for IMDCT transform\n"); |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1293 ifft128 = (void (*) (complex_t *)) fftc4_un128; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1294 ifft64 = (void (*) (complex_t *)) fftc4_un64; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1295 } else |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1296 #endif |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1297 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
1298 fprintf (stderr, "No accelerated IMDCT transform found\n"); |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
1299 } |
3884 | 1300 } |