annotate liba52/imdct.c @ 3512:1f166e420b15

a bit more SSE optimizations
author michael
date Sun, 16 Dec 2001 03:06:41 +0000
parents b5220cf63fc3
children 5a88b21cfe8a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1 /*
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
2 * imdct.c
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
5 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
6 * This file is part of a52dec, a free ATSC A-52 stream decoder.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
7 * See http://liba52.sourceforge.net/ for updates.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
8 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
9 * a52dec is free software; you can redistribute it and/or modify
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
10 * it under the terms of the GNU General Public License as published by
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
11 * the Free Software Foundation; either version 2 of the License, or
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
12 * (at your option) any later version.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
13 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
14 * a52dec is distributed in the hope that it will be useful,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
17 * GNU General Public License for more details.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
18 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
19 * You should have received a copy of the GNU General Public License
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
20 * along with this program; if not, write to the Free Software
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
22 */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
23
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
24 #include "config.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
25
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
26 #include <math.h>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
27 #include <stdio.h>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
28 #ifndef M_PI
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
29 #define M_PI 3.1415926535897932384626433832795029
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
30 #endif
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
31 #include <inttypes.h>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
32
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
33 #include "a52.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
34 #include "a52_internal.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
35 #include "mm_accel.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
36
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
37 void (* imdct_256) (sample_t data[], sample_t delay[], sample_t bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
38 void (* imdct_512) (sample_t data[], sample_t delay[], sample_t bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
39
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
40 typedef struct complex_s {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
41 sample_t real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
42 sample_t imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
43 } complex_t;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
44
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
45
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
/*
 * Bit-reversal permutation for a 128-point transform: entry i holds i with
 * its low 7 bits reversed.  The 512-sample IMDCT uses it to shuffle buf[]
 * before the FFT merge passes.
 */
static uint8_t bit_reverse_512[128] = {
    0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
    0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
    0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
    0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
    0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
    0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
    0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
    0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f
};
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
64
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
/*
 * Bit-reversal permutation for a 64-point transform: entry i holds i with
 * its low 6 bits reversed.
 * NOTE(review): presumably consumed by the 256-sample IMDCT, which is not
 * visible in this part of the file.
 */
static uint8_t bit_reverse_256[64] = {
    0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38, 0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
    0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a, 0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
    0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39, 0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
    0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b, 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f
};
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
74
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
75 #ifdef HAVE_SSE
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
76 // NOTE: SSE needs 16byte alignment or it will segfault
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
77 static complex_t __attribute__((aligned(16))) buf[128];
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
78 static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
79 #else
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
80 static complex_t buf[128];
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
81 #endif
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
82
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
83 /* Twiddle factor LUT */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
84 static complex_t w_1[1];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
85 static complex_t w_2[2];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
86 static complex_t w_4[4];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
87 static complex_t w_8[8];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
88 static complex_t w_16[16];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
89 static complex_t w_32[32];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
90 static complex_t w_64[64];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
91 static complex_t * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
92
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
93 /* Twiddle factors for IMDCT */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
94 static sample_t xcos1[128];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
95 static sample_t xsin1[128];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
96 static sample_t xcos2[64];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
97 static sample_t xsin2[64];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
98
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
99 /* Windowing function for Modified DCT - Thank you acroread */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
100 sample_t imdct_window[] = {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
101 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
102 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
103 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
104 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
105 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
106 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
107 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
108 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
109 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
110 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
111 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
112 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
113 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
114 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
115 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
116 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
117 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
118 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
119 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
120 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
121 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
122 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
123 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
124 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
125 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
126 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
127 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
128 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
129 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
130 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
131 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
132 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 };
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
133
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
134
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
135 static inline void swap_cmplx(complex_t *a, complex_t *b)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
136 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
137 complex_t tmp;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
138
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
139 tmp = *a;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
140 *a = *b;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
141 *b = tmp;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
142 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
143
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
144
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
145
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
146 static inline complex_t cmplx_mult(complex_t a, complex_t b)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
147 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
148 complex_t ret;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
149
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
150 ret.real = a.real * b.real - a.imag * b.imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
151 ret.imag = a.real * b.imag + a.imag * b.real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
152
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
153 return ret;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
154 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
155
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
156 void
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
157 imdct_do_512(sample_t data[],sample_t delay[], sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
158 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
159 int i,k;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
160 int p,q;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
161 int m;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
162 int two_m;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
163 int two_m_plus_one;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
164
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
165 sample_t tmp_a_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
166 sample_t tmp_a_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
167 sample_t tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
168 sample_t tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
169
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
170 sample_t *data_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
171 sample_t *delay_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
172 sample_t *window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
173
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
174 /* 512 IMDCT with source and dest data in 'data' */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
175
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
176 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
177 for( i=0; i < 128; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
178 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
179 buf[i].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
180 buf[i].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i]));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
181 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
182
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
183 /* Bit reversed shuffling */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
184 for(i=0; i<128; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
185 k = bit_reverse_512[i];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
186 if (k < i)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
187 swap_cmplx(&buf[i],&buf[k]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
188 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
189
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
190 /* FFT Merge */
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
191 #ifdef HAVE_SSE
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
192 // Note w[0][0]={1,0}
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
193 // C Code for the following asm loop
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
194 /* for(i = 0; i < 128; i += 2) {
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
195 p = 0 + i;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
196 q = p + 1;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
197 tmp_a_r = buf[p].real;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
198 tmp_a_i = buf[p].imag;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
199 tmp_b_r = buf[q].real;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
200 tmp_b_i = buf[q].imag;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
201 buf[p].real = tmp_a_r + tmp_b_r;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
202 buf[p].imag = tmp_a_i + tmp_b_i;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
203 buf[q].real = tmp_a_r - tmp_b_r;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
204 buf[q].imag = tmp_a_i - tmp_b_i;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
205 }*/
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
206 asm volatile(
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
207 "xorps %%xmm1, %%xmm1 \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
208 "xorps %%xmm2, %%xmm2 \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
209 "movl %0, %%esi \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
210 "1: \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
211 "movlps (%%esi), %%xmm0 \n\t" //buf[p]
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
212 "movlps 8(%%esi), %%xmm1\n\t" //buf[q]
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
213 "movhps (%%esi), %%xmm0 \n\t" //buf[p]
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
214 "movhps 8(%%esi), %%xmm2\n\t" //buf[q]
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
215 "addps %%xmm1, %%xmm0 \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
216 "subps %%xmm2, %%xmm0 \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
217 "movaps %%xmm0, (%%esi) \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
218 "addl $16, %%esi \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
219 "cmpl %1, %%esi \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
220 " jb 1b \n\t"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
221 :: "g" (buf), "r" (buf + 128)
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
222 : "%esi"
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
223 );
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
224
3512
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
225 // Note w[1]={{1,0}, {0,-1}}
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
226 // C Code for the following asm loop
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
227 /* for(i = 0; i < 128; i += 4) {
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
228 p = 0 + i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
229 q = p + 2;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
230 tmp_a_r = buf[p].real;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
231 tmp_a_i = buf[p].imag;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
232 tmp_b_r = buf[q].real;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
233 tmp_b_i = buf[q].imag;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
234 buf[p].real = tmp_a_r + tmp_b_r;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
235 buf[p].imag = tmp_a_i + tmp_b_i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
236 buf[q].real = tmp_a_r - tmp_b_r;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
237 buf[q].imag = tmp_a_i - tmp_b_i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
238 tmp_a_r = buf[p+1].real;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
239 tmp_a_i = buf[p+1].imag;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
240 tmp_b_r = buf[q+1].imag;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
241 tmp_b_i = buf[q+1].real;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
242 buf[p+1].real = tmp_a_r + tmp_b_r;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
243 buf[p+1].imag = tmp_a_i - tmp_b_i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
244 buf[q+1].real = tmp_a_r - tmp_b_r;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
245 buf[q+1].imag = tmp_a_i + tmp_b_i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
246 }
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
247 */
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
248 asm volatile(
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
249 "movaps ps111_1, %%xmm7 \n\t" // 1,1,1,-1
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
250 "movl %0, %%esi \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
251 "1: \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
252 "movaps 16(%%esi), %%xmm2 \n\t" //r2,i2,r3,i3
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
253 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
254 "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
255 "movaps (%%esi), %%xmm0 \n\t" //r0,i0,r1,i1
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
256 "movaps (%%esi), %%xmm1 \n\t" //r0,i0,r1,i1
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
257 "addps %%xmm2, %%xmm0 \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
258 "subps %%xmm2, %%xmm1 \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
259 "movaps %%xmm0, (%%esi) \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
260 "movaps %%xmm1, 16(%%esi) \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
261 "addl $32, %%esi \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
262 "cmpl %1, %%esi \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
263 " jb 1b \n\t"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
264 :: "g" (buf), "r" (buf + 128)
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
265 : "%esi"
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
266 );
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
267
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
268 m=2;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
269 two_m = 4;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
270
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
271 for(k = 0; k < two_m; k++) {
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
272 for(i = 0; i < 128; i += 8) {
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
273 p = k + i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
274 q = p + two_m;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
275 tmp_a_r = buf[p].real;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
276 tmp_a_i = buf[p].imag;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
277 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
278 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
279 buf[p].real = tmp_a_r + tmp_b_r;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
280 buf[p].imag = tmp_a_i + tmp_b_i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
281 buf[q].real = tmp_a_r - tmp_b_r;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
282 buf[q].imag = tmp_a_i - tmp_b_i;
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
283 }
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
284 }
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
285
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
286 for (m=3; m < 7; m++) {
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
287 two_m = (1 << m);
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
288
1f166e420b15 a bit more SSE optimizations
michael
parents: 3508
diff changeset
289 two_m_plus_one = two_m<<1;
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
290
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
291 for(k = 0; k < two_m; k++) {
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
292 for(i = 0; i < 128; i += two_m_plus_one) {
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
293 p = k + i;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
294 q = p + two_m;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
295 tmp_a_r = buf[p].real;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
296 tmp_a_i = buf[p].imag;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
297 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
298 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
299 buf[p].real = tmp_a_r + tmp_b_r;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
300 buf[p].imag = tmp_a_i + tmp_b_i;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
301 buf[q].real = tmp_a_r - tmp_b_r;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
302 buf[q].imag = tmp_a_i - tmp_b_i;
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
303 }
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
304 }
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
305 }
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
306 #else
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
307 for (m=0; m < 7; m++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
308 if(m)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
309 two_m = (1 << m);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
310 else
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
311 two_m = 1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
312
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
313 two_m_plus_one = (1 << (m+1));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
314
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
315 for(k = 0; k < two_m; k++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
316 for(i = 0; i < 128; i += two_m_plus_one) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
317 p = k + i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
318 q = p + two_m;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
319 tmp_a_r = buf[p].real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
320 tmp_a_i = buf[p].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
321 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
322 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
323 buf[p].real = tmp_a_r + tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
324 buf[p].imag = tmp_a_i + tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
325 buf[q].real = tmp_a_r - tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
326 buf[q].imag = tmp_a_i - tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
327 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
328 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
329 }
3508
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
330 #endif
b5220cf63fc3 some SSE optimizations
michael
parents: 3394
diff changeset
331
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
332 /* Post IFFT complex multiply plus IFFT complex conjugate*/
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
333 for( i=0; i < 128; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
334 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
335 tmp_a_r = buf[i].real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
336 tmp_a_i = -1.0 * buf[i].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
337 buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
338 buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
339 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
340
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
341 data_ptr = data;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
342 delay_ptr = delay;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
343 window_ptr = imdct_window;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
344
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
345 /* Window and convert to real valued signal */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
346 for(i=0; i< 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
347 *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
348 *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
349 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
350
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
351 for(i=0; i< 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
352 *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
353 *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
354 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
355
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
356 /* The trailing edge of the window goes into the delay line */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
357 delay_ptr = delay;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
358
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
359 for(i=0; i< 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
360 *delay_ptr++ = -buf[64+i].real * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
361 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
362 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
363
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
364 for(i=0; i<64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
365 *delay_ptr++ = buf[i].imag * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
366 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
367 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
368 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
369
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
370 void
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
371 imdct_do_256(sample_t data[],sample_t delay[],sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
372 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
373 int i,k;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
374 int p,q;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
375 int m;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
376 int two_m;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
377 int two_m_plus_one;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
378
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
379 sample_t tmp_a_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
380 sample_t tmp_a_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
381 sample_t tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
382 sample_t tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
383
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
384 sample_t *data_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
385 sample_t *delay_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
386 sample_t *window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
387
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
388 complex_t *buf_1, *buf_2;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
389
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
390 buf_1 = &buf[0];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
391 buf_2 = &buf[64];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
392
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
393 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
394 for(k=0; k<64; k++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
395 /* X1[k] = X[2*k] */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
396 /* X2[k] = X[2*k+1] */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
397
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
398 p = 2 * (128-2*k-1);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
399 q = 2 * (2 * k);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
400
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
401 /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
402 buf_1[k].real = data[p] * xcos2[k] - data[q] * xsin2[k];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
403 buf_1[k].imag = -1.0f * (data[q] * xcos2[k] + data[p] * xsin2[k]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
404 /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
405 buf_2[k].real = data[p + 1] * xcos2[k] - data[q + 1] * xsin2[k];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
406 buf_2[k].imag = -1.0f * ( data[q + 1] * xcos2[k] + data[p + 1] * xsin2[k]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
407 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
408
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
409 /* IFFT Bit reversed shuffling */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
410 for(i=0; i<64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
411 k = bit_reverse_256[i];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
412 if (k < i) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
413 swap_cmplx(&buf_1[i],&buf_1[k]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
414 swap_cmplx(&buf_2[i],&buf_2[k]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
415 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
416 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
417
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
418 /* FFT Merge */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
419 for (m=0; m < 6; m++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
420 two_m = (1 << m);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
421 two_m_plus_one = (1 << (m+1));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
422
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
423 /* FIXME */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
424 if(m)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
425 two_m = (1 << m);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
426 else
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
427 two_m = 1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
428
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
429 for(k = 0; k < two_m; k++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
430 for(i = 0; i < 64; i += two_m_plus_one) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
431 p = k + i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
432 q = p + two_m;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
433 /* Do block 1 */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
434 tmp_a_r = buf_1[p].real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
435 tmp_a_i = buf_1[p].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
436 tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
437 tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
438 buf_1[p].real = tmp_a_r + tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
439 buf_1[p].imag = tmp_a_i + tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
440 buf_1[q].real = tmp_a_r - tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
441 buf_1[q].imag = tmp_a_i - tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
442
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
443 /* Do block 2 */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
444 tmp_a_r = buf_2[p].real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
445 tmp_a_i = buf_2[p].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
446 tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
447 tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
448 buf_2[p].real = tmp_a_r + tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
449 buf_2[p].imag = tmp_a_i + tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
450 buf_2[q].real = tmp_a_r - tmp_b_r;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
451 buf_2[q].imag = tmp_a_i - tmp_b_i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
452 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
453 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
454 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
455
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
456 /* Post IFFT complex multiply */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
457 for( i=0; i < 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
458 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
459 tmp_a_r = buf_1[i].real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
460 tmp_a_i = -buf_1[i].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
461 buf_1[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
462 buf_1[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
463 /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
464 tmp_a_r = buf_2[i].real;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
465 tmp_a_i = -buf_2[i].imag;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
466 buf_2[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
467 buf_2[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
468 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
469
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
470 data_ptr = data;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
471 delay_ptr = delay;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
472 window_ptr = imdct_window;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
473
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
474 /* Window and convert to real valued signal */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
475 for(i=0; i< 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
476 *data_ptr++ = -buf_1[i].imag * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
477 *data_ptr++ = buf_1[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
478 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
479
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
480 for(i=0; i< 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
481 *data_ptr++ = -buf_1[i].real * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
482 *data_ptr++ = buf_1[64-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
483 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
484
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
485 delay_ptr = delay;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
486
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
487 for(i=0; i< 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
488 *delay_ptr++ = -buf_2[i].real * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
489 *delay_ptr++ = buf_2[64-i-1].imag * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
490 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
491
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
492 for(i=0; i< 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
493 *delay_ptr++ = buf_2[i].imag * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
494 *delay_ptr++ = -buf_2[64-i-1].real * *--window_ptr;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
495 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
496 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
497
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
498 void imdct_init (uint32_t mm_accel)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
499 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
500 #ifdef LIBA52_MLIB
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
501 if (mm_accel & MM_ACCEL_MLIB) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
502 fprintf (stderr, "Using mlib for IMDCT transform\n");
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
503 imdct_512 = imdct_do_512_mlib;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
504 imdct_256 = imdct_do_256_mlib;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
505 } else
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
506 #endif
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
507 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
508 int i, j, k;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
509
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
510 fprintf (stderr, "No accelerated IMDCT transform found\n");
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
511
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
512 /* Twiddle factors to turn IFFT into IMDCT */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
513 for (i = 0; i < 128; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
514 xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
515 xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
516 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
517
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
518 /* More twiddle factors to turn IFFT into IMDCT */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
519 for (i = 0; i < 64; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
520 xcos2[i] = -cos ((M_PI / 1024) * (8 * i + 1));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
521 xsin2[i] = -sin ((M_PI / 1024) * (8 * i + 1));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
522 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
523
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
524 for (i = 0; i < 7; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
525 j = 1 << i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
526 for (k = 0; k < j; k++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
527 w[i][k].real = cos (-M_PI * k / j);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
528 w[i][k].imag = sin (-M_PI * k / j);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
529 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
530 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
531 imdct_512 = imdct_do_512;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
532 imdct_256 = imdct_do_256;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
533 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
534 }