1
|
1 /*
|
|
2 // This is an optimized DCT from Jeff Tsay's maplay 1.2+ package.
|
|
3 // Saved one multiplication by doing the 'twiddle factor' stuff
|
|
4 // together with the window mul. (MH)
|
|
5 //
|
|
6 // This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the
|
|
7 // 9 point IDCT needs to be reduced further. Unfortunately, I don't
|
|
8 // know how to do that, because 9 is not an even number. - Jeff.
|
|
9 //
|
|
10 //////////////////////////////////////////////////////////////////
|
|
11 //
|
|
12 // 9 Point Inverse Discrete Cosine Transform
|
|
13 //
|
|
14 // This piece of code is Copyright 1997 Mikko Tommila and is freely usable
|
|
15 // by anybody. The algorithm itself is of course in the public domain.
|
|
16 //
|
|
17 // Again derived heuristically from the 9-point WFTA.
|
|
18 //
|
|
19 // The algorithm is optimized (?) for speed, not for small rounding errors or
|
|
20 // good readability.
|
|
21 //
|
|
22 // 36 additions, 11 multiplications
|
|
23 //
|
|
24 // Again this is very likely sub-optimal.
|
|
25 //
|
|
26 // The code is optimized to use a minimum number of temporary variables,
|
|
27 // so it should compile quite well even on 8-register Intel x86 processors.
|
|
28 // This makes the code quite obfuscated and very difficult to understand.
|
|
29 //
|
|
30 // References:
|
|
31 // [1] S. Winograd: "On Computing the Discrete Fourier Transform",
|
|
32 // Mathematics of Computation, Volume 32, Number 141, January 1978,
|
|
33 // Pages 175-199
|
|
34 */
|
|
35
|
|
36 /*------------------------------------------------------------------*/
|
|
37 /* */
|
|
38 /* Function: Calculation of the inverse MDCT */
|
|
39 /* */
|
|
40 /*------------------------------------------------------------------*/
|
|
41
|
|
42 static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
|
|
43 {
|
|
44 #ifdef NEW_DCT9
|
|
45 real tmp[18];
|
|
46 #endif
|
|
47
|
|
48 {
|
|
49 register real *in = inbuf;
|
|
50
|
|
51 in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14];
|
|
52 in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11];
|
|
53 in[11]+=in[10]; in[10]+=in[9]; in[9] +=in[8];
|
|
54 in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5];
|
|
55 in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2];
|
|
56 in[2] +=in[1]; in[1] +=in[0];
|
|
57
|
|
58 in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
|
|
59 in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1];
|
|
60
|
|
61
|
|
62 #ifdef NEW_DCT9
|
|
63 {
|
|
64 real t0, t1, t2, t3, t4, t5, t6, t7;
|
|
65
|
|
66 t1 = COS6_2 * in[12];
|
|
67 t2 = COS6_2 * (in[8] + in[16] - in[4]);
|
|
68
|
|
69 t3 = in[0] + t1;
|
|
70 t4 = in[0] - t1 - t1;
|
|
71 t5 = t4 - t2;
|
|
72
|
|
73 t0 = cos9[0] * (in[4] + in[8]);
|
|
74 t1 = cos9[1] * (in[8] - in[16]);
|
|
75
|
|
76 tmp[4] = t4 + t2 + t2;
|
|
77 t2 = cos9[2] * (in[4] + in[16]);
|
|
78
|
|
79 t6 = t3 - t0 - t2;
|
|
80 t0 += t3 + t1;
|
|
81 t3 += t2 - t1;
|
|
82
|
|
83 t2 = cos18[0] * (in[2] + in[10]);
|
|
84 t4 = cos18[1] * (in[10] - in[14]);
|
|
85 t7 = COS6_1 * in[6];
|
|
86
|
|
87 t1 = t2 + t4 + t7;
|
|
88 tmp[0] = t0 + t1;
|
|
89 tmp[8] = t0 - t1;
|
|
90 t1 = cos18[2] * (in[2] + in[14]);
|
|
91 t2 += t1 - t7;
|
|
92
|
|
93 tmp[3] = t3 + t2;
|
|
94 t0 = COS6_1 * (in[10] + in[14] - in[2]);
|
|
95 tmp[5] = t3 - t2;
|
|
96
|
|
97 t4 -= t1 + t7;
|
|
98
|
|
99 tmp[1] = t5 - t0;
|
|
100 tmp[7] = t5 + t0;
|
|
101 tmp[2] = t6 + t4;
|
|
102 tmp[6] = t6 - t4;
|
|
103 }
|
|
104
|
|
105 {
|
|
106 real t0, t1, t2, t3, t4, t5, t6, t7;
|
|
107
|
|
108 t1 = COS6_2 * in[13];
|
|
109 t2 = COS6_2 * (in[9] + in[17] - in[5]);
|
|
110
|
|
111 t3 = in[1] + t1;
|
|
112 t4 = in[1] - t1 - t1;
|
|
113 t5 = t4 - t2;
|
|
114
|
|
115 t0 = cos9[0] * (in[5] + in[9]);
|
|
116 t1 = cos9[1] * (in[9] - in[17]);
|
|
117
|
|
118 tmp[13] = (t4 + t2 + t2) * tfcos36[17-13];
|
|
119 t2 = cos9[2] * (in[5] + in[17]);
|
|
120
|
|
121 t6 = t3 - t0 - t2;
|
|
122 t0 += t3 + t1;
|
|
123 t3 += t2 - t1;
|
|
124
|
|
125 t2 = cos18[0] * (in[3] + in[11]);
|
|
126 t4 = cos18[1] * (in[11] - in[15]);
|
|
127 t7 = COS6_1 * in[7];
|
|
128
|
|
129 t1 = t2 + t4 + t7;
|
|
130 tmp[17] = (t0 + t1) * tfcos36[17-17];
|
|
131 tmp[9] = (t0 - t1) * tfcos36[17-9];
|
|
132 t1 = cos18[2] * (in[3] + in[15]);
|
|
133 t2 += t1 - t7;
|
|
134
|
|
135 tmp[14] = (t3 + t2) * tfcos36[17-14];
|
|
136 t0 = COS6_1 * (in[11] + in[15] - in[3]);
|
|
137 tmp[12] = (t3 - t2) * tfcos36[17-12];
|
|
138
|
|
139 t4 -= t1 + t7;
|
|
140
|
|
141 tmp[16] = (t5 - t0) * tfcos36[17-16];
|
|
142 tmp[10] = (t5 + t0) * tfcos36[17-10];
|
|
143 tmp[15] = (t6 + t4) * tfcos36[17-15];
|
|
144 tmp[11] = (t6 - t4) * tfcos36[17-11];
|
|
145 }
|
|
146
|
|
147 #define MACRO(v) { \
|
|
148 real tmpval; \
|
|
149 real sum0 = tmp[(v)]; \
|
|
150 real sum1 = tmp[17-(v)]; \
|
|
151 out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \
|
|
152 out2[8-(v)] = tmpval * w[26-(v)]; \
|
|
153 sum0 -= sum1; \
|
|
154 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
|
|
155 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; }
|
|
156
|
|
157 {
|
|
158 register real *out2 = o2;
|
|
159 register real *w = wintab;
|
|
160 register real *out1 = o1;
|
|
161 register real *ts = tsbuf;
|
|
162
|
|
163 MACRO(0);
|
|
164 MACRO(1);
|
|
165 MACRO(2);
|
|
166 MACRO(3);
|
|
167 MACRO(4);
|
|
168 MACRO(5);
|
|
169 MACRO(6);
|
|
170 MACRO(7);
|
|
171 MACRO(8);
|
|
172 }
|
|
173
|
|
174 #else
|
|
175
|
|
176 {
|
|
177
|
|
178 #define MACRO0(v) { \
|
|
179 real tmp; \
|
|
180 out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \
|
|
181 out2[8-(v)] = tmp * w[26-(v)]; } \
|
|
182 sum0 -= sum1; \
|
|
183 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
|
|
184 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)];
|
|
185 #define MACRO1(v) { \
|
|
186 real sum0,sum1; \
|
|
187 sum0 = tmp1a + tmp2a; \
|
|
188 sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
|
|
189 MACRO0(v); }
|
|
190 #define MACRO2(v) { \
|
|
191 real sum0,sum1; \
|
|
192 sum0 = tmp2a - tmp1a; \
|
|
193 sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
|
|
194 MACRO0(v); }
|
|
195
|
|
196 register const real *c = nCOS9;
|
|
197 register real *out2 = o2;
|
|
198 register real *w = wintab;
|
|
199 register real *out1 = o1;
|
|
200 register real *ts = tsbuf;
|
|
201
|
|
202 real ta33,ta66,tb33,tb66;
|
|
203
|
|
204 ta33 = in[2*3+0] * c[3];
|
|
205 ta66 = in[2*6+0] * c[6];
|
|
206 tb33 = in[2*3+1] * c[3];
|
|
207 tb66 = in[2*6+1] * c[6];
|
|
208
|
|
209 {
|
|
210 real tmp1a,tmp2a,tmp1b,tmp2b;
|
|
211 tmp1a = in[2*1+0] * c[1] + ta33 + in[2*5+0] * c[5] + in[2*7+0] * c[7];
|
|
212 tmp1b = in[2*1+1] * c[1] + tb33 + in[2*5+1] * c[5] + in[2*7+1] * c[7];
|
|
213 tmp2a = in[2*0+0] + in[2*2+0] * c[2] + in[2*4+0] * c[4] + ta66 + in[2*8+0] * c[8];
|
|
214 tmp2b = in[2*0+1] + in[2*2+1] * c[2] + in[2*4+1] * c[4] + tb66 + in[2*8+1] * c[8];
|
|
215
|
|
216 MACRO1(0);
|
|
217 MACRO2(8);
|
|
218 }
|
|
219
|
|
220 {
|
|
221 real tmp1a,tmp2a,tmp1b,tmp2b;
|
|
222 tmp1a = ( in[2*1+0] - in[2*5+0] - in[2*7+0] ) * c[3];
|
|
223 tmp1b = ( in[2*1+1] - in[2*5+1] - in[2*7+1] ) * c[3];
|
|
224 tmp2a = ( in[2*2+0] - in[2*4+0] - in[2*8+0] ) * c[6] - in[2*6+0] + in[2*0+0];
|
|
225 tmp2b = ( in[2*2+1] - in[2*4+1] - in[2*8+1] ) * c[6] - in[2*6+1] + in[2*0+1];
|
|
226
|
|
227 MACRO1(1);
|
|
228 MACRO2(7);
|
|
229 }
|
|
230
|
|
231 {
|
|
232 real tmp1a,tmp2a,tmp1b,tmp2b;
|
|
233 tmp1a = in[2*1+0] * c[5] - ta33 - in[2*5+0] * c[7] + in[2*7+0] * c[1];
|
|
234 tmp1b = in[2*1+1] * c[5] - tb33 - in[2*5+1] * c[7] + in[2*7+1] * c[1];
|
|
235 tmp2a = in[2*0+0] - in[2*2+0] * c[8] - in[2*4+0] * c[2] + ta66 + in[2*8+0] * c[4];
|
|
236 tmp2b = in[2*0+1] - in[2*2+1] * c[8] - in[2*4+1] * c[2] + tb66 + in[2*8+1] * c[4];
|
|
237
|
|
238 MACRO1(2);
|
|
239 MACRO2(6);
|
|
240 }
|
|
241
|
|
242 {
|
|
243 real tmp1a,tmp2a,tmp1b,tmp2b;
|
|
244 tmp1a = in[2*1+0] * c[7] - ta33 + in[2*5+0] * c[1] - in[2*7+0] * c[5];
|
|
245 tmp1b = in[2*1+1] * c[7] - tb33 + in[2*5+1] * c[1] - in[2*7+1] * c[5];
|
|
246 tmp2a = in[2*0+0] - in[2*2+0] * c[4] + in[2*4+0] * c[8] + ta66 - in[2*8+0] * c[2];
|
|
247 tmp2b = in[2*0+1] - in[2*2+1] * c[4] + in[2*4+1] * c[8] + tb66 - in[2*8+1] * c[2];
|
|
248
|
|
249 MACRO1(3);
|
|
250 MACRO2(5);
|
|
251 }
|
|
252
|
|
253 {
|
|
254 real sum0,sum1;
|
|
255 sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
|
|
256 sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4];
|
|
257 MACRO0(4);
|
|
258 }
|
|
259 }
|
|
260 #endif
|
|
261
|
|
262 }
|
|
263 }
|
|
264
|