
/*
 * Discrete Cosine Transform (DCT) for subband synthesis
 *
 * -funroll-loops (for gcc) will remove the loops for better performance
 * using loops in the source-code enhances readability
 */

/*
 * TODO: write an optimized version for the down-sampling modes
 * (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero)
 */

#include "mpg123.h"

16 void
|
|
17 mpg123_dct64(real * out0, real * out1, real * samples)
|
|
18 {
|
|
19 real bufs[64];
|
|
20
|
|
21 {
|
|
22 register int i, j;
|
|
23 register real *b1, *b2, *bs, *costab;
|
|
24
|
|
25 b1 = samples;
|
|
26 bs = bufs;
|
|
27 costab = mpg123_pnts[0] + 16;
|
|
28 b2 = b1 + 32;
|
|
29
|
|
30 for (i = 15; i >= 0; i--)
|
|
31 *bs++ = (*b1++ + *--b2);
|
|
32 for (i = 15; i >= 0; i--)
|
|
33 *bs++ = (*--b2 - *b1++) * *--costab;
|
|
34
|
|
35 b1 = bufs;
|
|
36 costab = mpg123_pnts[1] + 8;
|
|
37 b2 = b1 + 16;
|
|
38
|
|
39 {
|
|
40 for (i = 7; i >= 0; i--)
|
|
41 *bs++ = (*b1++ + *--b2);
|
|
42 for (i = 7; i >= 0; i--)
|
|
43 *bs++ = (*--b2 - *b1++) * *--costab;
|
|
44 b2 += 32;
|
|
45 costab += 8;
|
|
46 for (i = 7; i >= 0; i--)
|
|
47 *bs++ = (*b1++ + *--b2);
|
|
48 for (i = 7; i >= 0; i--)
|
|
49 *bs++ = (*b1++ - *--b2) * *--costab;
|
|
50 b2 += 32;
|
|
51 }
|
|
52
|
|
53 bs = bufs;
|
|
54 costab = mpg123_pnts[2];
|
|
55 b2 = b1 + 8;
|
|
56
|
|
57 for (j = 2; j; j--) {
|
|
58 for (i = 3; i >= 0; i--)
|
|
59 *bs++ = (*b1++ + *--b2);
|
|
60 for (i = 3; i >= 0; i--)
|
|
61 *bs++ = (*--b2 - *b1++) * costab[i];
|
|
62 b2 += 16;
|
|
63 for (i = 3; i >= 0; i--)
|
|
64 *bs++ = (*b1++ + *--b2);
|
|
65 for (i = 3; i >= 0; i--)
|
|
66 *bs++ = (*b1++ - *--b2) * costab[i];
|
|
67 b2 += 16;
|
|
68 }
|
|
69
|
|
70 b1 = bufs;
|
|
71 costab = mpg123_pnts[3];
|
|
72 b2 = b1 + 4;
|
|
73
|
|
74 for (j = 4; j; j--) {
|
|
75 *bs++ = (*b1++ + *--b2);
|
|
76 *bs++ = (*b1++ + *--b2);
|
|
77 *bs++ = (*--b2 - *b1++) * costab[1];
|
|
78 *bs++ = (*--b2 - *b1++) * costab[0];
|
|
79 b2 += 8;
|
|
80 *bs++ = (*b1++ + *--b2);
|
|
81 *bs++ = (*b1++ + *--b2);
|
|
82 *bs++ = (*b1++ - *--b2) * costab[1];
|
|
83 *bs++ = (*b1++ - *--b2) * costab[0];
|
|
84 b2 += 8;
|
|
85 }
|
|
86 bs = bufs;
|
|
87 costab = mpg123_pnts[4];
|
|
88
|
|
89 for (j = 8; j; j--) {
|
|
90 real v0, v1;
|
|
91
|
|
92 v0 = *b1++;
|
|
93 v1 = *b1++;
|
|
94 *bs++ = (v0 + v1);
|
|
95 *bs++ = (v0 - v1) * (*costab);
|
|
96 v0 = *b1++;
|
|
97 v1 = *b1++;
|
|
98 *bs++ = (v0 + v1);
|
|
99 *bs++ = (v1 - v0) * (*costab);
|
|
100 }
|
|
101
|
|
102 }
|
|
103
|
|
104 {
|
|
105 register real *b1;
|
|
106 register int i;
|
|
107
|
|
108 for (b1 = bufs, i = 8; i; i--, b1 += 4)
|
|
109 b1[2] += b1[3];
|
|
110
|
|
111 for (b1 = bufs, i = 4; i; i--, b1 += 8) {
|
|
112 b1[4] += b1[6];
|
|
113 b1[6] += b1[5];
|
|
114 b1[5] += b1[7];
|
|
115 }
|
|
116
|
|
117 for (b1 = bufs, i = 2; i; i--, b1 += 16) {
|
|
118 b1[8] += b1[12];
|
|
119 b1[12] += b1[10];
|
|
120 b1[10] += b1[14];
|
|
121 b1[14] += b1[9];
|
|
122 b1[9] += b1[13];
|
|
123 b1[13] += b1[11];
|
|
124 b1[11] += b1[15];
|
|
125 }
|
|
126 }
|
|
127
|
|
128 out0[0x10 * 16] = bufs[0];
|
|
129 out0[0x10 * 15] = bufs[16 + 0] + bufs[16 + 8];
|
|
130 out0[0x10 * 14] = bufs[8];
|
|
131 out0[0x10 * 13] = bufs[16 + 8] + bufs[16 + 4];
|
|
132 out0[0x10 * 12] = bufs[4];
|
|
133 out0[0x10 * 11] = bufs[16 + 4] + bufs[16 + 12];
|
|
134 out0[0x10 * 10] = bufs[12];
|
|
135 out0[0x10 * 9] = bufs[16 + 12] + bufs[16 + 2];
|
|
136 out0[0x10 * 8] = bufs[2];
|
|
137 out0[0x10 * 7] = bufs[16 + 2] + bufs[16 + 10];
|
|
138 out0[0x10 * 6] = bufs[10];
|
|
139 out0[0x10 * 5] = bufs[16 + 10] + bufs[16 + 6];
|
|
140 out0[0x10 * 4] = bufs[6];
|
|
141 out0[0x10 * 3] = bufs[16 + 6] + bufs[16 + 14];
|
|
142 out0[0x10 * 2] = bufs[14];
|
|
143 out0[0x10 * 1] = bufs[16 + 14] + bufs[16 + 1];
|
|
144 out0[0x10 * 0] = bufs[1];
|
|
145
|
|
146 out1[0x10 * 0] = bufs[1];
|
|
147 out1[0x10 * 1] = bufs[16 + 1] + bufs[16 + 9];
|
|
148 out1[0x10 * 2] = bufs[9];
|
|
149 out1[0x10 * 3] = bufs[16 + 9] + bufs[16 + 5];
|
|
150 out1[0x10 * 4] = bufs[5];
|
|
151 out1[0x10 * 5] = bufs[16 + 5] + bufs[16 + 13];
|
|
152 out1[0x10 * 6] = bufs[13];
|
|
153 out1[0x10 * 7] = bufs[16 + 13] + bufs[16 + 3];
|
|
154 out1[0x10 * 8] = bufs[3];
|
|
155 out1[0x10 * 9] = bufs[16 + 3] + bufs[16 + 11];
|
|
156 out1[0x10 * 10] = bufs[11];
|
|
157 out1[0x10 * 11] = bufs[16 + 11] + bufs[16 + 7];
|
|
158 out1[0x10 * 12] = bufs[7];
|
|
159 out1[0x10 * 13] = bufs[16 + 7] + bufs[16 + 15];
|
|
160 out1[0x10 * 14] = bufs[15];
|
|
161 out1[0x10 * 15] = bufs[16 + 15];
|
|
162
|
|
163 }
|