annotate mp3lib/dct36.c @ 4689:61f4b8fd380e

Fixing "quake" by waiting directly for vsync. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ (I don't know why - but SMART_SWITCH is always disabled on my card) Benchmarks: [SRC] VIDEO: [DIV3] 624x356 24bpp 24.00 fps 497.3 kbps (60.7 kbyte/s) [DEST] 1024x768@32 70fps (-xvidix -fs -zoom) -vc ffdivx -double: BENCHMARKs: V: 3.838s VO: 7.305s A: 0.555s Sys: 18.264s = 29.962s BENCHMARK%: V: 12.8110% VO: 24.3808% A: 1.8518% Sys: 60.9564% = 100.0000% total video time: 11.143s -vc ffdivx -nodouble: BENCHMARKs: V: 3.846s VO: 1.668s A: 0.539s Sys: 23.869s = 29.922s BENCHMARK%: V: 12.8525% VO: 5.5744% A: 1.8015% Sys: 79.7716% = 100.0000% total video time: 5.514s -vc divxds -double (direct rendering) BENCHMARKs: V: 8.275s VO: 5.750s A: 0.532s Sys: 15.414s = 29.971s BENCHMARK%: V: 27.6115% VO: 19.1850% A: 1.7737% Sys: 51.4298% = 100.0000% total video time: 14.070s -vc divxds -nodouble (direct rendering) BENCHMARKs: V: 7.353s VO: 0.002s A: 0.521s Sys: 22.083s = 29.958s BENCHMARK%: V: 24.5433% VO: 0.0052% A: 1.7382% Sys: 73.7133% = 100.0000% total video time: 7.355s Unfortunately we have a dramatic loss of performance (100%) :(
author nick
date Wed, 13 Feb 2002 08:24:13 +0000
parents 03b7e2955a20
children 07e7a572bd84
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
1 /*
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
2 // This is an optimized DCT from Jeff Tsay's maplay 1.2+ package.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
3 // Saved one multiplication by doing the 'twiddle factor' stuff
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
4 // together with the window mul. (MH)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
5 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
6 // This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
7 // 9 point IDCT needs to be reduced further. Unfortunately, I don't
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
8 // know how to do that, because 9 is not an even number. - Jeff.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
9 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
10 //////////////////////////////////////////////////////////////////
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
11 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
12 // 9 Point Inverse Discrete Cosine Transform
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
13 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
14 // This piece of code is Copyright 1997 Mikko Tommila and is freely usable
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
15 // by anybody. The algorithm itself is of course in the public domain.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
16 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
17 // Again derived heuristically from the 9-point WFTA.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
18 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
19 // The algorithm is optimized (?) for speed, not for small rounding errors or
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
20 // good readability.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
21 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
22 // 36 additions, 11 multiplications
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
23 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
24 // Again this is very likely sub-optimal.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
25 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
26 // The code is optimized to use a minimum number of temporary variables,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
27 // so it should compile quite well even on 8-register Intel x86 processors.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
28 // This makes the code quite obfuscated and very difficult to understand.
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
29 //
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
30 // References:
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
31 // [1] S. Winograd: "On Computing the Discrete Fourier Transform",
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
32 // Mathematics of Computation, Volume 32, Number 141, January 1978,
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
33 // Pages 175-199
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
34 */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
35
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
36 /*------------------------------------------------------------------*/
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
37 /* */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
38 /* Function: Calculation of the inverse MDCT */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
39 /* */
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
40 /*------------------------------------------------------------------*/
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
41
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
42 static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
43 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
44 #ifdef NEW_DCT9
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
45 real tmp[18];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
46 #endif
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
47
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
48 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
49 register real *in = inbuf;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
50
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
51 in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
52 in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
53 in[11]+=in[10]; in[10]+=in[9]; in[9] +=in[8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
54 in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
55 in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
56 in[2] +=in[1]; in[1] +=in[0];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
57
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
58 in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
59 in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
60
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
61
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
62 #ifdef NEW_DCT9
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
63 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
64 real t0, t1, t2, t3, t4, t5, t6, t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
65
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
66 t1 = COS6_2 * in[12];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
67 t2 = COS6_2 * (in[8] + in[16] - in[4]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
68
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
69 t3 = in[0] + t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
70 t4 = in[0] - t1 - t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
71 t5 = t4 - t2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
72
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
73 t0 = cos9[0] * (in[4] + in[8]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
74 t1 = cos9[1] * (in[8] - in[16]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
75
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
76 tmp[4] = t4 + t2 + t2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
77 t2 = cos9[2] * (in[4] + in[16]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
78
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
79 t6 = t3 - t0 - t2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
80 t0 += t3 + t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
81 t3 += t2 - t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
82
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
83 t2 = cos18[0] * (in[2] + in[10]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
84 t4 = cos18[1] * (in[10] - in[14]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
85 t7 = COS6_1 * in[6];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
86
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
87 t1 = t2 + t4 + t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
88 tmp[0] = t0 + t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
89 tmp[8] = t0 - t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
90 t1 = cos18[2] * (in[2] + in[14]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
91 t2 += t1 - t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
92
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
93 tmp[3] = t3 + t2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
94 t0 = COS6_1 * (in[10] + in[14] - in[2]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
95 tmp[5] = t3 - t2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
96
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
97 t4 -= t1 + t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
98
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
99 tmp[1] = t5 - t0;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
100 tmp[7] = t5 + t0;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
101 tmp[2] = t6 + t4;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
102 tmp[6] = t6 - t4;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
103 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
104
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
105 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
106 real t0, t1, t2, t3, t4, t5, t6, t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
107
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
108 t1 = COS6_2 * in[13];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
109 t2 = COS6_2 * (in[9] + in[17] - in[5]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
110
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
111 t3 = in[1] + t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
112 t4 = in[1] - t1 - t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
113 t5 = t4 - t2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
114
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
115 t0 = cos9[0] * (in[5] + in[9]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
116 t1 = cos9[1] * (in[9] - in[17]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
117
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
118 tmp[13] = (t4 + t2 + t2) * tfcos36[17-13];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
119 t2 = cos9[2] * (in[5] + in[17]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
120
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
121 t6 = t3 - t0 - t2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
122 t0 += t3 + t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
123 t3 += t2 - t1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
124
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
125 t2 = cos18[0] * (in[3] + in[11]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
126 t4 = cos18[1] * (in[11] - in[15]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
127 t7 = COS6_1 * in[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
128
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
129 t1 = t2 + t4 + t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
130 tmp[17] = (t0 + t1) * tfcos36[17-17];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
131 tmp[9] = (t0 - t1) * tfcos36[17-9];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
132 t1 = cos18[2] * (in[3] + in[15]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
133 t2 += t1 - t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
134
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
135 tmp[14] = (t3 + t2) * tfcos36[17-14];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
136 t0 = COS6_1 * (in[11] + in[15] - in[3]);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
137 tmp[12] = (t3 - t2) * tfcos36[17-12];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
138
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
139 t4 -= t1 + t7;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
140
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
141 tmp[16] = (t5 - t0) * tfcos36[17-16];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
142 tmp[10] = (t5 + t0) * tfcos36[17-10];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
143 tmp[15] = (t6 + t4) * tfcos36[17-15];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
144 tmp[11] = (t6 - t4) * tfcos36[17-11];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
145 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
146
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
147 #define MACRO(v) { \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
148 real tmpval; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
149 real sum0 = tmp[(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
150 real sum1 = tmp[17-(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
151 out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
152 out2[8-(v)] = tmpval * w[26-(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
153 sum0 -= sum1; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
154 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
155 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
156
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
157 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
158 register real *out2 = o2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
159 register real *w = wintab;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
160 register real *out1 = o1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
161 register real *ts = tsbuf;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
162
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
163 MACRO(0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
164 MACRO(1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
165 MACRO(2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
166 MACRO(3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
167 MACRO(4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
168 MACRO(5);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
169 MACRO(6);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
170 MACRO(7);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
171 MACRO(8);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
172 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
173
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
174 #else
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
175
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
176 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
177
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
178 #define MACRO0(v) { \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
179 real tmp; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
180 out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
181 out2[8-(v)] = tmp * w[26-(v)]; } \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
182 sum0 -= sum1; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
183 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
184 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
185 #define MACRO1(v) { \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
186 real sum0,sum1; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
187 sum0 = tmp1a + tmp2a; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
188 sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
189 MACRO0(v); }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
190 #define MACRO2(v) { \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
191 real sum0,sum1; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
192 sum0 = tmp2a - tmp1a; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
193 sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
194 MACRO0(v); }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
195
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1
diff changeset
196 register const real *c = COS9;
1
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
197 register real *out2 = o2;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
198 register real *w = wintab;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
199 register real *out1 = o1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
200 register real *ts = tsbuf;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
201
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
202 real ta33,ta66,tb33,tb66;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
203
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
204 ta33 = in[2*3+0] * c[3];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
205 ta66 = in[2*6+0] * c[6];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
206 tb33 = in[2*3+1] * c[3];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
207 tb66 = in[2*6+1] * c[6];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
208
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
209 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
210 real tmp1a,tmp2a,tmp1b,tmp2b;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
211 tmp1a = in[2*1+0] * c[1] + ta33 + in[2*5+0] * c[5] + in[2*7+0] * c[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
212 tmp1b = in[2*1+1] * c[1] + tb33 + in[2*5+1] * c[5] + in[2*7+1] * c[7];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
213 tmp2a = in[2*0+0] + in[2*2+0] * c[2] + in[2*4+0] * c[4] + ta66 + in[2*8+0] * c[8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
214 tmp2b = in[2*0+1] + in[2*2+1] * c[2] + in[2*4+1] * c[4] + tb66 + in[2*8+1] * c[8];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
215
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
216 MACRO1(0);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
217 MACRO2(8);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
218 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
219
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
220 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
221 real tmp1a,tmp2a,tmp1b,tmp2b;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
222 tmp1a = ( in[2*1+0] - in[2*5+0] - in[2*7+0] ) * c[3];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
223 tmp1b = ( in[2*1+1] - in[2*5+1] - in[2*7+1] ) * c[3];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
224 tmp2a = ( in[2*2+0] - in[2*4+0] - in[2*8+0] ) * c[6] - in[2*6+0] + in[2*0+0];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
225 tmp2b = ( in[2*2+1] - in[2*4+1] - in[2*8+1] ) * c[6] - in[2*6+1] + in[2*0+1];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
226
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
227 MACRO1(1);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
228 MACRO2(7);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
229 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
230
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
231 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
232 real tmp1a,tmp2a,tmp1b,tmp2b;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
233 tmp1a = in[2*1+0] * c[5] - ta33 - in[2*5+0] * c[7] + in[2*7+0] * c[1];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
234 tmp1b = in[2*1+1] * c[5] - tb33 - in[2*5+1] * c[7] + in[2*7+1] * c[1];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
235 tmp2a = in[2*0+0] - in[2*2+0] * c[8] - in[2*4+0] * c[2] + ta66 + in[2*8+0] * c[4];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
236 tmp2b = in[2*0+1] - in[2*2+1] * c[8] - in[2*4+1] * c[2] + tb66 + in[2*8+1] * c[4];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
237
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
238 MACRO1(2);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
239 MACRO2(6);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
240 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
241
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
242 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
243 real tmp1a,tmp2a,tmp1b,tmp2b;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
244 tmp1a = in[2*1+0] * c[7] - ta33 + in[2*5+0] * c[1] - in[2*7+0] * c[5];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
245 tmp1b = in[2*1+1] * c[7] - tb33 + in[2*5+1] * c[1] - in[2*7+1] * c[5];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
246 tmp2a = in[2*0+0] - in[2*2+0] * c[4] + in[2*4+0] * c[8] + ta66 - in[2*8+0] * c[2];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
247 tmp2b = in[2*0+1] - in[2*2+1] * c[4] + in[2*4+1] * c[8] + tb66 - in[2*8+1] * c[2];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
248
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
249 MACRO1(3);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
250 MACRO2(5);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
251 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
252
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
253 {
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
254 real sum0,sum1;
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
255 sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
256 sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4];
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
257 MACRO0(4);
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
258 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
259 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
260 #endif
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
261
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
262 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
263 }
3b5f5d1c5041 Initial revision
arpi_esp
parents:
diff changeset
264