Mercurial > mplayer.hg
annotate mp3lib/decod386.c @ 9885:98712065a08d
10l patch by (Andreas Hess <jaska at gmx dot net>)
author | michael |
---|---|
date | Tue, 08 Apr 2003 23:46:35 +0000 |
parents | 60d144a16088 |
children | 83822b2b0a17 |
rev | line source |
---|---|
1 | 1 /* |
2 * Mpeg Layer-1,2,3 audio decoder | |
3 * ------------------------------ | |
4 * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved. | |
5 * See also 'README' | |
6 * | |
7 * slightly optimized for machines without autoincrement/decrement. | |
8 * The performance is highly compiler dependent. Maybe | |
9 * the decode.c version for 'normal' processor may be faster | |
10 * even for Intel processors. | |
11 */ | |
12 | |
13 | |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
14 #include "../config.h" |
1 | 15 |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
16 #if 0 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
17 /* old WRITE_SAMPLE */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
18 /* is portable */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
/*
 * Reference implementation (currently disabled by the enclosing #if 0).
 * Stores "sum" into *samples, saturating at 0x7fff / -0x8000, and increments
 * "clip" whenever saturation happened.  The in-range branch relies on the
 * implicit floating point -> integer conversion of the assignment.
 * Note: "sum" is evaluated up to three times, so it must not have side effects.
 */
#define WRITE_SAMPLE(samples,sum,clip) { \
  if( (sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; } \
  else if( (sum) < -32768.0) { *(samples) = -0x8000; (clip)++; }\
  else { *(samples) = sum; } \
}
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
24 #else |
1 | 25 /* new WRITE_SAMPLE */ |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
26 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
27 /* |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
28 * should be the same as the "old WRITE_SAMPLE" macro above, but uses |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
29 * some tricks to avoid double->int conversions and floating point compares. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
30 * |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
31 * Here's how it works: |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
32 * ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) is |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
33 * 0x0010000080000000LL in hex. It computes 0x0010000080000000LL + sum |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
34 * as a double IEEE fp value and extracts the low-order 32-bits from the |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
35 * IEEE fp representation stored in memory. The 2^52 bit in the constant |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
36 * is intended to force the bits of "sum" into the least significant bits |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
37 * of the double mantissa. After an integer subtraction of 0x80000000 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
38 * we have the original double value "sum" converted to a 32-bit int value. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
39 * |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
40 * (Is that really faster than the clean and simple old version of the macro?) |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
41 */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
42 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
43 /* |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
44 * On a SPARC cpu, we fetch the low-order 32-bit from the second 32-bit |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
45 * word of the double fp value stored in memory. On an x86 cpu, we fetch it |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
46 * from the first 32-bit word. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
47 * I'm not sure if the WORDS_BIGENDIAN feature test covers all possible memory |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
48 * layouts of double floating point values on all cpu architectures. If |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
49 * it doesn't work for you, just enable the "old WRITE_SAMPLE" macro. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
50 */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
/*
 * MANTISSA_OFFSET is the index used by WRITE_SAMPLE into the int[2] view of
 * a double: 1 when WORDS_BIGENDIAN is set (non-zero), 0 otherwise.
 */
#if WORDS_BIGENDIAN
#define MANTISSA_OFFSET 1
#else
#define MANTISSA_OFFSET 0
#endif
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
56 |
1 | 57 /* sizeof(int) == 4 */ |
/*
 * WRITE_SAMPLE(samples, sum, clip)
 *
 *   samples  pointer to the destination sample (written through once)
 *   sum      floating point value to convert
 *   clip     integer lvalue, incremented when the value had to be saturated
 *
 * Adds "sum" to the magic constant 2^52 + 2^31 so that the rounded integer
 * value of "sum" (biased by 0x80000000) ends up in the low-order 32-bit word
 * of the double, reads that word through a union, removes the bias and stores
 * the result saturated to the range -0x8000 .. 0x7fff.
 *
 * Requires sizeof(int) == 4 and MANTISSA_OFFSET to select the low-order word.
 *
 * The temporaries carry a "ws_" prefix and "_" suffix on purpose: with short
 * names such as "u" or "v" an argument expression mentioning a caller
 * variable of the same name would silently refer to the macro's own
 * (uninitialised) temporary instead.
 *
 * The expansion stays a plain { } block so that existing call sites keep
 * compiling unchanged, with or without a trailing semicolon.
 */
#define WRITE_SAMPLE(samples,sum,clip) { \
  union { double dtemp; int itemp[2]; } ws_u_; int ws_v_; \
  ws_u_.dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\
  /* unsigned subtraction: strips the 0x80000000 bias added by the constant */ \
  ws_v_ = ws_u_.itemp[MANTISSA_OFFSET] - 0x80000000; \
  if( ws_v_ > 32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( ws_v_ < -32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = ws_v_; } \
}
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
66 #endif |
1 | 67 |
68 | |
69 /* | |
70 #define WRITE_SAMPLE(samples,sum,clip) { \ | |
71 double dtemp; int v; \ | |
72 dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\ | |
73 v = ((*(int *)&dtemp) - 0x80000000); \ | |
74 if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \ | |
75 else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \ | |
76 else { *(samples) = v; } \ | |
77 } | |
78 */ | |
79 | |
80 static int synth_1to1_mono(real *bandPtr,unsigned char *samples,int *pnt) | |
81 { | |
82 short samples_tmp[64]; | |
83 short *tmp1 = samples_tmp; | |
84 int i,ret; | |
85 int pnt1 = 0; | |
86 | |
87 ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1); | |
88 samples += *pnt; | |
89 | |
90 for(i=0;i<32;i++) { | |
91 *( (short *) samples) = *tmp1; | |
92 samples += 2; | |
93 tmp1 += 2; | |
94 } | |
95 *pnt += 64; | |
96 | |
97 return ret; | |
98 } | |
99 | |
100 | |
101 static int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt) | |
102 { | |
103 int i,ret; | |
104 | |
105 ret = synth_1to1(bandPtr,0,samples,pnt); | |
106 samples = samples + *pnt - 128; | |
107 | |
108 for(i=0;i<32;i++) { | |
109 ((short *)samples)[1] = ((short *)samples)[0]; | |
110 samples+=4; | |
111 } | |
112 | |
113 return ret; | |
114 } | |
115 | |
116 | |
732
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
117 #ifdef USE_FAKE_MONO |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
118 static int synth_1to1_l(real *bandPtr,int channel,unsigned char *out,int *pnt) |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
119 { |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
120 int i,ret; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
121 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
122 ret = synth_1to1(bandPtr,channel,out,pnt); |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
123 out = out + *pnt - 128; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
124 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
125 for(i=0;i<32;i++) { |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
126 ((short *)out)[1] = ((short *)out)[0]; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
127 out+=4; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
128 } |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
129 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
130 return ret; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
131 } |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
132 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
133 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
134 static int synth_1to1_r(real *bandPtr,int channel,unsigned char *out,int *pnt) |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
135 { |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
136 int i,ret; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
137 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
138 ret = synth_1to1(bandPtr,channel,out,pnt); |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
139 out = out + *pnt - 128; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
140 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
141 for(i=0;i<32;i++) { |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
142 ((short *)out)[0] = ((short *)out)[1]; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
143 out+=4; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
144 } |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
145 |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
146 return ret; |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
147 } |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
148 #endif |
e14114170e01
applied 'fakemono' patch by Bryan Chan scorpio@acm.org
arpi_esp
parents:
1
diff
changeset
|
149 |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
/*
 * Optional replacement synthesis routine.  When CAN_COMPILE_X86_ASM is
 * defined and this pointer is non-NULL, synth_1to1() forwards
 * (bandPtr, channel, samples) to it and returns its result instead of
 * running the C implementation.
 */
synth_func_t synth_func;
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
151 |
4321 | 152 #if defined(CAN_COMPILE_X86_ASM) |
1942
853be3ebe862
Eeeh I need some sleep, accidently commited a wrong version of the files that had other changes, too.
atmos4
parents:
1941
diff
changeset
|
153 int synth_1to1_MMX( real *bandPtr,int channel,short * samples) |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
154 { |
1942
853be3ebe862
Eeeh I need some sleep, accidently commited a wrong version of the files that had other changes, too.
atmos4
parents:
1941
diff
changeset
|
155 static short buffs[2][2][0x110]; |
853be3ebe862
Eeeh I need some sleep, accidently commited a wrong version of the files that had other changes, too.
atmos4
parents:
1941
diff
changeset
|
156 static int bo = 1; |
853be3ebe862
Eeeh I need some sleep, accidently commited a wrong version of the files that had other changes, too.
atmos4
parents:
1941
diff
changeset
|
157 synth_1to1_MMX_s(bandPtr, channel, samples, (short *) buffs, &bo); |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
158 return 0; |
1258 | 159 } |
160 #endif | |
9002 | 161 |
/*
 * dct64_base(a,b,c): run the DCT, picking the AltiVec implementation at run
 * time when it is compiled in and gCpuCaps.hasAltiVec is set, and the plain
 * dct64() otherwise.
 *
 * The AltiVec variant is wrapped in do { } while(0) so that the expansion is
 * a single statement: a bare "if ... else ..." would pair up with (or break)
 * an else belonging to an unbraced if at the call site.  Call sites must end
 * the invocation with a semicolon, as they already had to before.
 */
#ifdef HAVE_ALTIVEC
#define dct64_base(a,b,c) do { if(gCpuCaps.hasAltiVec) dct64_altivec(a,b,c); else dct64(a,b,c); } while(0)
#else /* HAVE_ALTIVEC */
#define dct64_base(a,b,c) dct64(a,b,c)
#endif /* HAVE_ALTIVEC */
167 | |
/*
 * Synthesize one block of output samples for a single channel.
 *
 *   bandPtr  input block, handed unchanged to dct64_base() (or synth_func)
 *   channel  0 selects the first work buffer and the first interleaved
 *            slot; any other value selects the second buffer and the
 *            second slot
 *   out      output byte buffer
 *   pnt      byte offset into out; always advanced by 128
 *
 * Writes 32 samples (16 + 1 + 15 WRITE_SAMPLE invocations), each "step"
 * shorts apart, starting at out + (*pnt on entry).
 *
 * Returns the number of samples that were clipped, or, when the optimized
 * routine in synth_func is used, whatever that routine returns.
 *
 * Keeps state in static variables (buffs, bo) across calls.
 */
static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  static real buffs[2][2][0x110];   /* [channel][half][...] work buffers */
  static const int step = 2;        /* output stride in shorts */
  static int bo = 1;                /* rotating buffer offset, 0..15 */
  short *samples = (short *) (out + *pnt);
  real *b0,(*buf)[0x110];
  int clip = 0;
  int bo1;

  /* the output position is advanced up front, on every path */
  *pnt += 128;

/* optimized for x86 */
#if defined(CAN_COMPILE_X86_ASM)
  if ( synth_func )
   {
//    printf("Calling %p, bandPtr=%p channel=%d samples=%p\n",synth_func,bandPtr,channel,samples);
    // FIXME: synth_func() may destroy EBP, don't rely on stack contents!!!
    return (*synth_func)( bandPtr,channel,samples);
   }
#endif
  if(!channel) {     /* channel=0 */
    /* bo only moves on channel 0; other channels reuse the current value */
    bo--;
    bo &= 0xf;
    buf = buffs[0];
  }
  else {
    /* write into the second slot of each interleaved pair */
    samples++;
    buf = buffs[1];
  }

  /* the parity of bo decides which half receives which dct64 output and
     which half (b0) is read by the windowing below */
  if(bo & 0x1) {
    b0 = buf[0];
    bo1 = bo;
    dct64_base(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr);
  }
  else {
    b0 = buf[1];
    bo1 = bo+1;
    dct64_base(buf[0]+bo,buf[1]+bo+1,bandPtr);
  }

  {
    register int j;
    real *window = mp3lib_decwin + 16 - bo1;

    /* first 16 samples: alternating-sign dot product of 16 taps */
    for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step)
    {
      real sum;
      sum  = window[0x0] * b0[0x0];
      sum -= window[0x1] * b0[0x1];
      sum += window[0x2] * b0[0x2];
      sum -= window[0x3] * b0[0x3];
      sum += window[0x4] * b0[0x4];
      sum -= window[0x5] * b0[0x5];
      sum += window[0x6] * b0[0x6];
      sum -= window[0x7] * b0[0x7];
      sum += window[0x8] * b0[0x8];
      sum -= window[0x9] * b0[0x9];
      sum += window[0xA] * b0[0xA];
      sum -= window[0xB] * b0[0xB];
      sum += window[0xC] * b0[0xC];
      sum -= window[0xD] * b0[0xD];
      sum += window[0xE] * b0[0xE];
      sum -= window[0xF] * b0[0xF];

      WRITE_SAMPLE(samples,sum,clip);
    }

    /* middle sample: even taps only, then start walking backwards */
    {
      real sum;
      sum  = window[0x0] * b0[0x0];
      sum += window[0x2] * b0[0x2];
      sum += window[0x4] * b0[0x4];
      sum += window[0x6] * b0[0x6];
      sum += window[0x8] * b0[0x8];
      sum += window[0xA] * b0[0xA];
      sum += window[0xC] * b0[0xC];
      sum += window[0xE] * b0[0xE];
      WRITE_SAMPLE(samples,sum,clip);
      b0-=0x10,window-=0x20,samples+=step;
    }
    window += bo1<<1;

    /* last 15 samples: b0 and window step backwards, window is indexed
       with negative offsets and every term is subtracted */
    for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step)
    {
      real sum;
      sum = -window[-0x1] * b0[0x0];
      sum -= window[-0x2] * b0[0x1];
      sum -= window[-0x3] * b0[0x2];
      sum -= window[-0x4] * b0[0x3];
      sum -= window[-0x5] * b0[0x4];
      sum -= window[-0x6] * b0[0x5];
      sum -= window[-0x7] * b0[0x6];
      sum -= window[-0x8] * b0[0x7];
      sum -= window[-0x9] * b0[0x8];
      sum -= window[-0xA] * b0[0x9];
      sum -= window[-0xB] * b0[0xA];
      sum -= window[-0xC] * b0[0xB];
      sum -= window[-0xD] * b0[0xC];
      sum -= window[-0xE] * b0[0xD];
      sum -= window[-0xF] * b0[0xE];
      sum -= window[-0x0] * b0[0xF];

      WRITE_SAMPLE(samples,sum,clip);
    }
  }

  return clip;

}
279 |