Mercurial > mplayer.hg
annotate mp3lib/decod386.c @ 23613:990bafe740df
Avoid void * arithmetic
author | reimar |
---|---|
date | Sun, 24 Jun 2007 10:26:01 +0000 |
parents | 059d7c379fb3 |
children | 49aa1cef1867 |
rev | line source |
---|---|
15167
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
1 /* |
18783 | 2 * Modified for use with MPlayer, for details see the changelog at |
3 * http://svn.mplayerhq.hu/mplayer/trunk/ | |
15167
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
4 * $Id$ |
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
5 */ |
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
6 |
1 | 7 /* |
8 * Mpeg Layer-1,2,3 audio decoder | |
9 * ------------------------------ | |
10 * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved. | |
11 * See also 'README' | |
12 * | |
13 * slightly optimized for machines without autoincrement/decrement. | |
14 * The performance is highly compiler dependent. Maybe | |
15 * the decode.c version for 'normal' processors may be faster | |
16 * even for Intel processors. | |
17 */ | |
18 | |
19 | |
16989 | 20 #include "config.h" |
1 | 21 |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
22 #if 0 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
23 /* old WRITE_SAMPLE */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
24 /* is portable */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
25 #define WRITE_SAMPLE(samples,sum,clip) { \ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
26 if( (sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; } \ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
27 else if( (sum) < -32768.0) { *(samples) = -0x8000; (clip)++; }\ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
28 else { *(samples) = sum; } \ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
29 } |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
30 #else |
1 | 31 /* new WRITE_SAMPLE */ |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
32 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
33 /* |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
34 * should be the same as the "old WRITE_SAMPLE" macro above, but uses |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
35 * some tricks to avoid double->int conversions and floating point compares. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
36 * |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
37 * Here's how it works: |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
38 * ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) is |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
39 * 0x0010000080000000LL in hex. It computes 0x0010000080000000LL + sum |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
40 * as a double IEEE fp value and extracts the low-order 32 bits from the |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
41 * IEEE fp representation stored in memory. The 2^52 bit in the constant |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
42 * is intended to force the bits of "sum" into the least significant bits |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
43 * of the double mantissa. After an integer subtraction of 0x80000000 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
44 * we have the original double value "sum" converted to a 32-bit int value. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
45 * |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
46 * (Is that really faster than the clean and simple old version of the macro?) |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
47 */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
48 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
49 /* |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
50 * On a SPARC cpu, we fetch the low-order 32 bits from the second 32-bit |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
51 * word of the double fp value stored in memory. On an x86 cpu, we fetch it |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
52 * from the first 32-bit word. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
53 * I'm not sure if the WORDS_BIGENDIAN feature test covers all possible memory |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
54 * layouts of double floating point values on all cpu architectures. If |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
55 * it doesn't work for you, just enable the "old WRITE_SAMPLE" macro. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
56 */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
57 #if WORDS_BIGENDIAN |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
58 #define MANTISSA_OFFSET 1 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
59 #else |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
60 #define MANTISSA_OFFSET 0 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
61 #endif |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
62 |
1 | 63 /* sizeof(int) == 4 */ |
64 #define WRITE_SAMPLE(samples,sum,clip) { \ | |
7299
131497b1f6ad
- GCC 3.x (SPARC) is too clever for the double->int conversion trick used in
jkeil
parents:
4321
diff
changeset
|
65 union { double dtemp; int itemp[2]; } u; int v; \ |
131497b1f6ad
- GCC 3.x (SPARC) is too clever for the double->int conversion trick used in
jkeil
parents:
4321
diff
changeset
|
66 u.dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\ |
131497b1f6ad
- GCC 3.x (SPARC) is too clever for the double->int conversion trick used in
jkeil
parents:
4321
diff
changeset
|
67 v = u.itemp[MANTISSA_OFFSET] - 0x80000000; \ |
1 | 68 if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \ |
69 else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \ | |
70 else { *(samples) = v; } \ | |
71 } | |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
72 #endif |
1 | 73 |
74 | |
75 /* | |
76 #define WRITE_SAMPLE(samples,sum,clip) { \ | |
77 double dtemp; int v; \ | |
78 dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\ | |
79 v = ((*(int *)&dtemp) - 0x80000000); \ | |
80 if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \ | |
81 else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \ | |
82 else { *(samples) = v; } \ | |
83 } | |
84 */ | |
85 | |
13188 | 86 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt); |
87 | |
1 | 88 static int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt) |
89 { | |
90 int i,ret; | |
91 | |
92 ret = synth_1to1(bandPtr,0,samples,pnt); | |
93 samples = samples + *pnt - 128; | |
94 | |
95 for(i=0;i<32;i++) { | |
96 ((short *)samples)[1] = ((short *)samples)[0]; | |
97 samples+=4; | |
98 } | |
99 | |
100 return ret; | |
101 } | |
102 | |
12134 | 103 static synth_func_t synth_func; |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
104 |
23495 | 105 #ifdef HAVE_MMX |
23342
e070d7f61e9a
Rewrite generic code in decode_MMX.c in C for easier AMD64 port. Slightly faster than original assembly.
zuxy
parents:
21049
diff
changeset
|
106 extern int synth_1to1_MMX( real *bandPtr,int channel,short * samples); |
1258 | 107 #endif |
9002 | 108 |
109 #ifdef HAVE_ALTIVEC | |
110 #define dct64_base(a,b,c) if(gCpuCaps.hasAltiVec) dct64_altivec(a,b,c); else dct64(a,b,c) | |
111 #else /* HAVE_ALTIVEC */ | |
112 #define dct64_base(a,b,c) dct64(a,b,c) | |
113 #endif /* HAVE_ALTIVEC */ | |
114 | |
1 | 115 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt) |
116 { | |
117 static real buffs[2][2][0x110]; | |
118 static const int step = 2; | |
119 static int bo = 1; | |
120 short *samples = (short *) (out + *pnt); | |
121 real *b0,(*buf)[0x110]; | |
122 int clip = 0; | |
123 int bo1; | |
8543 | 124 |
125 *pnt += 128; | |
126 | |
4149 | 127 /* optimized for x86 */ |
23495 | 128 #ifdef ARCH_X86 |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
129 if ( synth_func ) |
787 | 130 { |
8543 | 131 // printf("Calling %p, bandPtr=%p channel=%d samples=%p\n",synth_func,bandPtr,channel,samples); |
132 // FIXME: synth_func() may destroy EBP, don't rely on stack contents!!! | |
133 return (*synth_func)( bandPtr,channel,samples); | |
736 | 134 } |
1258 | 135 #endif |
1 | 136 if(!channel) { /* channel=0 */ |
137 bo--; | |
138 bo &= 0xf; | |
139 buf = buffs[0]; | |
140 } | |
141 else { | |
142 samples++; | |
143 buf = buffs[1]; | |
144 } | |
145 | |
146 if(bo & 0x1) { | |
147 b0 = buf[0]; | |
148 bo1 = bo; | |
9002 | 149 dct64_base(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr); |
1 | 150 } |
151 else { | |
152 b0 = buf[1]; | |
153 bo1 = bo+1; | |
9002 | 154 dct64_base(buf[0]+bo,buf[1]+bo+1,bandPtr); |
1 | 155 } |
156 | |
157 { | |
158 register int j; | |
8560
1320f1b3229d
fixing that f*cking linker 'bug' e.g. naming config with libmp3lame
alex
parents:
8543
diff
changeset
|
159 real *window = mp3lib_decwin + 16 - bo1; |
1 | 160 |
161 for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step) | |
162 { | |
163 real sum; | |
164 sum = window[0x0] * b0[0x0]; | |
165 sum -= window[0x1] * b0[0x1]; | |
166 sum += window[0x2] * b0[0x2]; | |
167 sum -= window[0x3] * b0[0x3]; | |
168 sum += window[0x4] * b0[0x4]; | |
169 sum -= window[0x5] * b0[0x5]; | |
170 sum += window[0x6] * b0[0x6]; | |
171 sum -= window[0x7] * b0[0x7]; | |
172 sum += window[0x8] * b0[0x8]; | |
173 sum -= window[0x9] * b0[0x9]; | |
174 sum += window[0xA] * b0[0xA]; | |
175 sum -= window[0xB] * b0[0xB]; | |
176 sum += window[0xC] * b0[0xC]; | |
177 sum -= window[0xD] * b0[0xD]; | |
178 sum += window[0xE] * b0[0xE]; | |
179 sum -= window[0xF] * b0[0xF]; | |
180 | |
181 WRITE_SAMPLE(samples,sum,clip); | |
182 } | |
183 | |
184 { | |
185 real sum; | |
186 sum = window[0x0] * b0[0x0]; | |
187 sum += window[0x2] * b0[0x2]; | |
188 sum += window[0x4] * b0[0x4]; | |
189 sum += window[0x6] * b0[0x6]; | |
190 sum += window[0x8] * b0[0x8]; | |
191 sum += window[0xA] * b0[0xA]; | |
192 sum += window[0xC] * b0[0xC]; | |
193 sum += window[0xE] * b0[0xE]; | |
194 WRITE_SAMPLE(samples,sum,clip); | |
195 b0-=0x10,window-=0x20,samples+=step; | |
196 } | |
197 window += bo1<<1; | |
198 | |
199 for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step) | |
200 { | |
201 real sum; | |
202 sum = -window[-0x1] * b0[0x0]; | |
203 sum -= window[-0x2] * b0[0x1]; | |
204 sum -= window[-0x3] * b0[0x2]; | |
205 sum -= window[-0x4] * b0[0x3]; | |
206 sum -= window[-0x5] * b0[0x4]; | |
207 sum -= window[-0x6] * b0[0x5]; | |
208 sum -= window[-0x7] * b0[0x6]; | |
209 sum -= window[-0x8] * b0[0x7]; | |
210 sum -= window[-0x9] * b0[0x8]; | |
211 sum -= window[-0xA] * b0[0x9]; | |
212 sum -= window[-0xB] * b0[0xA]; | |
213 sum -= window[-0xC] * b0[0xB]; | |
214 sum -= window[-0xD] * b0[0xC]; | |
215 sum -= window[-0xE] * b0[0xD]; | |
216 sum -= window[-0xF] * b0[0xE]; | |
217 sum -= window[-0x0] * b0[0xF]; | |
218 | |
219 WRITE_SAMPLE(samples,sum,clip); | |
220 } | |
221 } | |
222 | |
223 return clip; | |
224 | |
225 } | |
226 | |
12291
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
227 #ifdef USE_FAKE_MONO |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
228 static int synth_1to1_l(real *bandPtr,int channel,unsigned char *out,int *pnt) |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
229 { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
230 int i,ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
231 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
232 ret = synth_1to1(bandPtr,channel,out,pnt); |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
233 out = out + *pnt - 128; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
234 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
235 for(i=0;i<32;i++) { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
236 ((short *)out)[1] = ((short *)out)[0]; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
237 out+=4; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
238 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
239 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
240 return ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
241 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
242 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
243 static int synth_1to1_r(real *bandPtr,int channel,unsigned char *out,int *pnt) |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
244 { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
245 int i,ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
246 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
247 ret = synth_1to1(bandPtr,channel,out,pnt); |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
248 out = out + *pnt - 128; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
249 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
250 for(i=0;i<32;i++) { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
251 ((short *)out)[0] = ((short *)out)[1]; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
252 out+=4; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
253 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
254 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
255 return ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
256 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
257 #endif |