Mercurial > mplayer.hg
annotate mp3lib/decod386.c @ 31685:31b6397e3b28
Another try at fixing swscale on win64, as per r31153.
Don't change parameter passing, but instead use casts.
Shouldn't affect asm output on anything other than win64.
libswscale should work on win64 now.
The rest of ffmpeg still isn't win64 compatible due to the issue of xmm
clobbers, but swscale doesn't use any SSE.
Patch by Anton Mitrofanov <BugMaster AT narod DOT ru>.
author | darkshikari |
---|---|
date | Sun, 18 Jul 2010 21:39:57 +0000 |
parents | 0ad2da052b2e |
children |
rev | line source |
---|---|
15167
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
1 /* |
18783 | 2 * Modified for use with MPlayer, for details see the changelog at |
3 * http://svn.mplayerhq.hu/mplayer/trunk/ | |
15167
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
4 * $Id$ |
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
5 */ |
07e7a572bd84
Mark modified imported files as such to comply with (L)GPL §2a.
diego
parents:
13188
diff
changeset
|
6 |
1 | 7 /* |
8 * Mpeg Layer-1,2,3 audio decoder | |
9 * ------------------------------ | |
10 * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved. | |
11 * See also 'README' | |
12 * | |
13 * slightly optimized for machines without autoincrement/decrement. | |
14 * The performance is highly compiler dependent. Maybe | |
15 * the decode.c version for 'normal' processor may be faster | |
16 * even for Intel processors. | |
17 */ | |
18 | |
19 | |
16989 | 20 #include "config.h" |
1 | 21 |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
22 #if 0 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
23 /* old WRITE_SAMPLE */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
24 /* is portable */ |
/*
 * Portable reference variant: saturate `sum` to the signed 16-bit range,
 * store it through `samples`, and bump `clip` whenever saturation occurred.
 * `sum` is expanded (and therefore evaluated) more than once.
 */
#define WRITE_SAMPLE(samples,sum,clip) { \
  if ((sum) > 32767.0) { \
    *(samples) = 32767; \
    (clip)++; \
  } else if ((sum) < -32768.0) { \
    *(samples) = -32768; \
    (clip)++; \
  } else { \
    *(samples) = sum; \
  } \
}
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
30 #else |
1 | 31 /* new WRITE_SAMPLE */ |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
32 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
33 /* |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
34 * should be the same as the "old WRITE_SAMPLE" macro above, but uses |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
35 * some tricks to avoid double->int conversions and floating point compares. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
36 * |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
37 * Here's how it works: |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
38 * ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) is |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
39 * 0x0010000080000000LL in hex. It computes 0x0010000080000000LL + sum |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
40 * as a double IEEE fp value and extracts the low-order 32-bits from the |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
41 * IEEE fp representation stored in memory. The 2^56 bit in the constant |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
42 * is intended to force the bits of "sum" into the least significant bits |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
43 * of the double mantissa. After an integer subtraction of 0x80000000 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
44 * we have the original double value "sum" converted to a 32-bit int value. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
45 * |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
46 * (Is that really faster than the clean and simple old version of the macro?) |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
47 */ |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
48 |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
49 /* |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
50 * On a SPARC cpu, we fetch the low-order 32-bit from the second 32-bit |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
51 * word of the double fp value stored in memory. On an x86 cpu, we fetch it |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
52 * from the first 32-bit word. |
29401
f01023c524c3
Replace WORDS_BIGENDIAN by HAVE_BIGENDIAN in all internal code.
diego
parents:
29263
diff
changeset
|
53 * I'm not sure if the HAVE_BIGENDIAN feature test covers all possible memory |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
54 * layouts of double floating point values on all cpu architectures. If |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
55 * it doesn't work for you, just enable the "old WRITE_SAMPLE" macro. |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
56 */ |
29401
f01023c524c3
Replace WORDS_BIGENDIAN by HAVE_BIGENDIAN in all internal code.
diego
parents:
29263
diff
changeset
|
57 #if HAVE_BIGENDIAN |
30990 | 58 #define MANTISSA_OFFSET 1 |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
59 #else |
30990 | 60 #define MANTISSA_OFFSET 0 |
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
61 #endif |
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
62 |
1 | 63 /* sizeof(int) == 4 */ |
/*
 * Store `sum` as a saturated 16-bit sample through `samples`; increment
 * `clip` when the value had to be saturated.
 *
 * The double->int conversion is done by adding the bias constant
 * (2^52 + 2^31) and reading one 32-bit word of the resulting double
 * (selected by MANTISSA_OFFSET), then removing the 0x80000000 offset.
 * Relies on int being 32 bits wide.
 */
#define WRITE_SAMPLE(samples,sum,clip) { \
  union { double dtemp; int itemp[2]; } ws_pun; \
  int ws_val; \
  ws_pun.dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum); \
  ws_val = ws_pun.itemp[MANTISSA_OFFSET] - 0x80000000; \
  if (ws_val >= -32768 && ws_val <= 32767) { \
    *(samples) = ws_val; \
  } else { \
    /* out of range: pin to the nearest 16-bit limit and count it */ \
    *(samples) = (ws_val > 32767) ? 0x7fff : -0x8000; \
    (clip)++; \
  } \
}
1318
2052e18abd9d
mp3 audio decoding didn't work on big-endian architectures
jkeil
parents:
1258
diff
changeset
|
72 #endif |
1 | 73 |
74 | |
75 /* | |
76 #define WRITE_SAMPLE(samples,sum,clip) { \ | |
77 double dtemp; int v; \ | |
78 dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\ | |
79 v = ((*(int *)&dtemp) - 0x80000000); \ | |
80 if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \ | |
81 else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \ | |
82 else { *(samples) = v; } \ | |
83 } | |
84 */ | |
85 | |
13188 | 86 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt); |
87 | |
1 | 88 static int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt) |
89 { | |
90 int i,ret; | |
91 | |
92 ret = synth_1to1(bandPtr,0,samples,pnt); | |
93 samples = samples + *pnt - 128; | |
94 | |
95 for(i=0;i<32;i++) { | |
96 ((short *)samples)[1] = ((short *)samples)[0]; | |
97 samples+=4; | |
98 } | |
99 | |
100 return ret; | |
101 } | |
102 | |
12134 | 103 static synth_func_t synth_func; |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
104 |
/*
 * dct64_base(a,b,c): dispatch to the DCT routine.
 * With HAVE_ALTIVEC the choice between dct64_altivec() and dct64() is made
 * at run time from gCpuCaps.hasAltiVec; otherwise it is plain dct64().
 * The AltiVec form expands to an if/else statement, so use it only as a
 * statement followed by ';'.
 */
#if HAVE_ALTIVEC
#define dct64_base(a,b,c) \
  if (!gCpuCaps.hasAltiVec) dct64(a,b,c); \
  else dct64_altivec(a,b,c)
#else /* !HAVE_ALTIVEC */
#define dct64_base(a,b,c) dct64(a,b,c)
#endif /* HAVE_ALTIVEC */
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28339
diff
changeset
|
110 |
1 | 111 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt) |
112 { | |
113 static real buffs[2][2][0x110]; | |
114 static const int step = 2; | |
115 static int bo = 1; | |
116 short *samples = (short *) (out + *pnt); | |
117 real *b0,(*buf)[0x110]; | |
118 int clip = 0; | |
119 int bo1; | |
8543 | 120 |
121 *pnt += 128; | |
122 | |
4149 | 123 /* optimized for x86 */ |
28290 | 124 #if ARCH_X86 |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
787
diff
changeset
|
125 if ( synth_func ) |
787 | 126 { |
8543 | 127 // printf("Calling %p, bandPtr=%p channel=%d samples=%p\n",synth_func,bandPtr,channel,samples); |
128 // FIXME: synth_func() may destroy EBP, don't rely on stack contents!!! | |
129 return (*synth_func)( bandPtr,channel,samples); | |
736 | 130 } |
1258 | 131 #endif |
1 | 132 if(!channel) { /* channel=0 */ |
133 bo--; | |
134 bo &= 0xf; | |
135 buf = buffs[0]; | |
136 } | |
137 else { | |
138 samples++; | |
139 buf = buffs[1]; | |
140 } | |
141 | |
142 if(bo & 0x1) { | |
143 b0 = buf[0]; | |
144 bo1 = bo; | |
9002 | 145 dct64_base(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr); |
1 | 146 } |
147 else { | |
148 b0 = buf[1]; | |
149 bo1 = bo+1; | |
9002 | 150 dct64_base(buf[0]+bo,buf[1]+bo+1,bandPtr); |
1 | 151 } |
152 | |
153 { | |
154 register int j; | |
8560
1320f1b3229d
fixing that f*cking linker 'bug' e.g. naming config with libmp3lame
alex
parents:
8543
diff
changeset
|
155 real *window = mp3lib_decwin + 16 - bo1; |
1 | 156 |
157 for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step) | |
158 { | |
159 real sum; | |
160 sum = window[0x0] * b0[0x0]; | |
161 sum -= window[0x1] * b0[0x1]; | |
162 sum += window[0x2] * b0[0x2]; | |
163 sum -= window[0x3] * b0[0x3]; | |
164 sum += window[0x4] * b0[0x4]; | |
165 sum -= window[0x5] * b0[0x5]; | |
166 sum += window[0x6] * b0[0x6]; | |
167 sum -= window[0x7] * b0[0x7]; | |
168 sum += window[0x8] * b0[0x8]; | |
169 sum -= window[0x9] * b0[0x9]; | |
170 sum += window[0xA] * b0[0xA]; | |
171 sum -= window[0xB] * b0[0xB]; | |
172 sum += window[0xC] * b0[0xC]; | |
173 sum -= window[0xD] * b0[0xD]; | |
174 sum += window[0xE] * b0[0xE]; | |
175 sum -= window[0xF] * b0[0xF]; | |
176 | |
177 WRITE_SAMPLE(samples,sum,clip); | |
178 } | |
179 | |
180 { | |
181 real sum; | |
182 sum = window[0x0] * b0[0x0]; | |
183 sum += window[0x2] * b0[0x2]; | |
184 sum += window[0x4] * b0[0x4]; | |
185 sum += window[0x6] * b0[0x6]; | |
186 sum += window[0x8] * b0[0x8]; | |
187 sum += window[0xA] * b0[0xA]; | |
188 sum += window[0xC] * b0[0xC]; | |
189 sum += window[0xE] * b0[0xE]; | |
190 WRITE_SAMPLE(samples,sum,clip); | |
191 b0-=0x10,window-=0x20,samples+=step; | |
192 } | |
193 window += bo1<<1; | |
194 | |
195 for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step) | |
196 { | |
197 real sum; | |
198 sum = -window[-0x1] * b0[0x0]; | |
199 sum -= window[-0x2] * b0[0x1]; | |
200 sum -= window[-0x3] * b0[0x2]; | |
201 sum -= window[-0x4] * b0[0x3]; | |
202 sum -= window[-0x5] * b0[0x4]; | |
203 sum -= window[-0x6] * b0[0x5]; | |
204 sum -= window[-0x7] * b0[0x6]; | |
205 sum -= window[-0x8] * b0[0x7]; | |
206 sum -= window[-0x9] * b0[0x8]; | |
207 sum -= window[-0xA] * b0[0x9]; | |
208 sum -= window[-0xB] * b0[0xA]; | |
209 sum -= window[-0xC] * b0[0xB]; | |
210 sum -= window[-0xD] * b0[0xC]; | |
211 sum -= window[-0xE] * b0[0xD]; | |
212 sum -= window[-0xF] * b0[0xE]; | |
213 sum -= window[-0x0] * b0[0xF]; | |
214 | |
215 WRITE_SAMPLE(samples,sum,clip); | |
216 } | |
217 } | |
218 | |
219 return clip; | |
220 | |
221 } | |
222 | |
27341
e7c989f7a7c9
Start unifying names of internal preprocessor directives.
diego
parents:
24173
diff
changeset
|
223 #ifdef CONFIG_FAKE_MONO |
12291
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
224 static int synth_1to1_l(real *bandPtr,int channel,unsigned char *out,int *pnt) |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
225 { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
226 int i,ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
227 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
228 ret = synth_1to1(bandPtr,channel,out,pnt); |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
229 out = out + *pnt - 128; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
230 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
231 for(i=0;i<32;i++) { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
232 ((short *)out)[1] = ((short *)out)[0]; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
233 out+=4; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
234 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
235 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
236 return ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
237 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
238 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
239 static int synth_1to1_r(real *bandPtr,int channel,unsigned char *out,int *pnt) |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
240 { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
241 int i,ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
242 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
243 ret = synth_1to1(bandPtr,channel,out,pnt); |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
244 out = out + *pnt - 128; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
245 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
246 for(i=0;i<32;i++) { |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
247 ((short *)out)[0] = ((short *)out)[1]; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
248 out+=4; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
249 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
250 |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
251 return ret; |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
252 } |
4e6f75467d64
reorder funcs to avoid warnings/errors (gccs are nowadays are more pickier about code than gcc2.95 with -ansi)
alex
parents:
12134
diff
changeset
|
253 #endif |