Mercurial > mplayer.hg
comparison liba52/liba52_changes.diff @ 29601:cd3ae709054f
Disable the liba52 SSE IMDCT for x86_64 + PIC builds (e.g. 64-bit OS X), since
it does not compile for that combination.
author | reimar |
---|---|
date | Fri, 04 Sep 2009 10:31:24 +0000 |
parents | f01023c524c3 |
children |
comparison
equal
deleted
inserted
replaced
29600:577c86f00dd1 | 29601:cd3ae709054f |
---|---|
1502 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) | 1502 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) |
1503 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) | 1503 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) |
1504 { | 1504 { |
1505 int i, k; | 1505 int i, k; |
1506 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; | 1506 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; |
1507 @@ -285,6 +366,702 @@ | 1507 @@ -285,6 +366,704 @@ |
1508 } | 1508 } |
1509 } | 1509 } |
1510 | 1510 |
1511 +#if HAVE_ALTIVEC | 1511 +#if HAVE_ALTIVEC |
1512 + | 1512 + |
1867 +#include "imdct_3dnow.h" | 1867 +#include "imdct_3dnow.h" |
1868 +#undef HAVE_AMD3DNOWEXT | 1868 +#undef HAVE_AMD3DNOWEXT |
1869 +#define HAVE_AMD3DNOWEXT 1 | 1869 +#define HAVE_AMD3DNOWEXT 1 |
1870 +#include "imdct_3dnow.h" | 1870 +#include "imdct_3dnow.h" |
1871 + | 1871 + |
1872 +#if !ARCH_X86_64 || !defined(PIC) | |
1872 +void | 1873 +void |
1873 +imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) | 1874 +imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) |
1874 +{ | 1875 +{ |
1875 +/* int i,k; | 1876 +/* int i,k; |
1876 + int p,q;*/ | 1877 + int p,q;*/ |
2200 + " jb 1b \n\t" | 2201 + " jb 1b \n\t" |
2201 + :: "r" (buf), "r" (delay_ptr) | 2202 + :: "r" (buf), "r" (delay_ptr) |
2202 + : "%"REG_S, "%"REG_D | 2203 + : "%"REG_S, "%"REG_D |
2203 + ); | 2204 + ); |
2204 +} | 2205 +} |
2206 +#endif | |
2205 +#endif // ARCH_X86 || ARCH_X86_64 | 2207 +#endif // ARCH_X86 || ARCH_X86_64 |
2206 + | 2208 + |
2207 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) | 2209 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) |
2208 { | 2210 { |
2209 int i, k; | 2211 int i, k; |
2210 @@ -364,7 +1141,7 @@ | 2212 @@ -364,7 +1143,7 @@ |
2211 | 2213 |
2212 void a52_imdct_init (uint32_t mm_accel) | 2214 void a52_imdct_init (uint32_t mm_accel) |
2213 { | 2215 { |
2214 - int i, k; | 2216 - int i, k; |
2215 + int i, j, k; | 2217 + int i, j, k; |
2216 double sum; | 2218 double sum; |
2217 | 2219 |
2218 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ | 2220 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ |
2219 @@ -416,6 +1193,99 @@ | 2221 @@ -416,6 +1195,101 @@ |
2220 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); | 2222 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); |
2221 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); | 2223 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); |
2222 } | 2224 } |
2223 + for (i = 0; i < 128; i++) { | 2225 + for (i = 0; i < 128; i++) { |
2224 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); | 2226 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); |
2284 + a52_imdct_512 = imdct_do_512; | 2286 + a52_imdct_512 = imdct_do_512; |
2285 + ifft128 = ifft128_c; | 2287 + ifft128 = ifft128_c; |
2286 + ifft64 = ifft64_c; | 2288 + ifft64 = ifft64_c; |
2287 + | 2289 + |
2288 +#if ARCH_X86 || ARCH_X86_64 | 2290 +#if ARCH_X86 || ARCH_X86_64 |
2291 +#if !ARCH_X86_64 || !defined(PIC) | |
2289 + if(mm_accel & MM_ACCEL_X86_SSE) | 2292 + if(mm_accel & MM_ACCEL_X86_SSE) |
2290 + { | 2293 + { |
2291 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); | 2294 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); |
2292 + a52_imdct_512 = imdct_do_512_sse; | 2295 + a52_imdct_512 = imdct_do_512_sse; |
2293 + } | 2296 + } |
2294 + else | 2297 + else |
2298 +#endif | |
2295 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) | 2299 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) |
2296 + { | 2300 + { |
2297 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); | 2301 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); |
2298 + a52_imdct_512 = imdct_do_512_3dnowex; | 2302 + a52_imdct_512 = imdct_do_512_3dnowex; |
2299 + } | 2303 + } |
2314 + else | 2318 + else |
2315 +#endif | 2319 +#endif |
2316 | 2320 |
2317 #ifdef LIBA52_DJBFFT | 2321 #ifdef LIBA52_DJBFFT |
2318 if (mm_accel & MM_ACCEL_DJBFFT) { | 2322 if (mm_accel & MM_ACCEL_DJBFFT) { |
2319 @@ -426,7 +1296,5 @@ | 2323 @@ -426,7 +1300,5 @@ |
2320 #endif | 2324 #endif |
2321 { | 2325 { |
2322 fprintf (stderr, "No accelerated IMDCT transform found\n"); | 2326 fprintf (stderr, "No accelerated IMDCT transform found\n"); |
2323 - ifft128 = ifft128_c; | 2327 - ifft128 = ifft128_c; |
2324 - ifft64 = ifft64_c; | 2328 - ifft64 = ifft64_c; |