comparison liba52/liba52_changes.diff @ 29601:cd3ae709054f

Disable the liba52 SSE IMDCT for x86_64+PIC builds (e.g. 64-bit OS X), since it does not compile for that combination.
author reimar
date Fri, 04 Sep 2009 10:31:24 +0000
parents f01023c524c3
children
comparison
equal deleted inserted replaced
29600:577c86f00dd1 29601:cd3ae709054f
1502 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) 1502 -void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
1503 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias) 1503 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
1504 { 1504 {
1505 int i, k; 1505 int i, k;
1506 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; 1506 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
1507 @@ -285,6 +366,702 @@ 1507 @@ -285,6 +366,704 @@
1508 } 1508 }
1509 } 1509 }
1510 1510
1511 +#if HAVE_ALTIVEC 1511 +#if HAVE_ALTIVEC
1512 + 1512 +
1867 +#include "imdct_3dnow.h" 1867 +#include "imdct_3dnow.h"
1868 +#undef HAVE_AMD3DNOWEXT 1868 +#undef HAVE_AMD3DNOWEXT
1869 +#define HAVE_AMD3DNOWEXT 1 1869 +#define HAVE_AMD3DNOWEXT 1
1870 +#include "imdct_3dnow.h" 1870 +#include "imdct_3dnow.h"
1871 + 1871 +
1872 +#if !ARCH_X86_64 || !defined(PIC)
1872 +void 1873 +void
1873 +imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias) 1874 +imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
1874 +{ 1875 +{
1875 +/* int i,k; 1876 +/* int i,k;
1876 + int p,q;*/ 1877 + int p,q;*/
2200 + " jb 1b \n\t" 2201 + " jb 1b \n\t"
2201 + :: "r" (buf), "r" (delay_ptr) 2202 + :: "r" (buf), "r" (delay_ptr)
2202 + : "%"REG_S, "%"REG_D 2203 + : "%"REG_S, "%"REG_D
2203 + ); 2204 + );
2204 +} 2205 +}
2206 +#endif
2205 +#endif // ARCH_X86 || ARCH_X86_64 2207 +#endif // ARCH_X86 || ARCH_X86_64
2206 + 2208 +
2207 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) 2209 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
2208 { 2210 {
2209 int i, k; 2211 int i, k;
2210 @@ -364,7 +1141,7 @@ 2212 @@ -364,7 +1143,7 @@
2211 2213
2212 void a52_imdct_init (uint32_t mm_accel) 2214 void a52_imdct_init (uint32_t mm_accel)
2213 { 2215 {
2214 - int i, k; 2216 - int i, k;
2215 + int i, j, k; 2217 + int i, j, k;
2216 double sum; 2218 double sum;
2217 2219
2218 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ 2220 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
2219 @@ -416,6 +1193,99 @@ 2221 @@ -416,6 +1195,101 @@
2220 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); 2222 post2[i].real = cos ((M_PI / 128) * (i + 0.5));
2221 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); 2223 post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
2222 } 2224 }
2223 + for (i = 0; i < 128; i++) { 2225 + for (i = 0; i < 128; i++) {
2224 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); 2226 + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
2284 + a52_imdct_512 = imdct_do_512; 2286 + a52_imdct_512 = imdct_do_512;
2285 + ifft128 = ifft128_c; 2287 + ifft128 = ifft128_c;
2286 + ifft64 = ifft64_c; 2288 + ifft64 = ifft64_c;
2287 + 2289 +
2288 +#if ARCH_X86 || ARCH_X86_64 2290 +#if ARCH_X86 || ARCH_X86_64
2291 +#if !ARCH_X86_64 || !defined(PIC)
2289 + if(mm_accel & MM_ACCEL_X86_SSE) 2292 + if(mm_accel & MM_ACCEL_X86_SSE)
2290 + { 2293 + {
2291 + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); 2294 + fprintf (stderr, "Using SSE optimized IMDCT transform\n");
2292 + a52_imdct_512 = imdct_do_512_sse; 2295 + a52_imdct_512 = imdct_do_512_sse;
2293 + } 2296 + }
2294 + else 2297 + else
2298 +#endif
2295 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT) 2299 + if(mm_accel & MM_ACCEL_X86_3DNOWEXT)
2296 + { 2300 + {
2297 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n"); 2301 + fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n");
2298 + a52_imdct_512 = imdct_do_512_3dnowex; 2302 + a52_imdct_512 = imdct_do_512_3dnowex;
2299 + } 2303 + }
2314 + else 2318 + else
2315 +#endif 2319 +#endif
2316 2320
2317 #ifdef LIBA52_DJBFFT 2321 #ifdef LIBA52_DJBFFT
2318 if (mm_accel & MM_ACCEL_DJBFFT) { 2322 if (mm_accel & MM_ACCEL_DJBFFT) {
2319 @@ -426,7 +1296,5 @@ 2323 @@ -426,7 +1300,5 @@
2320 #endif 2324 #endif
2321 { 2325 {
2322 fprintf (stderr, "No accelerated IMDCT transform found\n"); 2326 fprintf (stderr, "No accelerated IMDCT transform found\n");
2323 - ifft128 = ifft128_c; 2327 - ifft128 = ifft128_c;
2324 - ifft64 = ifft64_c; 2328 - ifft64 = ifft64_c;