Mercurial > audlegacy
diff Plugins/Input/wma/libffwma/fft.c @ 1398:1ddaf20ab50e trunk
[svn] AltiVec support for WMA, by Luca "lu_zero" Barbato from Gentoo.
author | chainsaw |
---|---|
date | Thu, 13 Jul 2006 16:01:57 -0700 |
parents | 62a33367a6cb |
children | f12d7e208b43 |
line wrap: on
line diff
--- a/Plugins/Input/wma/libffwma/fft.c Wed Jul 12 21:16:35 2006 -0700 +++ b/Plugins/Input/wma/libffwma/fft.c Thu Jul 13 16:01:57 2006 -0700 @@ -24,6 +24,205 @@ #include "dsputil.h" +#ifdef HAVE_ALTIVEC + +#ifdef HAVE_ALTIVEC_H +#include <altivec.h> +#endif + +#ifdef CONFIG_DARWIN +#include <sys/sysctl.h> +#else /* CONFIG_DARWIN */ +#include <signal.h> +#include <setjmp.h> + /* State shared by sigill_handler() and the non-Darwin branch of has_altivec(): jmpbuf is the sigsetjmp() target, canjump is set to 1 by has_altivec() just before the probe instruction runs. */ +static sigjmp_buf jmpbuf; +static volatile sig_atomic_t canjump = 0; + /* SIGILL handler installed by has_altivec(). When canjump is clear the default disposition is restored and the signal is re-raised. Otherwise canjump is cleared and siglongjmp() returns to the sigsetjmp() point with value 1. NOTE(review): there is no return after raise(), so if raise() returns the code still falls through to siglongjmp(). */ +static void sigill_handler (int sig) +{ + if (!canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + canjump = 0; + siglongjmp (jmpbuf, 1); +} +#endif /* CONFIG_DARWIN */ + + /* Byte-index groups, four bytes each: WORD_0..WORD_3 list bytes 0x00-0x0f and WORD_s0..WORD_s3 list bytes 0x10-0x1f. They are combined by vcprm()/vcprmle(), whose results are passed to vec_perm() in fft_calc_altivec(). */ +#define WORD_0 0x00,0x01,0x02,0x03 +#define WORD_1 0x04,0x05,0x06,0x07 +#define WORD_2 0x08,0x09,0x0a,0x0b +#define WORD_3 0x0c,0x0d,0x0e,0x0f +#define WORD_s0 0x10,0x11,0x12,0x13 +#define WORD_s1 0x14,0x15,0x16,0x17 +#define WORD_s2 0x18,0x19,0x1a,0x1b +#define WORD_s3 0x1c,0x1d,0x1e,0x1f + /* vcprm(a,b,c,d) pastes four WORD_ groups into one 16-byte constant vector. The two definitions differ only in vector-literal syntax: parentheses under CONFIG_DARWIN, braces otherwise. */ +#ifdef CONFIG_DARWIN +#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) +#else +#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} +#endif + +// vcprmle is used to keep the same index as in the SSE version. +// it's the same as vcprm, with the index inversed +// ('le' is Little Endian) +#define vcprmle(a,b,c,d) vcprm(d,c,b,a) + +// used to build inverse/identity vectors (vcii) +// n is _n_egative, p is _p_ositive +#define FLOAT_n -1. +#define FLOAT_p 1. 
+ + /* vcii(a,b,c,d) pastes four FLOAT_p / FLOAT_n tokens into one constant vector float. The two definitions differ only in vector-literal syntax. */ +#ifdef CONFIG_DARWIN +#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) +#else +#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} +#endif + /* Runtime AltiVec probe: returns 1 when a vector unit is detected, 0 otherwise. Under CONFIG_DARWIN it queries sysctl with {CTL_HW, HW_VECTORUNIT} and returns 0 if the call fails. Otherwise it installs sigill_handler for SIGILL, sets canjump and executes the probe instructions; when sigsetjmp() returns non-zero (reached through siglongjmp() in the handler) the function resets SIGILL to SIG_DFL and returns 0. */ +int has_altivec(void) +{ +#ifdef CONFIG_DARWIN + int sels[2] = {CTL_HW, HW_VECTORUNIT}; + int has_vu = 0; + size_t len = sizeof(has_vu); + int err; + + err = sysctl(sels, 2, &has_vu, &len, NULL, 0); + + if (err == 0) return (has_vu != 0); +#else /* CONFIG_DARWIN */ +/* no Darwin, do it the brute-force way */ +/* this is borrowed from the libmpeg2 library */ + { + signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, SIG_DFL); + } else { + canjump = 1; + /* Probe: mtspr writes -1 to SPR 256 (presumably VRSAVE - TODO confirm), then vand is the vector instruction being tried; on a CPU without AltiVec this is expected to raise SIGILL. */ + asm volatile ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + /* NOTE(review): canjump is left at 1 on this success path, and SIGILL is set to SIG_DFL rather than to whatever handler the signal() call above replaced. */ + signal (SIGILL, SIG_DFL); + return 1; + } + } +#endif /* CONFIG_DARWIN */ + return 0; +} + + /* In-place FFT over z using AltiVec intrinsics, for np = 1 << s->nbits values. The first loop runs np >> 2 times; each iteration loads two vectors from z and performs the pass 0 and pass 1 butterflies, with the sign vector c2 selected by s->inverse. The remaining passes take their multipliers from s->exptab1. NOTE(review): the do/while loops test their counters only after decrementing, so np >> 2 and np >> 3 must be non-zero, i.e. nbits >= 3. NOTE(review): vec_ld/vec_st are applied directly to z and s->exptab1, so both presumably must be 16-byte aligned - confirm with callers. NOTE(review): np2 is assigned but never read in this function. */ +void fft_calc_altivec(FFTContext *s, FFTComplex *z) +{ +#ifdef CONFIG_DARWIN + register const vector float vczero = (const vector float)(0.); +#else + register const vector float vczero = (const vector float){0.,0.,0.,0.}; +#endif + + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *cptr, *cptr1; + int k; + + np = 1 << ln; + /* passes 0 and 1, two vectors of z per iteration */ + { + vector float *r, a, b, a1, c1, c2; + + r = (vector float *)&z[0]; + + c1 = vcii(p,p,n,n); + /* c2 differs between inverse and forward only in which of its last two lanes is negative */ + if (s->inverse) + { + c2 = vcii(p,p,n,p); + } + else + { + c2 = vcii(p,p,p,n); + } + + j = (np >> 2); + do { + a = vec_ld(0, r); + a1 = vec_ld(sizeof(vector float), r); + + b = vec_perm(a,a,vcprmle(1,0,3,2)); + a = vec_madd(a,c1,b); + /* do the pass 0 butterfly */ + + b = vec_perm(a1,a1,vcprmle(1,0,3,2)); + b = vec_madd(a1,c1,b); + /* do the pass 0 butterfly */ + + /* multiply third by -i */ + b = vec_perm(b,b,vcprmle(2,3,1,0)); + + /* do the pass 1 butterfly */ + vec_st(vec_madd(b,c2,a), 0, r); + vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r); + + r += 2; + } while (--j != 0); + } + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + /* each outer iteration is one pass: nblocks is halved and nloops doubled until nblocks reaches 0; cptr1 advances by nloops * 2 table entries per pass */ + cptr1 = s->exptab1; + do { + p = z; + q = z + nloops; + j = nblocks; + do { + cptr = cptr1; + k = nloops >> 1; + do { + vector float a,b,c,t1; + + a = vec_ld(0, (float*)p); + b = vec_ld(0, (float*)q); + + /* complex mul */ + c = vec_ld(0, (float*)cptr); + /* cre*re cim*re */ + t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero); + c = vec_ld(sizeof(vector float), (float*)cptr); + /* -cim*im cre*im */ + b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1); + + /* butterfly */ + vec_st(vec_add(a,b), 0, (float*)p); + vec_st(vec_sub(a,b), 0, (float*)q); + + p += 2; + q += 2; + cptr += 4; + } while (--k); + + p += nloops; + q += nloops; + } while (--j); + cptr1 += nloops * 2; + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} + +#endif + /** * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is * done @@ -36,10 +235,10 @@ s->nbits = nbits; n = 1 << nbits; - s->exptab = malloc((n / 2) * sizeof(FFTComplex)); + s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); if (!s->exptab) goto fail; - s->revtab = malloc(n * sizeof(uint16_t)); + s->revtab = av_malloc(n * sizeof(uint16_t)); if (!s->revtab) goto fail; s->inverse = inverse; @@ -56,15 +255,12 @@ s->fft_calc = fft_calc_c; s->exptab1 = NULL; /* compute constant table for HAVE_SSE version */ -#if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(BLAH_NO_ALTIVEC) +#if (defined(HAVE_ALTIVEC)) { int has_vectors = 0; -#if defined(HAVE_MMX) - has_vectors = mm_support() & MM_SSE; -#endif -#if defined(BLAH_NO_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE) - has_vectors = mm_support() & MM_ALTIVEC; +#if defined(HAVE_ALTIVEC) + has_vectors = has_altivec(); #endif if (has_vectors) { int np, nblocks, np2, l; @@ -73,7 +269,7 @@ np = 1 << nbits; nblocks = np >> 3; np2 = np >> 1; - s->exptab1 = malloc(np * 2 * sizeof(FFTComplex)); + s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); if (!s->exptab1) goto 
fail; q = s->exptab1; @@ -92,11 +288,7 @@ nblocks = nblocks >> 1; } while (nblocks != 0); av_freep(&s->exptab); -#if defined(HAVE_MMX) - s->fft_calc = fft_calc_sse; -#else s->fft_calc = fft_calc_altivec; -#endif } } #endif