Mercurial > libavcodec.hg
comparison fft.c @ 10153:7a63015e4627 libavcodec
ARM: NEON optimised FFT and MDCT
Vorbis and AC3 ~3x faster.
Parts by Naotoshi Nojiri, naonoj gmail
author | mru |
---|---|
date | Thu, 10 Sep 2009 08:50:03 +0000 |
parents | c5e8a5a044c3 |
children | a349795e8dca |
comparison
equal
deleted
inserted
replaced
10152:ed85bbd5dccb | 10153:7a63015e4627 |
---|---|
62 { | 62 { |
63 int i, j, m, n; | 63 int i, j, m, n; |
64 float alpha, c1, s1, s2; | 64 float alpha, c1, s1, s2; |
65 int split_radix = 1; | 65 int split_radix = 1; |
66 int av_unused has_vectors; | 66 int av_unused has_vectors; |
 | 67 int revtab_shift = 0; |
67 | 68 |
68 if (nbits < 2 || nbits > 16) | 69 if (nbits < 2 || nbits > 16) |
69 goto fail; | 70 goto fail; |
70 s->nbits = nbits; | 71 s->nbits = nbits; |
71 n = 1 << nbits; | 72 n = 1 << nbits; |
110 has_vectors = mm_support(); | 111 has_vectors = mm_support(); |
111 if (has_vectors & FF_MM_ALTIVEC) { | 112 if (has_vectors & FF_MM_ALTIVEC) { |
112 s->fft_calc = ff_fft_calc_altivec; | 113 s->fft_calc = ff_fft_calc_altivec; |
113 split_radix = 0; | 114 split_radix = 0; |
114 } | 115 } |
 | 116 #elif HAVE_NEON |
 | 117 s->fft_permute = ff_fft_permute_neon; |
 | 118 s->fft_calc = ff_fft_calc_neon; |
 | 119 s->imdct_calc = ff_imdct_calc_neon; |
 | 120 s->imdct_half = ff_imdct_half_neon; |
 | 121 revtab_shift = 3; |
115 #endif | 122 #endif |
116 | 123 |
117 if (split_radix) { | 124 if (split_radix) { |
118 for(j=4; j<=nbits; j++) { | 125 for(j=4; j<=nbits; j++) { |
119 int m = 1<<j; | 126 int m = 1<<j; |
123 tab[i] = cos(i*freq); | 130 tab[i] = cos(i*freq); |
124 for(i=1; i<m/4; i++) | 131 for(i=1; i<m/4; i++) |
125 tab[m/2-i] = tab[i]; | 132 tab[m/2-i] = tab[i]; |
126 } | 133 } |
127 for(i=0; i<n; i++) | 134 for(i=0; i<n; i++) |
128 s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i; | 135 s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = |
 | 136 i << revtab_shift; |
129 s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); | 137 s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); |
130 } else { | 138 } else { |
131 int np, nblocks, np2, l; | 139 int np, nblocks, np2, l; |
132 FFTComplex *q; | 140 FFTComplex *q; |
133 | 141 |