comparison fft.c @ 10153:7a63015e4627 libavcodec

ARM: NEON optimised FFT and MDCT. Vorbis and AC3 ~3x faster. Parts by Naotoshi Nojiri, naonoj gmail
author mru
date Thu, 10 Sep 2009 08:50:03 +0000
parents c5e8a5a044c3
children a349795e8dca
comparison
equal deleted inserted replaced
10152:ed85bbd5dccb 10153:7a63015e4627
62 { 62 {
63 int i, j, m, n; 63 int i, j, m, n;
64 float alpha, c1, s1, s2; 64 float alpha, c1, s1, s2;
65 int split_radix = 1; 65 int split_radix = 1;
66 int av_unused has_vectors; 66 int av_unused has_vectors;
67 int revtab_shift = 0;
67 68
68 if (nbits < 2 || nbits > 16) 69 if (nbits < 2 || nbits > 16)
69 goto fail; 70 goto fail;
70 s->nbits = nbits; 71 s->nbits = nbits;
71 n = 1 << nbits; 72 n = 1 << nbits;
110 has_vectors = mm_support(); 111 has_vectors = mm_support();
111 if (has_vectors & FF_MM_ALTIVEC) { 112 if (has_vectors & FF_MM_ALTIVEC) {
112 s->fft_calc = ff_fft_calc_altivec; 113 s->fft_calc = ff_fft_calc_altivec;
113 split_radix = 0; 114 split_radix = 0;
114 } 115 }
116 #elif HAVE_NEON
117 s->fft_permute = ff_fft_permute_neon;
118 s->fft_calc = ff_fft_calc_neon;
119 s->imdct_calc = ff_imdct_calc_neon;
120 s->imdct_half = ff_imdct_half_neon;
121 revtab_shift = 3;
115 #endif 122 #endif
116 123
117 if (split_radix) { 124 if (split_radix) {
118 for(j=4; j<=nbits; j++) { 125 for(j=4; j<=nbits; j++) {
119 int m = 1<<j; 126 int m = 1<<j;
123 tab[i] = cos(i*freq); 130 tab[i] = cos(i*freq);
124 for(i=1; i<m/4; i++) 131 for(i=1; i<m/4; i++)
125 tab[m/2-i] = tab[i]; 132 tab[m/2-i] = tab[i];
126 } 133 }
127 for(i=0; i<n; i++) 134 for(i=0; i<n; i++)
128 s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i; 135 s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] =
136 i << revtab_shift;
129 s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); 137 s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
130 } else { 138 } else {
131 int np, nblocks, np2, l; 139 int np, nblocks, np2, l;
132 FFTComplex *q; 140 FFTComplex *q;
133 141