comparison dsputil.h @ 7542:a8a8205a9081 libavcodec

Split-radix FFT: the C version is 1.9x faster than the previous C code (on various x86 CPUs), and the SSE version is 1.6x faster than the previous SSE code.
author lorenm
date Tue, 12 Aug 2008 00:26:58 +0000
parents a4ebced5d4a2
children ee1cb5ab9f99
comparison
equal deleted inserted replaced
7541:570c0c027998 7542:a8a8205a9081
637 int nbits; 637 int nbits;
638 int inverse; 638 int inverse;
639 uint16_t *revtab; 639 uint16_t *revtab;
640 FFTComplex *exptab; 640 FFTComplex *exptab;
641 FFTComplex *exptab1; /* only used by SSE code */ 641 FFTComplex *exptab1; /* only used by SSE code */
642 FFTComplex *tmp_buf;
643 void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
642 void (*fft_calc)(struct FFTContext *s, FFTComplex *z); 644 void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
643 void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, 645 void (*imdct_calc)(struct MDCTContext *s, FFTSample *output,
644 const FFTSample *input, FFTSample *tmp); 646 const FFTSample *input, FFTSample *tmp);
645 void (*imdct_half)(struct MDCTContext *s, FFTSample *output, 647 void (*imdct_half)(struct MDCTContext *s, FFTSample *output,
646 const FFTSample *input, FFTSample *tmp); 648 const FFTSample *input, FFTSample *tmp);
647 } FFTContext; 649 } FFTContext;
648 650
649 int ff_fft_init(FFTContext *s, int nbits, int inverse); 651 int ff_fft_init(FFTContext *s, int nbits, int inverse);
650 void ff_fft_permute(FFTContext *s, FFTComplex *z); 652 void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
653 void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
651 void ff_fft_calc_c(FFTContext *s, FFTComplex *z); 654 void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
652 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); 655 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
653 void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); 656 void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
654 void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); 657 void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
655 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); 658 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
656 659
660 static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
661 {
662 s->fft_permute(s, z);
663 }
657 static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) 664 static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
658 { 665 {
659 s->fft_calc(s, z); 666 s->fft_calc(s, z);
660 } 667 }
661 void ff_fft_end(FFTContext *s); 668 void ff_fft_end(FFTContext *s);