Mercurial > libavcodec.hg
comparison x86/fft_sse.c @ 10199:38ab367d4231 libavcodec
Merge FFTContext and MDCTContext
author | mru |
---|---|
date | Sun, 20 Sep 2009 17:30:20 +0000 |
parents | 5cf49858179a |
children | 34a65026fa06 |
Comparison legend: equal, deleted, inserted, replaced
10198:78af613fc316 | 10199:38ab367d4231 |
---|---|
69 ); | 69 ); |
70 } | 70 } |
71 memcpy(z, s->tmp_buf, n*sizeof(FFTComplex)); | 71 memcpy(z, s->tmp_buf, n*sizeof(FFTComplex)); |
72 } | 72 } |
73 | 73 |
74 void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input) | 74 void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input) |
75 { | 75 { |
76 av_unused x86_reg i, j, k, l; | 76 av_unused x86_reg i, j, k, l; |
77 long n = 1 << s->nbits; | 77 long n = 1 << s->mdct_bits; |
78 long n2 = n >> 1; | 78 long n2 = n >> 1; |
79 long n4 = n >> 2; | 79 long n4 = n >> 2; |
80 long n8 = n >> 3; | 80 long n8 = n >> 3; |
81 const uint16_t *revtab = s->fft.revtab + n8; | 81 const uint16_t *revtab = s->revtab + n8; |
82 const FFTSample *tcos = s->tcos; | 82 const FFTSample *tcos = s->tcos; |
83 const FFTSample *tsin = s->tsin; | 83 const FFTSample *tsin = s->tsin; |
84 FFTComplex *z = (FFTComplex *)output; | 84 FFTComplex *z = (FFTComplex *)output; |
85 | 85 |
86 /* pre rotation */ | 86 /* pre rotation */ |
127 __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]])); | 127 __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]])); |
128 __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); | 128 __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); |
129 #endif | 129 #endif |
130 } | 130 } |
131 | 131 |
132 ff_fft_dispatch_sse(z, s->fft.nbits); | 132 ff_fft_dispatch_sse(z, s->nbits); |
133 | 133 |
134 /* post rotation + reinterleave + reorder */ | 134 /* post rotation + reinterleave + reorder */ |
135 | 135 |
136 #define CMUL(j,xmm0,xmm1)\ | 136 #define CMUL(j,xmm0,xmm1)\ |
137 "movaps (%2,"#j",2), %%xmm6 \n"\ | 137 "movaps (%2,"#j",2), %%xmm6 \n"\ |
170 :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) | 170 :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) |
171 :"memory" | 171 :"memory" |
172 ); | 172 ); |
173 } | 173 } |
174 | 174 |
175 void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input) | 175 void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input) |
176 { | 176 { |
177 x86_reg j, k; | 177 x86_reg j, k; |
178 long n = 1 << s->nbits; | 178 long n = 1 << s->mdct_bits; |
179 long n4 = n >> 2; | 179 long n4 = n >> 2; |
180 | 180 |
181 ff_imdct_half_sse(s, output+n4, input); | 181 ff_imdct_half_sse(s, output+n4, input); |
182 | 182 |
183 j = -n; | 183 j = -n; |