comparison x86/fft_sse.c @ 10199:38ab367d4231 libavcodec

Merge FFTContext and MDCTContext
author mru
date Sun, 20 Sep 2009 17:30:20 +0000
parents 5cf49858179a
children 34a65026fa06
comparison
equal deleted inserted replaced
10198:78af613fc316 10199:38ab367d4231
69 ); 69 );
70 } 70 }
71 memcpy(z, s->tmp_buf, n*sizeof(FFTComplex)); 71 memcpy(z, s->tmp_buf, n*sizeof(FFTComplex));
72 } 72 }
73 73
74 void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input) 74 void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
75 { 75 {
76 av_unused x86_reg i, j, k, l; 76 av_unused x86_reg i, j, k, l;
77 long n = 1 << s->nbits; 77 long n = 1 << s->mdct_bits;
78 long n2 = n >> 1; 78 long n2 = n >> 1;
79 long n4 = n >> 2; 79 long n4 = n >> 2;
80 long n8 = n >> 3; 80 long n8 = n >> 3;
81 const uint16_t *revtab = s->fft.revtab + n8; 81 const uint16_t *revtab = s->revtab + n8;
82 const FFTSample *tcos = s->tcos; 82 const FFTSample *tcos = s->tcos;
83 const FFTSample *tsin = s->tsin; 83 const FFTSample *tsin = s->tsin;
84 FFTComplex *z = (FFTComplex *)output; 84 FFTComplex *z = (FFTComplex *)output;
85 85
86 /* pre rotation */ 86 /* pre rotation */
127 __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]])); 127 __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]]));
128 __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); 128 __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
129 #endif 129 #endif
130 } 130 }
131 131
132 ff_fft_dispatch_sse(z, s->fft.nbits); 132 ff_fft_dispatch_sse(z, s->nbits);
133 133
134 /* post rotation + reinterleave + reorder */ 134 /* post rotation + reinterleave + reorder */
135 135
136 #define CMUL(j,xmm0,xmm1)\ 136 #define CMUL(j,xmm0,xmm1)\
137 "movaps (%2,"#j",2), %%xmm6 \n"\ 137 "movaps (%2,"#j",2), %%xmm6 \n"\
170 :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) 170 :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8)
171 :"memory" 171 :"memory"
172 ); 172 );
173 } 173 }
174 174
175 void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input) 175 void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
176 { 176 {
177 x86_reg j, k; 177 x86_reg j, k;
178 long n = 1 << s->nbits; 178 long n = 1 << s->mdct_bits;
179 long n4 = n >> 2; 179 long n4 = n >> 2;
180 180
181 ff_imdct_half_sse(s, output+n4, input); 181 ff_imdct_half_sse(s, output+n4, input);
182 182
183 j = -n; 183 j = -n;