comparison i386/fft_sse.c @ 8031:eebc7209c47f libavcodec

Convert the asm keyword into __asm__. Neither asm() nor __asm__() is part of the C99 standard, but while GCC accepts the former in C89 mode, it rejects it in C99 mode unless GNU extensions are enabled (with -fasm). The latter form is accepted in any mode as an extension, without requiring further command-line options. The Sun Studio C99 compiler likewise rejects asm() but accepts __asm__(), albeit with a warning that it is not valid C99 syntax.
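For illustration only, a minimal sketch of the issue (the file name and the cpu_pause helper are hypothetical, not part of this patch):

    /* keyword_demo.c -- hypothetical example, not from fft_sse.c.
     * With GCC, writing asm volatile("pause"); below fails to parse under
     * -std=c99 unless the keyword is re-enabled with -fasm or a -std=gnu*
     * mode; the __asm__ spelling is accepted as an extension in every mode. */
    static inline void cpu_pause(void)
    {
        __asm__ volatile("pause"); /* x86 only, like the rest of fft_sse.c */
    }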
author flameeyes
date Thu, 16 Oct 2008 13:34:09 +0000
parents 97383e012cb9
children
comparing 8030:a512ac8fa540 with 8031:eebc7209c47f
@@ -34,11 +34,11 @@
 
     ff_fft_dispatch_interleave_sse(z, s->nbits);
 
     if(n <= 16) {
         x86_reg i = -8*n;
-        asm volatile(
+        __asm__ volatile(
             "1: \n"
             "movaps     (%0,%1), %%xmm0 \n"
             "movaps      %%xmm0, %%xmm1 \n"
             "unpcklps 16(%0,%1), %%xmm0 \n"
             "unpckhps 16(%0,%1), %%xmm1 \n"
@@ -56,11 +56,11 @@
 void ff_fft_permute_sse(FFTContext *s, FFTComplex *z)
 {
     int n = 1 << s->nbits;
     int i;
     for(i=0; i<n; i+=2) {
-        asm volatile(
+        __asm__ volatile(
             "movaps %2, %%xmm0 \n"
             "movlps %%xmm0, %0 \n"
             "movhps %%xmm0, %1 \n"
             :"=m"(s->tmp_buf[s->revtab[i]]),
              "=m"(s->tmp_buf[s->revtab[i+1]])
@@ -82,11 +82,11 @@
     const FFTSample *tsin = s->tsin;
     FFTComplex *z = (FFTComplex *)output;
 
     /* pre rotation */
     for(k=n8-2; k>=0; k-=2) {
-        asm volatile(
+        __asm__ volatile(
             "movaps     (%2,%1,2), %%xmm0 \n" // { z[k].re,    z[k].im,    z[k+1].re,  z[k+1].im  }
             "movaps  -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im }
             "movaps        %%xmm0, %%xmm2 \n"
             "shufps $0x88, %%xmm1, %%xmm0 \n" // { z[k].re,    z[k+1].re,  z[-k-2].re, z[-k-1].re }
             "shufps $0x77, %%xmm2, %%xmm1 \n" // { z[-k-1].im, z[-k-2].im, z[k+1].im,  z[k].im    }
@@ -109,24 +109,24 @@
             "r"(input+n4), "r"(tcos+n8), "r"(tsin+n8)
         );
 #ifdef ARCH_X86_64
         // if we have enough regs, don't let gcc make the luts latency-bound
         // but if not, latency is faster than spilling
-        asm("movlps %%xmm0, %0 \n"
+        __asm__("movlps %%xmm0, %0 \n"
             "movhps %%xmm0, %1 \n"
             "movlps %%xmm1, %2 \n"
             "movhps %%xmm1, %3 \n"
             :"=m"(z[revtab[-k-2]]),
              "=m"(z[revtab[-k-1]]),
              "=m"(z[revtab[ k ]]),
              "=m"(z[revtab[ k+1]])
         );
 #else
-        asm("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]]));
-        asm("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]]));
-        asm("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]]));
-        asm("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
+        __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]]));
+        __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]]));
+        __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]]));
+        __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]]));
 #endif
     }
 
     ff_fft_dispatch_sse(z, s->fft.nbits);
 
@@ -144,11 +144,11 @@
     "subps %%xmm6, "#xmm0"\n"\
     "addps %%xmm7, "#xmm1"\n"
 
     j = -n2;
     k = n2-16;
-    asm volatile(
+    __asm__ volatile(
         "1: \n"
         CMUL(%0, %%xmm0, %%xmm1)
         CMUL(%1, %%xmm4, %%xmm5)
         "shufps $0x1b, %%xmm1, %%xmm1 \n"
         "shufps $0x1b, %%xmm5, %%xmm5 \n"
@@ -179,11 +179,11 @@
 
     ff_imdct_half_sse(s, output+n4, input);
 
     j = -n;
     k = n-16;
-    asm volatile(
+    __asm__ volatile(
         "movaps %4, %%xmm7 \n"
         "1: \n"
         "movaps  (%2,%1), %%xmm0 \n"
         "movaps  (%3,%0), %%xmm1 \n"
         "shufps $0x1b, %%xmm0, %%xmm0 \n"