comparison i386/dsputil_h264_template_mmx.c @ 8031:eebc7209c47f libavcodec

Convert the asm keyword into __asm__. Neither the asm() nor the __asm__() keyword is part of the C99 standard, but while GCC accepts the former in C89 mode, it is not accepted in C99 mode unless GNU extensions are enabled (with -fasm). The latter form is accepted in either mode as an extension, without requiring further command-line options. The Sun Studio C99 compiler likewise rejects asm() while accepting __asm__(), albeit with warnings that it is not valid C99 syntax. (A minimal illustration follows the changeset header below.)
author flameeyes
date Thu, 16 Oct 2008 13:34:09 +0000
parents 33896780c612
children
comparison
8030:a512ac8fa540 8031:eebc7209c47f
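As a hedged illustration of the keyword difference described in the commit message above, here is a minimal, self-contained sketch. It is not part of the changeset; the file name and the bswap32 helper are hypothetical, and the behaviour described assumes GCC with -std=c99 (and, per the commit message, Sun Studio's C99 compiler) on x86.

/* asm_keyword_sketch.c -- hypothetical example, not from the FFmpeg tree.
 * "gcc -std=c99 -c asm_keyword_sketch.c" rejects the bare asm keyword,
 * which is a GNU extension re-enabled only by -fasm or -std=gnu99, while
 * the __asm__ spelling below is accepted in any mode without extra flags.
 * The commit message notes that Sun Studio's C99 compiler likewise only
 * accepts the __asm__ spelling, with a warning that it is not C99.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t bswap32(uint32_t x)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    /* __asm__ compiles under -std=c99; writing "asm" here would not. */
    __asm__ ("bswap %0" : "+r"(x));
#else
    /* Portable fallback for non-GNU compilers or other architectures. */
    x = (x >> 24) | ((x >> 8) & 0x0000ff00u)
      | ((x << 8) & 0x00ff0000u) | (x << 24);
#endif
    return x;
}

int main(void)
{
    printf("%08x\n", bswap32(0x11223344u)); /* prints 44332211 */
    return 0;
}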
45 /* 1 dimensional filter only */ 45 /* 1 dimensional filter only */
46 const int dxy = x ? 1 : stride; 46 const int dxy = x ? 1 : stride;
47 47
48 rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3; 48 rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3;
49 49
50 asm volatile( 50 __asm__ volatile(
51 "movd %0, %%mm5\n\t" 51 "movd %0, %%mm5\n\t"
52 "movq %1, %%mm4\n\t" 52 "movq %1, %%mm4\n\t"
53 "movq %2, %%mm6\n\t" /* mm6 = rnd */ 53 "movq %2, %%mm6\n\t" /* mm6 = rnd */
54 "punpcklwd %%mm5, %%mm5\n\t" 54 "punpcklwd %%mm5, %%mm5\n\t"
55 "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */ 55 "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */
56 "pxor %%mm7, %%mm7\n\t" 56 "pxor %%mm7, %%mm7\n\t"
57 "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */ 57 "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */
58 :: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg)); 58 :: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg));
59 59
60 for(i=0; i<h; i++) { 60 for(i=0; i<h; i++) {
61 asm volatile( 61 __asm__ volatile(
62 /* mm0 = src[0..7], mm1 = src[1..8] */ 62 /* mm0 = src[0..7], mm1 = src[1..8] */
63 "movq %0, %%mm0\n\t" 63 "movq %0, %%mm0\n\t"
64 "movq %1, %%mm2\n\t" 64 "movq %1, %%mm2\n\t"
65 :: "m"(src[0]), "m"(src[dxy])); 65 :: "m"(src[0]), "m"(src[dxy]));
66 66
67 asm volatile( 67 __asm__ volatile(
68 /* [mm0,mm1] = A * src[0..7] */ 68 /* [mm0,mm1] = A * src[0..7] */
69 /* [mm2,mm3] = B * src[1..8] */ 69 /* [mm2,mm3] = B * src[1..8] */
70 "movq %%mm0, %%mm1\n\t" 70 "movq %%mm0, %%mm1\n\t"
71 "movq %%mm2, %%mm3\n\t" 71 "movq %%mm2, %%mm3\n\t"
72 "punpcklbw %%mm7, %%mm0\n\t" 72 "punpcklbw %%mm7, %%mm0\n\t"
96 return; 96 return;
97 } 97 }
98 98
99 /* general case, bilinear */ 99 /* general case, bilinear */
100 rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a; 100 rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a;
101 asm volatile("movd %2, %%mm4\n\t" 101 __asm__ volatile("movd %2, %%mm4\n\t"
102 "movd %3, %%mm6\n\t" 102 "movd %3, %%mm6\n\t"
103 "punpcklwd %%mm4, %%mm4\n\t" 103 "punpcklwd %%mm4, %%mm4\n\t"
104 "punpcklwd %%mm6, %%mm6\n\t" 104 "punpcklwd %%mm6, %%mm6\n\t"
105 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */ 105 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
106 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */ 106 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
117 "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */ 117 "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */
118 "pxor %%mm7, %%mm7\n\t" 118 "pxor %%mm7, %%mm7\n\t"
119 "movq %%mm4, %0\n\t" 119 "movq %%mm4, %0\n\t"
120 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64)); 120 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
121 121
122 asm volatile( 122 __asm__ volatile(
123 /* mm0 = src[0..7], mm1 = src[1..8] */ 123 /* mm0 = src[0..7], mm1 = src[1..8] */
124 "movq %0, %%mm0\n\t" 124 "movq %0, %%mm0\n\t"
125 "movq %1, %%mm1\n\t" 125 "movq %1, %%mm1\n\t"
126 : : "m" (src[0]), "m" (src[1])); 126 : : "m" (src[0]), "m" (src[1]));
127 127
128 for(i=0; i<h; i++) { 128 for(i=0; i<h; i++) {
129 src += stride; 129 src += stride;
130 130
131 asm volatile( 131 __asm__ volatile(
132 /* mm2 = A * src[0..3] + B * src[1..4] */ 132 /* mm2 = A * src[0..3] + B * src[1..4] */
133 /* mm3 = A * src[4..7] + B * src[5..8] */ 133 /* mm3 = A * src[4..7] + B * src[5..8] */
134 "movq %%mm0, %%mm2\n\t" 134 "movq %%mm0, %%mm2\n\t"
135 "movq %%mm1, %%mm3\n\t" 135 "movq %%mm1, %%mm3\n\t"
136 "punpckhbw %%mm7, %%mm0\n\t" 136 "punpckhbw %%mm7, %%mm0\n\t"
143 "pmullw %%mm5, %%mm3\n\t" 143 "pmullw %%mm5, %%mm3\n\t"
144 "paddw %%mm1, %%mm2\n\t" 144 "paddw %%mm1, %%mm2\n\t"
145 "paddw %%mm0, %%mm3\n\t" 145 "paddw %%mm0, %%mm3\n\t"
146 : : "m" (AA)); 146 : : "m" (AA));
147 147
148 asm volatile( 148 __asm__ volatile(
149 /* [mm2,mm3] += C * src[0..7] */ 149 /* [mm2,mm3] += C * src[0..7] */
150 "movq %0, %%mm0\n\t" 150 "movq %0, %%mm0\n\t"
151 "movq %%mm0, %%mm1\n\t" 151 "movq %%mm0, %%mm1\n\t"
152 "punpcklbw %%mm7, %%mm0\n\t" 152 "punpcklbw %%mm7, %%mm0\n\t"
153 "punpckhbw %%mm7, %%mm1\n\t" 153 "punpckhbw %%mm7, %%mm1\n\t"
155 "pmullw %%mm6, %%mm1\n\t" 155 "pmullw %%mm6, %%mm1\n\t"
156 "paddw %%mm0, %%mm2\n\t" 156 "paddw %%mm0, %%mm2\n\t"
157 "paddw %%mm1, %%mm3\n\t" 157 "paddw %%mm1, %%mm3\n\t"
158 : : "m" (src[0])); 158 : : "m" (src[0]));
159 159
160 asm volatile( 160 __asm__ volatile(
161 /* [mm2,mm3] += D * src[1..8] */ 161 /* [mm2,mm3] += D * src[1..8] */
162 "movq %1, %%mm1\n\t" 162 "movq %1, %%mm1\n\t"
163 "movq %%mm1, %%mm0\n\t" 163 "movq %%mm1, %%mm0\n\t"
164 "movq %%mm1, %%mm4\n\t" 164 "movq %%mm1, %%mm4\n\t"
165 "punpcklbw %%mm7, %%mm0\n\t" 165 "punpcklbw %%mm7, %%mm0\n\t"
169 "paddw %%mm0, %%mm2\n\t" 169 "paddw %%mm0, %%mm2\n\t"
170 "paddw %%mm4, %%mm3\n\t" 170 "paddw %%mm4, %%mm3\n\t"
171 "movq %0, %%mm0\n\t" 171 "movq %0, %%mm0\n\t"
172 : : "m" (src[0]), "m" (src[1]), "m" (DD)); 172 : : "m" (src[0]), "m" (src[1]), "m" (DD));
173 173
174 asm volatile( 174 __asm__ volatile(
175 /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */ 175 /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */
176 "paddw %1, %%mm2\n\t" 176 "paddw %1, %%mm2\n\t"
177 "paddw %1, %%mm3\n\t" 177 "paddw %1, %%mm3\n\t"
178 "psrlw $6, %%mm2\n\t" 178 "psrlw $6, %%mm2\n\t"
179 "psrlw $6, %%mm3\n\t" 179 "psrlw $6, %%mm3\n\t"
185 } 185 }
186 } 186 }
187 187
188 static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) 188 static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
189 { 189 {
190 asm volatile( 190 __asm__ volatile(
191 "pxor %%mm7, %%mm7 \n\t" 191 "pxor %%mm7, %%mm7 \n\t"
192 "movd %5, %%mm2 \n\t" 192 "movd %5, %%mm2 \n\t"
193 "movd %6, %%mm3 \n\t" 193 "movd %6, %%mm3 \n\t"
194 "movq "MANGLE(ff_pw_8)", %%mm4\n\t" 194 "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
195 "movq "MANGLE(ff_pw_8)", %%mm5\n\t" 195 "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
257 static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) 257 static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
258 { 258 {
259 int tmp = ((1<<16)-1)*x + 8; 259 int tmp = ((1<<16)-1)*x + 8;
260 int CD= tmp*y; 260 int CD= tmp*y;
261 int AB= (tmp<<3) - CD; 261 int AB= (tmp<<3) - CD;
262 asm volatile( 262 __asm__ volatile(
263 /* mm5 = {A,B,A,B} */ 263 /* mm5 = {A,B,A,B} */
264 /* mm6 = {C,D,C,D} */ 264 /* mm6 = {C,D,C,D} */
265 "movd %0, %%mm5\n\t" 265 "movd %0, %%mm5\n\t"
266 "movd %1, %%mm6\n\t" 266 "movd %1, %%mm6\n\t"
267 "punpckldq %%mm5, %%mm5\n\t" 267 "punpckldq %%mm5, %%mm5\n\t"
272 "punpcklbw %%mm7, %%mm2\n\t" 272 "punpcklbw %%mm7, %%mm2\n\t"
273 "pshufw $0x94, %%mm2, %%mm2\n\t" 273 "pshufw $0x94, %%mm2, %%mm2\n\t"
274 :: "r"(AB), "r"(CD), "m"(src[0])); 274 :: "r"(AB), "r"(CD), "m"(src[0]));
275 275
276 276
277 asm volatile( 277 __asm__ volatile(
278 "1:\n\t" 278 "1:\n\t"
279 "add %4, %1\n\t" 279 "add %4, %1\n\t"
280 /* mm1 = A * src[0,1] + B * src[1,2] */ 280 /* mm1 = A * src[0,1] + B * src[1,2] */
281 "movq %%mm2, %%mm1\n\t" 281 "movq %%mm2, %%mm1\n\t"
282 "pmaddwd %%mm5, %%mm1\n\t" 282 "pmaddwd %%mm5, %%mm1\n\t"