Mercurial > libavcodec.hg
comparison x86/cavsdsp_mmx.c @ 10343:b1218e0b0f2b libavcodec
Use MANGLE in cavsdsp; the current version, which uses "m" constraints, will not
compile on e.g. OpenBSD because it runs out of registers.
author | reimar |
---|---|
date | Thu, 01 Oct 2009 15:30:27 +0000 |
parents | 53f9f3994ec8 |
children | 34a65026fa06 |
comparison
equal
deleted
inserted
replaced
10342:fc22144efcaf | 10343:b1218e0b0f2b |
---|---|
180 * motion compensation | 180 * motion compensation |
181 * | 181 * |
182 ****************************************************************************/ | 182 ****************************************************************************/ |
183 | 183 |
184 /* vertical filter [-1 -2 96 42 -7 0] */ | 184 /* vertical filter [-1 -2 96 42 -7 0] */ |
185 #define QPEL_CAVSV1(A,B,C,D,E,F,OP) \ | 185 #define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \ |
186 "movd (%0), "#F" \n\t"\ | 186 "movd (%0), "#F" \n\t"\ |
187 "movq "#C", %%mm6 \n\t"\ | 187 "movq "#C", %%mm6 \n\t"\ |
188 "pmullw %5, %%mm6 \n\t"\ | 188 "pmullw %5, %%mm6 \n\t"\ |
189 "movq "#D", %%mm7 \n\t"\ | 189 "movq "#D", %%mm7 \n\t"\ |
190 "pmullw %6, %%mm7 \n\t"\ | 190 "pmullw "MANGLE(MUL2)", %%mm7\n\t"\ |
191 "psllw $3, "#E" \n\t"\ | 191 "psllw $3, "#E" \n\t"\ |
192 "psubw "#E", %%mm6 \n\t"\ | 192 "psubw "#E", %%mm6 \n\t"\ |
193 "psraw $3, "#E" \n\t"\ | 193 "psraw $3, "#E" \n\t"\ |
194 "paddw %%mm7, %%mm6 \n\t"\ | 194 "paddw %%mm7, %%mm6 \n\t"\ |
195 "paddw "#E", %%mm6 \n\t"\ | 195 "paddw "#E", %%mm6 \n\t"\ |
205 "packuswb %%mm6, %%mm6 \n\t"\ | 205 "packuswb %%mm6, %%mm6 \n\t"\ |
206 OP(%%mm6, (%1), A, d) \ | 206 OP(%%mm6, (%1), A, d) \ |
207 "add %3, %1 \n\t" | 207 "add %3, %1 \n\t" |
208 | 208 |
209 /* vertical filter [ 0 -1 5 5 -1 0] */ | 209 /* vertical filter [ 0 -1 5 5 -1 0] */ |
210 #define QPEL_CAVSV2(A,B,C,D,E,F,OP) \ | 210 #define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \ |
211 "movd (%0), "#F" \n\t"\ | 211 "movd (%0), "#F" \n\t"\ |
212 "movq "#C", %%mm6 \n\t"\ | 212 "movq "#C", %%mm6 \n\t"\ |
213 "paddw "#D", %%mm6 \n\t"\ | 213 "paddw "#D", %%mm6 \n\t"\ |
214 "pmullw %5, %%mm6 \n\t"\ | 214 "pmullw %5, %%mm6 \n\t"\ |
215 "add %2, %0 \n\t"\ | 215 "add %2, %0 \n\t"\ |
221 "packuswb %%mm6, %%mm6 \n\t"\ | 221 "packuswb %%mm6, %%mm6 \n\t"\ |
222 OP(%%mm6, (%1), A, d) \ | 222 OP(%%mm6, (%1), A, d) \ |
223 "add %3, %1 \n\t" | 223 "add %3, %1 \n\t" |
224 | 224 |
225 /* vertical filter [ 0 -7 42 96 -2 -1] */ | 225 /* vertical filter [ 0 -7 42 96 -2 -1] */ |
226 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \ | 226 #define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \ |
227 "movd (%0), "#F" \n\t"\ | 227 "movd (%0), "#F" \n\t"\ |
228 "movq "#C", %%mm6 \n\t"\ | 228 "movq "#C", %%mm6 \n\t"\ |
229 "pmullw %6, %%mm6 \n\t"\ | 229 "pmullw "MANGLE(MUL2)", %%mm6\n\t"\ |
230 "movq "#D", %%mm7 \n\t"\ | 230 "movq "#D", %%mm7 \n\t"\ |
231 "pmullw %5, %%mm7 \n\t"\ | 231 "pmullw %5, %%mm7 \n\t"\ |
232 "psllw $3, "#B" \n\t"\ | 232 "psllw $3, "#B" \n\t"\ |
233 "psubw "#B", %%mm6 \n\t"\ | 233 "psubw "#B", %%mm6 \n\t"\ |
234 "psraw $3, "#B" \n\t"\ | 234 "psraw $3, "#B" \n\t"\ |
268 "punpcklbw %%mm7, %%mm0 \n\t"\ | 268 "punpcklbw %%mm7, %%mm0 \n\t"\ |
269 "punpcklbw %%mm7, %%mm1 \n\t"\ | 269 "punpcklbw %%mm7, %%mm1 \n\t"\ |
270 "punpcklbw %%mm7, %%mm2 \n\t"\ | 270 "punpcklbw %%mm7, %%mm2 \n\t"\ |
271 "punpcklbw %%mm7, %%mm3 \n\t"\ | 271 "punpcklbw %%mm7, %%mm3 \n\t"\ |
272 "punpcklbw %%mm7, %%mm4 \n\t"\ | 272 "punpcklbw %%mm7, %%mm4 \n\t"\ |
273 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | 273 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\ |
274 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | 274 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\ |
275 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | 275 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\ |
276 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ | 276 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\ |
277 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ | 277 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\ |
278 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ | 278 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\ |
279 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | 279 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\ |
280 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | 280 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\ |
281 \ | 281 \ |
282 : "+a"(src), "+c"(dst)\ | 282 : "+a"(src), "+c"(dst)\ |
283 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\ | 283 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ |
284 : "memory"\ | 284 : "memory"\ |
285 );\ | 285 );\ |
286 if(h==16){\ | 286 if(h==16){\ |
287 __asm__ volatile(\ | 287 __asm__ volatile(\ |
288 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | 288 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\ |
289 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ | 289 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\ |
290 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ | 290 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\ |
291 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ | 291 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\ |
292 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ | 292 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\ |
293 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ | 293 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\ |
294 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ | 294 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\ |
295 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ | 295 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\ |
296 \ | 296 \ |
297 : "+a"(src), "+c"(dst)\ | 297 : "+a"(src), "+c"(dst)\ |
298 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\ | 298 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ |
299 : "memory"\ | 299 : "memory"\ |
300 );\ | 300 );\ |
301 }\ | 301 }\ |
302 src += 4-(h+5)*srcStride;\ | 302 src += 4-(h+5)*srcStride;\ |
303 dst += 4-h*dstStride;\ | 303 dst += 4-h*dstStride;\ |