comparison x86/cavsdsp_mmx.c @ 10343:b1218e0b0f2b libavcodec

Use MANGLE in cavsdsp; the current version using "m" constraints will not compile on e.g. OpenBSD due to running out of registers.
author reimar
date Thu, 01 Oct 2009 15:30:27 +0000
parents 53f9f3994ec8
children 34a65026fa06
comparison
equal deleted inserted replaced
10342:fc22144efcaf 10343:b1218e0b0f2b
180 * motion compensation 180 * motion compensation
181 * 181 *
182 ****************************************************************************/ 182 ****************************************************************************/
183 183
184 /* vertical filter [-1 -2 96 42 -7 0] */ 184 /* vertical filter [-1 -2 96 42 -7 0] */
185 #define QPEL_CAVSV1(A,B,C,D,E,F,OP) \ 185 #define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \
186 "movd (%0), "#F" \n\t"\ 186 "movd (%0), "#F" \n\t"\
187 "movq "#C", %%mm6 \n\t"\ 187 "movq "#C", %%mm6 \n\t"\
188 "pmullw %5, %%mm6 \n\t"\ 188 "pmullw %5, %%mm6 \n\t"\
189 "movq "#D", %%mm7 \n\t"\ 189 "movq "#D", %%mm7 \n\t"\
190 "pmullw %6, %%mm7 \n\t"\ 190 "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
191 "psllw $3, "#E" \n\t"\ 191 "psllw $3, "#E" \n\t"\
192 "psubw "#E", %%mm6 \n\t"\ 192 "psubw "#E", %%mm6 \n\t"\
193 "psraw $3, "#E" \n\t"\ 193 "psraw $3, "#E" \n\t"\
194 "paddw %%mm7, %%mm6 \n\t"\ 194 "paddw %%mm7, %%mm6 \n\t"\
195 "paddw "#E", %%mm6 \n\t"\ 195 "paddw "#E", %%mm6 \n\t"\
205 "packuswb %%mm6, %%mm6 \n\t"\ 205 "packuswb %%mm6, %%mm6 \n\t"\
206 OP(%%mm6, (%1), A, d) \ 206 OP(%%mm6, (%1), A, d) \
207 "add %3, %1 \n\t" 207 "add %3, %1 \n\t"
208 208
209 /* vertical filter [ 0 -1 5 5 -1 0] */ 209 /* vertical filter [ 0 -1 5 5 -1 0] */
210 #define QPEL_CAVSV2(A,B,C,D,E,F,OP) \ 210 #define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \
211 "movd (%0), "#F" \n\t"\ 211 "movd (%0), "#F" \n\t"\
212 "movq "#C", %%mm6 \n\t"\ 212 "movq "#C", %%mm6 \n\t"\
213 "paddw "#D", %%mm6 \n\t"\ 213 "paddw "#D", %%mm6 \n\t"\
214 "pmullw %5, %%mm6 \n\t"\ 214 "pmullw %5, %%mm6 \n\t"\
215 "add %2, %0 \n\t"\ 215 "add %2, %0 \n\t"\
221 "packuswb %%mm6, %%mm6 \n\t"\ 221 "packuswb %%mm6, %%mm6 \n\t"\
222 OP(%%mm6, (%1), A, d) \ 222 OP(%%mm6, (%1), A, d) \
223 "add %3, %1 \n\t" 223 "add %3, %1 \n\t"
224 224
225 /* vertical filter [ 0 -7 42 96 -2 -1] */ 225 /* vertical filter [ 0 -7 42 96 -2 -1] */
226 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \ 226 #define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \
227 "movd (%0), "#F" \n\t"\ 227 "movd (%0), "#F" \n\t"\
228 "movq "#C", %%mm6 \n\t"\ 228 "movq "#C", %%mm6 \n\t"\
229 "pmullw %6, %%mm6 \n\t"\ 229 "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
230 "movq "#D", %%mm7 \n\t"\ 230 "movq "#D", %%mm7 \n\t"\
231 "pmullw %5, %%mm7 \n\t"\ 231 "pmullw %5, %%mm7 \n\t"\
232 "psllw $3, "#B" \n\t"\ 232 "psllw $3, "#B" \n\t"\
233 "psubw "#B", %%mm6 \n\t"\ 233 "psubw "#B", %%mm6 \n\t"\
234 "psraw $3, "#B" \n\t"\ 234 "psraw $3, "#B" \n\t"\
268 "punpcklbw %%mm7, %%mm0 \n\t"\ 268 "punpcklbw %%mm7, %%mm0 \n\t"\
269 "punpcklbw %%mm7, %%mm1 \n\t"\ 269 "punpcklbw %%mm7, %%mm1 \n\t"\
270 "punpcklbw %%mm7, %%mm2 \n\t"\ 270 "punpcklbw %%mm7, %%mm2 \n\t"\
271 "punpcklbw %%mm7, %%mm3 \n\t"\ 271 "punpcklbw %%mm7, %%mm3 \n\t"\
272 "punpcklbw %%mm7, %%mm4 \n\t"\ 272 "punpcklbw %%mm7, %%mm4 \n\t"\
273 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ 273 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
274 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ 274 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
275 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ 275 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
276 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ 276 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
277 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ 277 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
278 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ 278 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
279 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ 279 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
280 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ 280 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
281 \ 281 \
282 : "+a"(src), "+c"(dst)\ 282 : "+a"(src), "+c"(dst)\
283 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\ 283 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
284 : "memory"\ 284 : "memory"\
285 );\ 285 );\
286 if(h==16){\ 286 if(h==16){\
287 __asm__ volatile(\ 287 __asm__ volatile(\
288 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ 288 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
289 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ 289 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
290 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ 290 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
291 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ 291 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
292 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ 292 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
293 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ 293 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
294 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ 294 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
295 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ 295 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
296 \ 296 \
297 : "+a"(src), "+c"(dst)\ 297 : "+a"(src), "+c"(dst)\
298 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\ 298 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
299 : "memory"\ 299 : "memory"\
300 );\ 300 );\
301 }\ 301 }\
302 src += 4-(h+5)*srcStride;\ 302 src += 4-(h+5)*srcStride;\
303 dst += 4-h*dstStride;\ 303 dst += 4-h*dstStride;\