comparison i386/dsputil_mmx.c @ 2210:e1c60876a0ae libavcodec

optimization
author michael
date Tue, 07 Sep 2004 17:22:37 +0000
parents c4a476971abc
children ee8e91ec869a
comparison
equal deleted inserted replaced
2209:c4a476971abc 2210:e1c60876a0ae
2487 }\ 2487 }\
2488 \ 2488 \
2489 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 2489 static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2490 uint64_t temp[(8+5)*2];\ 2490 uint64_t temp[(8+5)*2];\
2491 uint64_t *temp_ptr= temp;\ 2491 uint64_t *temp_ptr= temp;\
2492 int h= 8+5;\ 2492 int h= 4;\
2493 \ 2493 \
2494 src -= 2*srcStride;\ 2494 src -= 2*srcStride;\
2495 /*FIXME unroll */\ 2495 \
2496 asm volatile(\ 2496 asm volatile(\
2497 "pxor %%mm7, %%mm7 \n\t"\ 2497 "pxor %%mm7, %%mm7 \n\t"\
2498 "1: \n\t"\ 2498 "1: \n\t"\
2499 "movq (%0), %%mm0 \n\t"\ 2499 "movq (%0), %%mm0 \n\t"\
2500 "movq (%0), %%mm1 \n\t"\ 2500 "movq %%mm0, %%mm1 \n\t"\
2501 "addl %3, %0 \n\t"\
2501 "punpcklbw %%mm7, %%mm0 \n\t"\ 2502 "punpcklbw %%mm7, %%mm0 \n\t"\
2503 "movq %%mm0, (%1) \n\t"\
2502 "punpckhbw %%mm7, %%mm1 \n\t"\ 2504 "punpckhbw %%mm7, %%mm1 \n\t"\
2503 "movq %%mm0, (%1) \n\t"\ 2505 "movq (%0), %%mm0 \n\t"\
2504 "movq %%mm1, 8(%1) \n\t"\ 2506 "movq %%mm1, 8(%1) \n\t"\
2505 "addl $16, %1 \n\t"\ 2507 "movq %%mm0, %%mm1 \n\t"\
2506 "addl %3, %0 \n\t"\ 2508 "addl %3, %0 \n\t"\
2509 "punpcklbw %%mm7, %%mm0 \n\t"\
2510 "movq %%mm0, 16(%1) \n\t"\
2511 "punpckhbw %%mm7, %%mm1 \n\t"\
2512 "movq (%0), %%mm0 \n\t"\
2513 "movq %%mm1, 24(%1) \n\t"\
2514 "movq %%mm0, %%mm1 \n\t"\
2515 "addl %3, %0 \n\t"\
2516 "punpcklbw %%mm7, %%mm0 \n\t"\
2517 "movq %%mm0, 32(%1) \n\t"\
2518 "punpckhbw %%mm7, %%mm1 \n\t"\
2519 "movq %%mm1, 40(%1) \n\t"\
2520 "addl $48, %1 \n\t"\
2507 "decl %2 \n\t"\ 2521 "decl %2 \n\t"\
2508 " jnz 1b \n\t"\ 2522 " jnz 1b \n\t"\
2523 "movq (%0), %%mm0 \n\t"\
2524 "movq %%mm0, %%mm1 \n\t"\
2525 "punpcklbw %%mm7, %%mm0 \n\t"\
2526 "movq %%mm0, (%1) \n\t"\
2527 "punpckhbw %%mm7, %%mm1 \n\t"\
2528 "movq %%mm1, 8(%1) \n\t"\
2509 : "+a" (src), "+c" (temp_ptr), "+d"(h)\ 2529 : "+a" (src), "+c" (temp_ptr), "+d"(h)\
2510 : "S" (srcStride)\ 2530 : "S" (srcStride)\
2511 : "memory"\ 2531 : "memory"\
2512 );\ 2532 );\
2513 \ 2533 \
2518 "movq %4, %%mm6 \n\t"\ 2538 "movq %4, %%mm6 \n\t"\
2519 "movq %5, %%mm7 \n\t"\ 2539 "movq %5, %%mm7 \n\t"\
2520 "1: \n\t"\ 2540 "1: \n\t"\
2521 "movq 2*16+0(%0), %%mm0 \n\t"\ 2541 "movq 2*16+0(%0), %%mm0 \n\t"\
2522 "movq 2*16+8(%0), %%mm1 \n\t"\ 2542 "movq 2*16+8(%0), %%mm1 \n\t"\
2523 "movq 3*16+0(%0), %%mm2 \n\t"\ 2543 "paddw 3*16+0(%0), %%mm0 \n\t"\
2524 "movq 3*16+8(%0), %%mm3 \n\t"\ 2544 "paddw 3*16+8(%0), %%mm1 \n\t"\
2525 "paddw %%mm2, %%mm0 \n\t"\
2526 "paddw %%mm3, %%mm1 \n\t"\
2527 "psllw $2, %%mm0 \n\t"\ 2545 "psllw $2, %%mm0 \n\t"\
2528 "psllw $2, %%mm1 \n\t"\ 2546 "psllw $2, %%mm1 \n\t"\
2529 "movq 1*16+0(%0), %%mm2 \n\t"\ 2547 "movq 1*16+0(%0), %%mm2 \n\t"\
2530 "movq 1*16+8(%0), %%mm3 \n\t"\ 2548 "movq 1*16+8(%0), %%mm3 \n\t"\
2531 "movq 4*16+0(%0), %%mm4 \n\t"\ 2549 "paddw 4*16+0(%0), %%mm2 \n\t"\
2532 "movq 4*16+8(%0), %%mm5 \n\t"\ 2550 "paddw 4*16+8(%0), %%mm3 \n\t"\
2533 "paddw %%mm4, %%mm2 \n\t"\
2534 "paddw %%mm5, %%mm3 \n\t"\
2535 "psubw %%mm2, %%mm0 \n\t"\ 2551 "psubw %%mm2, %%mm0 \n\t"\
2536 "psubw %%mm3, %%mm1 \n\t"\ 2552 "psubw %%mm3, %%mm1 \n\t"\
2537 "pmullw %%mm6, %%mm0 \n\t"\ 2553 "pmullw %%mm6, %%mm0 \n\t"\
2538 "pmullw %%mm6, %%mm1 \n\t"\ 2554 "pmullw %%mm6, %%mm1 \n\t"\
2539 "movq 0*16+0(%0), %%mm2 \n\t"\ 2555 "movq 0*16+0(%0), %%mm2 \n\t"\
2540 "movq 0*16+8(%0), %%mm3 \n\t"\ 2556 "movq 0*16+8(%0), %%mm3 \n\t"\
2541 "movq 5*16+0(%0), %%mm4 \n\t"\ 2557 "paddw 5*16+0(%0), %%mm2 \n\t"\
2542 "movq 5*16+8(%0), %%mm5 \n\t"\ 2558 "paddw 5*16+8(%0), %%mm3 \n\t"\
2543 "paddw %%mm4, %%mm2 \n\t"\
2544 "paddw %%mm5, %%mm3 \n\t"\
2545 "paddw %%mm2, %%mm0 \n\t"\ 2559 "paddw %%mm2, %%mm0 \n\t"\
2546 "paddw %%mm3, %%mm1 \n\t"\ 2560 "paddw %%mm3, %%mm1 \n\t"\
2547 "paddw %%mm7, %%mm0 \n\t"\ 2561 "paddw %%mm7, %%mm0 \n\t"\
2548 "paddw %%mm7, %%mm1 \n\t"\ 2562 "paddw %%mm7, %%mm1 \n\t"\
2549 "psraw $5, %%mm0 \n\t"\ 2563 "psraw $5, %%mm0 \n\t"\