comparison x86/dsputil_mmx.c @ 9861:89270a3bc4a0 libavcodec

SSE version of clear_blocks
author darkshikari
date Tue, 16 Jun 2009 17:33:57 +0000
parents 41245484dc0b
children 6972b493f41f
comparison
equal deleted inserted replaced
9860:7e82083caab7 9861:89270a3bc4a0
523 "movaps %%xmm0, 80(%0) \n" 523 "movaps %%xmm0, 80(%0) \n"
524 "movaps %%xmm0, 96(%0) \n" 524 "movaps %%xmm0, 96(%0) \n"
525 "movaps %%xmm0, 112(%0) \n" 525 "movaps %%xmm0, 112(%0) \n"
526 :: "r"(block) 526 :: "r"(block)
527 : "memory" 527 : "memory"
528 );
529 }
530
531 static void clear_blocks_sse(DCTELEM *blocks)
532 {\
533 __asm__ volatile(
534 "xorps %%xmm0, %%xmm0 \n"
535 "mov %1, %%"REG_a" \n"
536 "1: \n"
537 "movaps %%xmm0, (%0, %%"REG_a") \n"
538 "movaps %%xmm0, 16(%0, %%"REG_a") \n"
539 "movaps %%xmm0, 32(%0, %%"REG_a") \n"
540 "movaps %%xmm0, 48(%0, %%"REG_a") \n"
541 "movaps %%xmm0, 64(%0, %%"REG_a") \n"
542 "movaps %%xmm0, 80(%0, %%"REG_a") \n"
543 "movaps %%xmm0, 96(%0, %%"REG_a") \n"
544 "movaps %%xmm0, 112(%0, %%"REG_a") \n"
545 "add $128, %%"REG_a" \n"
546 " js 1b \n"
547 : : "r" (((uint8_t *)blocks)+128*6),
548 "i" (-128*6)
549 : "%"REG_a
528 ); 550 );
529 } 551 }
530 552
531 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ 553 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
532 x86_reg i=0; 554 x86_reg i=0;
2669 c->put_pixels_clamped = put_pixels_clamped_mmx; 2691 c->put_pixels_clamped = put_pixels_clamped_mmx;
2670 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; 2692 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
2671 c->add_pixels_clamped = add_pixels_clamped_mmx; 2693 c->add_pixels_clamped = add_pixels_clamped_mmx;
2672 c->clear_block = clear_block_mmx; 2694 c->clear_block = clear_block_mmx;
2673 c->clear_blocks = clear_blocks_mmx; 2695 c->clear_blocks = clear_blocks_mmx;
2674 if (mm_flags & FF_MM_SSE) 2696 if (mm_flags & FF_MM_SSE){
2675 c->clear_block = clear_block_sse; 2697 c->clear_block = clear_block_sse;
2698 c->clear_blocks = clear_blocks_sse;
2699 }
2676 2700
2677 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ 2701 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
2678 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ 2702 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
2679 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ 2703 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
2680 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ 2704 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \