Mercurial > libavcodec.hg
comparison x86/dsputil_mmx.c @ 9861:89270a3bc4a0 libavcodec
SSE version of clear_blocks
author | darkshikari |
---|---|
date | Tue, 16 Jun 2009 17:33:57 +0000 |
parents | 41245484dc0b |
children | 6972b493f41f |
comparison
equal
deleted
inserted
replaced
9860:7e82083caab7 | 9861:89270a3bc4a0 |
---|---|
523 "movaps %%xmm0, 80(%0) \n" | 523 "movaps %%xmm0, 80(%0) \n" |
524 "movaps %%xmm0, 96(%0) \n" | 524 "movaps %%xmm0, 96(%0) \n" |
525 "movaps %%xmm0, 112(%0) \n" | 525 "movaps %%xmm0, 112(%0) \n" |
526 :: "r"(block) | 526 :: "r"(block) |
527 : "memory" | 527 : "memory" |
| | 528 ); |
| | 529 } |
| | 530 |
| | 531 static void clear_blocks_sse(DCTELEM *blocks) |
| | 532 {\ |
| | 533 __asm__ volatile( |
| | 534 "xorps %%xmm0, %%xmm0 \n" |
| | 535 "mov %1, %%"REG_a" \n" |
| | 536 "1: \n" |
| | 537 "movaps %%xmm0, (%0, %%"REG_a") \n" |
| | 538 "movaps %%xmm0, 16(%0, %%"REG_a") \n" |
| | 539 "movaps %%xmm0, 32(%0, %%"REG_a") \n" |
| | 540 "movaps %%xmm0, 48(%0, %%"REG_a") \n" |
| | 541 "movaps %%xmm0, 64(%0, %%"REG_a") \n" |
| | 542 "movaps %%xmm0, 80(%0, %%"REG_a") \n" |
| | 543 "movaps %%xmm0, 96(%0, %%"REG_a") \n" |
| | 544 "movaps %%xmm0, 112(%0, %%"REG_a") \n" |
| | 545 "add $128, %%"REG_a" \n" |
| | 546 " js 1b \n" |
| | 547 : : "r" (((uint8_t *)blocks)+128*6), |
| | 548 "i" (-128*6) |
| | 549 : "%"REG_a |
528 ); | 550 ); |
529 } | 551 } |
530 | 552 |
531 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ | 553 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ |
532 x86_reg i=0; | 554 x86_reg i=0; |
2669 c->put_pixels_clamped = put_pixels_clamped_mmx; | 2691 c->put_pixels_clamped = put_pixels_clamped_mmx; |
2670 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | 2692 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; |
2671 c->add_pixels_clamped = add_pixels_clamped_mmx; | 2693 c->add_pixels_clamped = add_pixels_clamped_mmx; |
2672 c->clear_block = clear_block_mmx; | 2694 c->clear_block = clear_block_mmx; |
2673 c->clear_blocks = clear_blocks_mmx; | 2695 c->clear_blocks = clear_blocks_mmx; |
2674 if (mm_flags & FF_MM_SSE) | 2696 if (mm_flags & FF_MM_SSE){ |
2675 c->clear_block = clear_block_sse; | 2697 c->clear_block = clear_block_sse; |
| | 2698 c->clear_blocks = clear_blocks_sse; |
| | 2699 } |
2676 | 2700 |
2677 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ | 2701 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ |
2678 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ | 2702 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ |
2679 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ | 2703 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ |
2680 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ | 2704 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ |