comparison i386/dsputil_mmx.c @ 8288:800444234375 libavcodec

clear_block mmx
author lorenm
date Wed, 10 Dec 2008 21:35:17 +0000
parents cf4d575b1982
children 08b0f63a91c5
comparison
legend: equal | deleted | inserted | replaced
8287:7a1d037482c4 8288:800444234375
462 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size) 462 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
463 : "memory" 463 : "memory"
464 ); 464 );
465 } 465 }
466 466
467 static void clear_blocks_mmx(DCTELEM *blocks) 467 #define CLEAR_BLOCKS(name,n) \
468 static void name(DCTELEM *blocks)\
469 {\
470 __asm__ volatile(\
471 "pxor %%mm7, %%mm7 \n\t"\
472 "mov %1, %%"REG_a" \n\t"\
473 "1: \n\t"\
474 "movq %%mm7, (%0, %%"REG_a") \n\t"\
475 "movq %%mm7, 8(%0, %%"REG_a") \n\t"\
476 "movq %%mm7, 16(%0, %%"REG_a") \n\t"\
477 "movq %%mm7, 24(%0, %%"REG_a") \n\t"\
478 "add $32, %%"REG_a" \n\t"\
479 " js 1b \n\t"\
480 : : "r" (((uint8_t *)blocks)+128*n),\
481 "i" (-128*n)\
482 : "%"REG_a\
483 );\
484 }
485 CLEAR_BLOCKS(clear_blocks_mmx, 6)
486 CLEAR_BLOCKS(clear_block_mmx, 1)
487
488 static void clear_block_sse(DCTELEM *block)
468 { 489 {
469 __asm__ volatile( 490 __asm__ volatile(
470 "pxor %%mm7, %%mm7 \n\t" 491 "xorps %%xmm0, %%xmm0 \n"
471 "mov $-128*6, %%"REG_a" \n\t" 492 "movaps %%xmm0, (%0) \n"
472 "1: \n\t" 493 "movaps %%xmm0, 16(%0) \n"
473 "movq %%mm7, (%0, %%"REG_a") \n\t" 494 "movaps %%xmm0, 32(%0) \n"
474 "movq %%mm7, 8(%0, %%"REG_a") \n\t" 495 "movaps %%xmm0, 48(%0) \n"
475 "movq %%mm7, 16(%0, %%"REG_a") \n\t" 496 "movaps %%xmm0, 64(%0) \n"
476 "movq %%mm7, 24(%0, %%"REG_a") \n\t" 497 "movaps %%xmm0, 80(%0) \n"
477 "add $32, %%"REG_a" \n\t" 498 "movaps %%xmm0, 96(%0) \n"
478 " js 1b \n\t" 499 "movaps %%xmm0, 112(%0) \n"
479 : : "r" (((uint8_t *)blocks)+128*6) 500 :: "r"(block)
480 : "%"REG_a 501 : "memory"
481 ); 502 );
482 } 503 }
483 504
484 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ 505 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
485 x86_reg i=0; 506 x86_reg i=0;
486 __asm__ volatile( 507 __asm__ volatile(
2567 } 2588 }
2568 2589
2569 c->put_pixels_clamped = put_pixels_clamped_mmx; 2590 c->put_pixels_clamped = put_pixels_clamped_mmx;
2570 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; 2591 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
2571 c->add_pixels_clamped = add_pixels_clamped_mmx; 2592 c->add_pixels_clamped = add_pixels_clamped_mmx;
2593 c->clear_block = clear_block_mmx;
2572 c->clear_blocks = clear_blocks_mmx; 2594 c->clear_blocks = clear_blocks_mmx;
2595 if (mm_flags & FF_MM_SSE)
2596 c->clear_block = clear_block_sse;
2573 2597
2574 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ 2598 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
2575 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ 2599 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
2576 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ 2600 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
2577 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ 2601 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \