Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 8288:800444234375 libavcodec
clear_block mmx
author | lorenm |
---|---|
date | Wed, 10 Dec 2008 21:35:17 +0000 |
parents | cf4d575b1982 |
children | 08b0f63a91c5 |
comparison
equal
deleted
inserted
replaced
8287:7a1d037482c4 | 8288:800444234375 |
---|---|
462 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size) | 462 : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size) |
463 : "memory" | 463 : "memory" |
464 ); | 464 ); |
465 } | 465 } |
466 | 466 |
467 static void clear_blocks_mmx(DCTELEM *blocks) | 467 #define CLEAR_BLOCKS(name,n) \ |
| | 468 static void name(DCTELEM *blocks)\ |
| | 469 {\ |
| | 470 __asm__ volatile(\ |
| | 471 "pxor %%mm7, %%mm7 \n\t"\ |
| | 472 "mov %1, %%"REG_a" \n\t"\ |
| | 473 "1: \n\t"\ |
| | 474 "movq %%mm7, (%0, %%"REG_a") \n\t"\ |
| | 475 "movq %%mm7, 8(%0, %%"REG_a") \n\t"\ |
| | 476 "movq %%mm7, 16(%0, %%"REG_a") \n\t"\ |
| | 477 "movq %%mm7, 24(%0, %%"REG_a") \n\t"\ |
| | 478 "add $32, %%"REG_a" \n\t"\ |
| | 479 " js 1b \n\t"\ |
| | 480 : : "r" (((uint8_t *)blocks)+128*n),\ |
| | 481 "i" (-128*n)\ |
| | 482 : "%"REG_a\ |
| | 483 );\ |
| | 484 } |
| | 485 CLEAR_BLOCKS(clear_blocks_mmx, 6) |
| | 486 CLEAR_BLOCKS(clear_block_mmx, 1) |
| | 487 |
| | 488 static void clear_block_sse(DCTELEM *block) |
468 { | 489 { |
469 __asm__ volatile( | 490 __asm__ volatile( |
470 "pxor %%mm7, %%mm7 \n\t" | 491 "xorps %%xmm0, %%xmm0 \n" |
471 "mov $-128*6, %%"REG_a" \n\t" | 492 "movaps %%xmm0, (%0) \n" |
472 "1: \n\t" | 493 "movaps %%xmm0, 16(%0) \n" |
473 "movq %%mm7, (%0, %%"REG_a") \n\t" | 494 "movaps %%xmm0, 32(%0) \n" |
474 "movq %%mm7, 8(%0, %%"REG_a") \n\t" | 495 "movaps %%xmm0, 48(%0) \n" |
475 "movq %%mm7, 16(%0, %%"REG_a") \n\t" | 496 "movaps %%xmm0, 64(%0) \n" |
476 "movq %%mm7, 24(%0, %%"REG_a") \n\t" | 497 "movaps %%xmm0, 80(%0) \n" |
477 "add $32, %%"REG_a" \n\t" | 498 "movaps %%xmm0, 96(%0) \n" |
478 " js 1b \n\t" | 499 "movaps %%xmm0, 112(%0) \n" |
479 : : "r" (((uint8_t *)blocks)+128*6) | 500 :: "r"(block) |
480 : "%"REG_a | 501 : "memory" |
481 ); | 502 ); |
482 } | 503 } |
483 | 504 |
484 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ | 505 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ |
485 x86_reg i=0; | 506 x86_reg i=0; |
486 __asm__ volatile( | 507 __asm__ volatile( |
2567 } | 2588 } |
2568 | 2589 |
2569 c->put_pixels_clamped = put_pixels_clamped_mmx; | 2590 c->put_pixels_clamped = put_pixels_clamped_mmx; |
2570 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | 2591 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; |
2571 c->add_pixels_clamped = add_pixels_clamped_mmx; | 2592 c->add_pixels_clamped = add_pixels_clamped_mmx; |
| | 2593 c->clear_block = clear_block_mmx; |
2572 c->clear_blocks = clear_blocks_mmx; | 2594 c->clear_blocks = clear_blocks_mmx; |
| | 2595 if (mm_flags & FF_MM_SSE) |
| | 2596 c->clear_block = clear_block_sse; |
2573 | 2597 |
2574 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ | 2598 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ |
2575 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ | 2599 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ |
2576 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ | 2600 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ |
2577 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ | 2601 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ |