# HG changeset patch # User lorenm # Date 1228944917 0 # Node ID 800444234375dea6fdb57bdc17aa931f8c5d13c8 # Parent 7a1d037482c41796dcc131ebee20efc3faf64161 clear_block mmx diff -r 7a1d037482c4 -r 800444234375 dsputil.c --- a/dsputil.c Wed Dec 10 21:26:00 2008 +0000 +++ b/dsputil.c Wed Dec 10 21:35:17 2008 +0000 @@ -3420,6 +3420,11 @@ } } +static void clear_block_c(DCTELEM *block) +{ + memset(block, 0, sizeof(DCTELEM)*64); +} + /** * memset(blocks, 0, sizeof(DCTELEM)*6*64) */ @@ -4288,6 +4293,7 @@ c->sum_abs_dctelem = sum_abs_dctelem_c; c->gmc1 = gmc1_c; c->gmc = ff_gmc_c; + c->clear_block = clear_block_c; c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; diff -r 7a1d037482c4 -r 800444234375 dsputil.h --- a/dsputil.h Wed Dec 10 21:26:00 2008 +0000 +++ b/dsputil.h Wed Dec 10 21:35:17 2008 +0000 @@ -203,6 +203,7 @@ */ void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); + void (*clear_block)(DCTELEM *block/*align 16*/); void (*clear_blocks)(DCTELEM *blocks/*align 16*/); int (*pix_sum)(uint8_t * pix, int line_size); int (*pix_norm1)(uint8_t * pix, int line_size); diff -r 7a1d037482c4 -r 800444234375 h263.c --- a/h263.c Wed Dec 10 21:26:00 2008 +0000 +++ b/h263.c Wed Dec 10 21:35:17 2008 +0000 @@ -810,7 +810,7 @@ for (i = 0; i < 6; i++) { if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ s->block_last_index[i]= -1; - memset(s->block[i], 0, sizeof(DCTELEM)*64); + s->dsp.clear_block(s->block[i]); } } }else{ @@ -853,7 +853,7 @@ for (i = 0; i < 6; i++) { if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ s->block_last_index[i]= -1; - memset(s->block[i], 0, sizeof(DCTELEM)*64); + s->dsp.clear_block(s->block[i]); } } }else{ @@ -4651,7 +4651,7 @@ rl = &rl_intra_aic; i = 0; s->gb= gb; - memset(block, 0, sizeof(DCTELEM)*64); + s->dsp.clear_block(block); goto retry; } av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); diff -r 7a1d037482c4 -r 800444234375 i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Wed Dec 10 21:26:00 2008 +0000 +++ b/i386/dsputil_mmx.c Wed Dec 10 21:35:17 2008 +0000 @@ -464,21 +464,42 @@ ); } -static void clear_blocks_mmx(DCTELEM *blocks) +#define CLEAR_BLOCKS(name,n) \ +static void name(DCTELEM *blocks)\ +{\ + __asm__ volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "mov %1, %%"REG_a" \n\t"\ + "1: \n\t"\ + "movq %%mm7, (%0, %%"REG_a") \n\t"\ + "movq %%mm7, 8(%0, %%"REG_a") \n\t"\ + "movq %%mm7, 16(%0, %%"REG_a") \n\t"\ + "movq %%mm7, 24(%0, %%"REG_a") \n\t"\ + "add $32, %%"REG_a" \n\t"\ + " js 1b \n\t"\ + : : "r" (((uint8_t *)blocks)+128*n),\ + "i" (-128*n)\ + : "%"REG_a\ + );\ +} +CLEAR_BLOCKS(clear_blocks_mmx, 6) +CLEAR_BLOCKS(clear_block_mmx, 1) + +static void clear_block_sse(DCTELEM *block) { __asm__ volatile( - "pxor %%mm7, %%mm7 \n\t" - "mov $-128*6, %%"REG_a" \n\t" - "1: \n\t" - "movq %%mm7, (%0, %%"REG_a") \n\t" - "movq %%mm7, 8(%0, %%"REG_a") \n\t" - "movq %%mm7, 16(%0, %%"REG_a") \n\t" - "movq %%mm7, 24(%0, %%"REG_a") \n\t" - "add $32, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (((uint8_t *)blocks)+128*6) - : "%"REG_a - ); + "xorps %%xmm0, %%xmm0 \n" + "movaps %%xmm0, (%0) \n" + "movaps %%xmm0, 16(%0) \n" + "movaps %%xmm0, 32(%0) \n" + "movaps %%xmm0, 48(%0) \n" + "movaps %%xmm0, 64(%0) \n" + "movaps %%xmm0, 80(%0) \n" + "movaps %%xmm0, 96(%0) \n" + "movaps %%xmm0, 112(%0) \n" + :: "r"(block) + : "memory" + ); } static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ @@ -2569,7 +2590,10 @@ c->put_pixels_clamped = put_pixels_clamped_mmx; c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; c->add_pixels_clamped = add_pixels_clamped_mmx; + c->clear_block = clear_block_mmx; c->clear_blocks = clear_blocks_mmx; + if (mm_flags & FF_MM_SSE) + c->clear_block = clear_block_sse; #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ diff -r 7a1d037482c4 -r 800444234375 intrax8.c --- a/intrax8.c Wed Dec 10 21:26:00 2008 +0000 +++ b/intrax8.c Wed Dec 10 21:35:17 2008 +0000 @@ -511,7 +511,7 @@ int sign; assert(w->orient<12); - memset(s->block[0],0x00,64*sizeof(DCTELEM)); + s->dsp.clear_block(s->block[0]); if(chroma){ dc_mode=2; diff -r 7a1d037482c4 -r 800444234375 mimic.c --- a/mimic.c Wed Dec 10 21:26:00 2008 +0000 +++ b/mimic.c Wed Dec 10 21:35:17 2008 +0000 @@ -163,7 +163,7 @@ DCTELEM *block = ctx->dct_block; unsigned int pos; - memset(block, 0, 64 * sizeof(DCTELEM)); + ctx->dsp.clear_block(block); block[0] = get_bits(&ctx->gb, 8) << 3; diff -r 7a1d037482c4 -r 800444234375 mjpegdec.c --- a/mjpegdec.c Wed Dec 10 21:26:00 2008 +0000 +++ b/mjpegdec.c Wed Dec 10 21:35:17 2008 +0000 @@ -444,7 +444,7 @@ int dc_index, int16_t *quant_matrix, int Al) { int val; - memset(block, 0, 64*sizeof(DCTELEM)); + s->dsp.clear_block(block); val = mjpeg_decode_dc(s, dc_index); if (val == 0xffff) { av_log(s->avctx, AV_LOG_ERROR, "error dc\n"); @@ -800,7 +800,7 @@ if(s->interlaced && s->bottom_field) ptr += linesize[c] >> 1; if(!s->progressive) { - memset(s->block, 0, sizeof(s->block)); + s->dsp.clear_block(s->block); if(decode_block(s, s->block, i, s->dc_index[i], s->ac_index[i], s->quant_matrixes[ s->quant_index[c] ]) < 0) { diff -r 7a1d037482c4 -r 800444234375 vp3.c --- a/vp3.c Wed Dec 10 21:26:00 2008 +0000 +++ b/vp3.c Wed Dec 10 21:35:17 2008 +0000 @@ -1402,14 +1402,14 @@ /* dequantize the DCT coefficients */ if(s->avctx->idct_algo==FF_IDCT_VP3){ Coeff *coeff= s->coeffs + i; - memset(block, 0, sizeof(block)); + s->dsp.clear_block(block); while(coeff->next){ block[coeff->index]= coeff->coeff * dequantizer[coeff->index]; coeff= coeff->next; } }else{ Coeff *coeff= s->coeffs + i; - memset(block, 0, sizeof(block)); + s->dsp.clear_block(block); while(coeff->next){ block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2; coeff= coeff->next; diff -r 7a1d037482c4 -r 800444234375 vp56.c --- a/vp56.c Wed Dec 10 21:26:00 2008 +0000 +++ b/vp56.c Wed Dec 10 21:35:17 2008 +0000 @@ -405,7 +405,7 @@ mb_type = vp56_decode_mv(s, row, col); ref_frame = vp56_reference_frame[mb_type]; - memset(s->block_coeff, 0, sizeof(s->block_coeff)); + s->dsp.clear_blocks(*s->block_coeff); s->parse_coeff(s); diff -r 7a1d037482c4 -r 800444234375 wmv2.c --- a/wmv2.c Wed Dec 10 21:26:00 2008 +0000 +++ b/wmv2.c Wed Dec 10 21:35:17 2008 +0000 @@ -43,12 +43,12 @@ case 1: ff_simple_idct84_add(dst , stride, block1); ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]); - memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM)); + s->dsp.clear_block(w->abt_block2[n]); break; case 2: ff_simple_idct48_add(dst , stride, block1); ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); - memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM)); + s->dsp.clear_block(w->abt_block2[n]); break; default: av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");