Mercurial > libavcodec.hg
changeset 1261:362947395f5c libavcodec
fastdiv patch by (BERO <bero at geocities dot co dot jp>) with fixes & cleanup by me
author | michaelni |
---|---|
date | Wed, 14 May 2003 15:12:13 +0000 |
parents | bc68a29d0dd1 |
children | 82e0e1b9c283 |
files | common.h h263.c i386/mpegvideo_mmx.c msmpeg4.c |
diffstat | 4 files changed, 26 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/common.h Wed May 14 12:32:17 2003 +0000 +++ b/common.h Wed May 14 15:12:13 2003 +0000 @@ -197,6 +197,25 @@ #define FFMAX(a,b) ((a) > (b) ? (a) : (b)) #define FFMIN(a,b) ((a) > (b) ? (b) : (a)) +extern const uint32_t inverse[256]; + +#ifdef ARCH_X86 +# define FASTDIV(a,b) \ + ({\ + int ret,dmy;\ + asm volatile(\ + "mull %3"\ + :"=d"(ret),"=a"(dmy)\ + :"1"(a),"g"(inverse[b])\ + );\ + ret;\ + }) +#elif defined(CONFIG_FASTDIV) +# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a)*inverse[b])>>32)) +#else +# define FASTDIV(a,b) ((a)/(b)) +#endif + #ifdef ARCH_X86 // avoid +32 for shift optimization (gcc should do that ...) static inline int32_t NEG_SSR32( int32_t a, int8_t s){
--- a/h263.c Wed May 14 12:32:17 2003 +0000 +++ b/h263.c Wed May 14 15:12:13 2003 +0000 @@ -73,8 +73,6 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s); static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr); -extern uint32_t inverse[256]; - #ifdef CONFIG_ENCODERS static uint8_t uni_DCtab_lum_len[512]; static uint8_t uni_DCtab_chrom_len[512]; @@ -1823,7 +1821,6 @@ { int a, b, c, wrap, pred, scale; uint16_t *dc_val; - int dummy; /* find prediction */ if (n < 4) { @@ -1859,16 +1856,7 @@ *dir_ptr = 0; /* left */ } /* we assume pred is positive */ -#ifdef ARCH_X86 - asm volatile ( - "xorl %%edx, %%edx \n\t" - "mul %%ecx \n\t" - : "=d" (pred), "=a"(dummy) - : "a" (pred + (scale >> 1)), "c" (inverse[scale]) - ); -#else - pred = (pred + (scale >> 1)) / scale; -#endif + pred = FASTDIV((pred + (scale >> 1)), scale); /* prepare address for prediction update */ *dc_val_ptr = &dc_val[0]; @@ -3668,8 +3656,8 @@ /* DC coef */ if(s->partitioned_frame){ level = s->dc_val[0][ s->block_index[n] ]; - if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs - else level= (level + (s->c_dc_scale>>1))/s->c_dc_scale; + if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); + else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale); dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32; }else{ level = mpeg4_decode_dc(s, n, &dc_pred_dir);
--- a/i386/mpegvideo_mmx.c Wed May 14 12:32:17 2003 +0000 +++ b/i386/mpegvideo_mmx.c Wed May 14 15:12:13 2003 +0000 @@ -26,7 +26,6 @@ extern uint8_t zigzag_direct_noperm[64]; extern uint16_t inv_zigzag_direct16[64]; -extern uint32_t inverse[256]; static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
--- a/msmpeg4.c Wed May 14 12:32:17 2003 +0000 +++ b/msmpeg4.c Wed May 14 15:12:13 2003 +0000 @@ -78,8 +78,6 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); -extern uint32_t inverse[256]; - #ifdef DEBUG int intra_count = 0; @@ -699,7 +697,7 @@ sum+=src[x + y*stride]; } } - return (sum + (scale>>1))/scale; + return FASTDIV((sum + (scale>>1)), scale); } /* dir = 0: left, dir = 1: top prediction */ @@ -763,9 +761,9 @@ b = (b + (8 >> 1)) / 8; c = (c + (8 >> 1)) / 8; } else { - a = (a + (scale >> 1)) / scale; - b = (b + (scale >> 1)) / scale; - c = (c + (scale >> 1)) / scale; + a = FASTDIV((a + (scale >> 1)), scale); + b = FASTDIV((b + (scale >> 1)), scale); + c = FASTDIV((c + (scale >> 1)), scale); } #endif /* XXX: WARNING: they did not choose the same test as MPEG4. This