# HG changeset patch # User michaelni # Date 1052925133 0 # Node ID 362947395f5cb4e210d62b5b5a3bc650d2ee3d10 # Parent bc68a29d0dd1d6589993ef39b1bad0d57872602b fastdiv patch by (BERO ) with fixes & cleanup by me diff -r bc68a29d0dd1 -r 362947395f5c common.h --- a/common.h Wed May 14 12:32:17 2003 +0000 +++ b/common.h Wed May 14 15:12:13 2003 +0000 @@ -197,6 +197,25 @@ #define FFMAX(a,b) ((a) > (b) ? (a) : (b)) #define FFMIN(a,b) ((a) > (b) ? (b) : (a)) +extern const uint32_t inverse[256]; + +#ifdef ARCH_X86 +# define FASTDIV(a,b) \ + ({\ + int ret,dmy;\ + asm volatile(\ + "mull %3"\ + :"=d"(ret),"=a"(dmy)\ + :"1"(a),"g"(inverse[b])\ + );\ + ret;\ + }) +#elif defined(CONFIG_FASTDIV) +# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a)*inverse[b])>>32)) +#else +# define FASTDIV(a,b) ((a)/(b)) +#endif + #ifdef ARCH_X86 // avoid +32 for shift optimization (gcc should do that ...) static inline int32_t NEG_SSR32( int32_t a, int8_t s){ diff -r bc68a29d0dd1 -r 362947395f5c h263.c --- a/h263.c Wed May 14 12:32:17 2003 +0000 +++ b/h263.c Wed May 14 15:12:13 2003 +0000 @@ -73,8 +73,6 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s); static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr); -extern uint32_t inverse[256]; - #ifdef CONFIG_ENCODERS static uint8_t uni_DCtab_lum_len[512]; static uint8_t uni_DCtab_chrom_len[512]; @@ -1823,7 +1821,6 @@ { int a, b, c, wrap, pred, scale; uint16_t *dc_val; - int dummy; /* find prediction */ if (n < 4) { @@ -1859,16 +1856,7 @@ *dir_ptr = 0; /* left */ } /* we assume pred is positive */ -#ifdef ARCH_X86 - asm volatile ( - "xorl %%edx, %%edx \n\t" - "mul %%ecx \n\t" - : "=d" (pred), "=a"(dummy) - : "a" (pred + (scale >> 1)), "c" (inverse[scale]) - ); -#else - pred = (pred + (scale >> 1)) / scale; -#endif + pred = FASTDIV((pred + (scale >> 1)), scale); /* prepare address for prediction update */ *dc_val_ptr = &dc_val[0]; @@ -3668,8 +3656,8 @@ /* DC coef */ if(s->partitioned_frame){ level = s->dc_val[0][ s->block_index[n] ]; - if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs - else level= (level + (s->c_dc_scale>>1))/s->c_dc_scale; + if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); + else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale); dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<>1))/scale; + return FASTDIV((sum + (scale>>1)), scale); } /* dir = 0: left, dir = 1: top prediction */ @@ -763,9 +761,9 @@ b = (b + (8 >> 1)) / 8; c = (c + (8 >> 1)) / 8; } else { - a = (a + (scale >> 1)) / scale; - b = (b + (scale >> 1)) / scale; - c = (c + (scale >> 1)) / scale; + a = FASTDIV((a + (scale >> 1)), scale); + b = FASTDIV((b + (scale >> 1)), scale); + c = FASTDIV((c + (scale >> 1)), scale); } #endif /* XXX: WARNING: they did not choose the same test as MPEG4. This