comparison dsputil.c @ 3568:945caa35ee9a libavcodec

sse and 3dnow implementations of float->int conversion and mdct windowing. 15% faster vorbis.
author lorenm
date Thu, 10 Aug 2006 19:06:25 +0000
parents 545a15c19c91
children 47821be55b6c
comparison
equal deleted inserted replaced
3567:1f8730f62765 3568:945caa35ee9a
3751 WARPER8_16_SQ(dct_max8x8_c, dct_max16_c) 3751 WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
3752 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) 3752 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
3753 WARPER8_16_SQ(rd8x8_c, rd16_c) 3753 WARPER8_16_SQ(rd8x8_c, rd16_c)
3754 WARPER8_16_SQ(bit8x8_c, bit16_c) 3754 WARPER8_16_SQ(bit8x8_c, bit16_c)
3755 3755
/**
 * Multiply a float vector in place by a second vector, element by element.
 *
 * @param dst vector that is both the left operand and the destination
 * @param src vector of multipliers, read only
 * @param len number of elements to process; nothing is done when len <= 0
 */
static void vector_fmul_c(float *dst, const float *src, int len){
    int idx = 0;

    while(idx < len){
        dst[idx] = dst[idx] * src[idx];
        idx++;
    }
}
3761
/**
 * Multiply src0 by src1 read back to front: dst[i] = src0[i] * src1[len-1-i].
 *
 * @param dst  destination vector, len elements are written
 * @param src0 first factor, read in ascending order
 * @param src1 second factor, read in descending order starting at src1[len-1]
 * @param len  number of elements to process; nothing is done when len <= 0
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    /* Index from the end inside the loop instead of pre-advancing src1 by
     * len-1: with len == 0 that adjustment would form a pointer before the
     * start of the array, which is undefined even if never dereferenced. */
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[len-1-i];
}
3768
/**
 * Compute src0[i]*src1[i] + src2[i] + src3 for each i and store the result
 * at dst[i*step].
 *
 * @param dst  destination; only every step-th element is written
 * @param src0 first factor
 * @param src1 second factor
 * @param src2 per-element addend
 * @param src3 integer constant added to every element
 * @param len  number of elements to process; nothing is done when len <= 0
 * @param step distance, in floats, between consecutive outputs in dst
 */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int n = 0;

    while(n < len){
        /* kept as a single expression so the rounding sequence is unchanged */
        dst[n*step] = src0[n] * src1[n] + src2[n] + src3;
        n++;
    }
}
3774
/**
 * Convert floats to signed 16-bit integers by inspecting the raw 32-bit
 * pattern of each float instead of performing a float->int conversion.
 *
 * A pattern in 0x43c00000..0x43c0ffff (384.0f up to just below 386.0f in
 * IEEE-754 single precision) yields its low 16 bits minus 0x8000, so 384.0f
 * gives -32768 and 385.0f gives 0.
 * NOTE(review): this suggests callers pre-bias their samples by 385.0f;
 * confirm against the callers.
 *
 * Any pattern with one of bits 16..19 set is treated as out of range and
 * saturated: 32767 if it is a non-negative pattern above 0x43c0ffff,
 * -32768 otherwise (including every pattern with the sign bit set).
 * Patterns whose bits 16..19 are clear are not range checked any further;
 * their low 16 bits are used as is.
 *
 * Assumes float is a 32-bit type, as the pointer cast this replaces did.
 *
 * @param dst destination, len elements are written
 * @param src source floats
 * @param len number of elements to convert; nothing is done when len <= 0
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++) {
        /* Read the bits through a union: a cast to int32_t* would violate
         * the aliasing rules and drop the const qualifier. */
        union { float f; uint32_t u; } pun;
        int32_t sample;

        pun.f = src[i];
        if(pun.u & 0xf0000){
            /* An explicit comparison replaces (0x43c0ffff - tmp)>>31, whose
             * result depended on the width of int_fast32_t, on signed
             * overflow and on right-shifting a negative value. */
            sample = (!(pun.u >> 31) && pun.u > 0x43c0ffffU) ? 0xFFFF : 0;
        }else{
            /* mask first so the subtraction below always fits in int16_t */
            sample = pun.u & 0xFFFF;
        }
        dst[i] = sample - 0x8000;
    }
}
3788
3756 /* XXX: those functions should be suppressed ASAP when all IDCTs are 3789 /* XXX: those functions should be suppressed ASAP when all IDCTs are
3757 converted */ 3790 converted */
3758 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 3791 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
3759 { 3792 {
3760 j_rev_dct (block); 3793 j_rev_dct (block);
4094 #endif 4127 #endif
4095 4128
4096 #ifdef CONFIG_VORBIS_DECODER 4129 #ifdef CONFIG_VORBIS_DECODER
4097 c->vorbis_inverse_coupling = vorbis_inverse_coupling; 4130 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
4098 #endif 4131 #endif
4132 c->vector_fmul = vector_fmul_c;
4133 c->vector_fmul_reverse = vector_fmul_reverse_c;
4134 c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
4135 c->float_to_int16 = ff_float_to_int16_c;
4099 4136
4100 c->shrink[0]= ff_img_copy_plane; 4137 c->shrink[0]= ff_img_copy_plane;
4101 c->shrink[1]= ff_shrink22; 4138 c->shrink[1]= ff_shrink22;
4102 c->shrink[2]= ff_shrink44; 4139 c->shrink[2]= ff_shrink44;
4103 c->shrink[3]= ff_shrink88; 4140 c->shrink[3]= ff_shrink88;