comparison dsputil.c @ 6384:0a403ade8c81 libavcodec

simd and unroll png_filter_row. Cycles per 1000 pixels on core2: left: 9211->5170, top: 9283->2138, avg: 12215->7611, paeth: 64024->17360. Overall rgb png decoding speed: +45%. Overall greyscale png decoding speed: +6%.
author lorenm
date Thu, 21 Feb 2008 07:10:46 +0000
parents 2799f65a24de
children 40fbc878ce3f
comparison
equal deleted inserted replaced
6383:7ba06222bda7 6384:0a403ade8c81
41 /* vorbis.c */ 41 /* vorbis.c */
42 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); 42 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
43 43
44 /* flacenc.c */ 44 /* flacenc.c */
45 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); 45 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
46
47 /* pngdec.c */
48 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
46 49
47 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; 50 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
48 uint32_t ff_squareTbl[512] = {0, }; 51 uint32_t ff_squareTbl[512] = {0, };
49 52
50 const uint8_t ff_zigzag_direct[64] = { 53 const uint8_t ff_zigzag_direct[64] = {
3286 } 3289 }
3287 for(; i<w; i++) 3290 for(; i<w; i++)
3288 dst[i+0] += src[i+0]; 3291 dst[i+0] += src[i+0];
3289 } 3292 }
3290 3293
3294 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
3295 int i;
3296 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
3297 long a = *(long*)(src1+i);
3298 long b = *(long*)(src2+i);
3299 *(long*)(dst+i) = ((a&0x7f7f7f7f7f7f7f7fL) + (b&0x7f7f7f7f7f7f7f7fL)) ^ ((a^b)&0x8080808080808080L);
3300 }
3301 for(; i<w; i++)
3302 dst[i] = src1[i]+src2[i];
3303 }
3304
3291 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ 3305 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
3292 int i; 3306 int i;
3293 for(i=0; i+7<w; i+=8){ 3307 for(i=0; i+7<w; i+=8){
3294 dst[i+0] = src1[i+0]-src2[i+0]; 3308 dst[i+0] = src1[i+0]-src2[i+0];
3295 dst[i+1] = src1[i+1]-src2[i+1]; 3309 dst[i+1] = src1[i+1]-src2[i+1];
4230 #endif 4244 #endif
4231 4245
4232 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; 4246 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
4233 4247
4234 c->add_bytes= add_bytes_c; 4248 c->add_bytes= add_bytes_c;
4249 c->add_bytes_l2= add_bytes_l2_c;
4235 c->diff_bytes= diff_bytes_c; 4250 c->diff_bytes= diff_bytes_c;
4236 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; 4251 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
4237 c->bswap_buf= bswap_buf; 4252 c->bswap_buf= bswap_buf;
4253 #ifdef CONFIG_PNG_DECODER
4254 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
4255 #endif
4238 4256
4239 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; 4257 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
4240 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; 4258 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
4241 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; 4259 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
4242 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; 4260 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;