Mercurial > libavcodec.hg
comparison dsputil.c @ 6384:0a403ade8c81 libavcodec
simd and unroll png_filter_row
cycles per 1000 pixels on core2:
left: 9211->5170
top: 9283->2138
avg: 12215->7611
paeth: 64024->17360
overall rgb png decoding speed: +45%
overall greyscale png decoding speed: +6%
author | lorenm |
---|---|
date | Thu, 21 Feb 2008 07:10:46 +0000 |
parents | 2799f65a24de |
children | 40fbc878ce3f |
comparison
equal
deleted
inserted
replaced
6383:7ba06222bda7 | 6384:0a403ade8c81 |
---|---|
41 /* vorbis.c */ | 41 /* vorbis.c */ |
42 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); | 42 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); |
43 | 43 |
44 /* flacenc.c */ | 44 /* flacenc.c */ |
45 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); | 45 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); |
46 | |
47 /* pngdec.c */ | |
48 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); | |
46 | 49 |
47 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; | 50 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
48 uint32_t ff_squareTbl[512] = {0, }; | 51 uint32_t ff_squareTbl[512] = {0, }; |
49 | 52 |
50 const uint8_t ff_zigzag_direct[64] = { | 53 const uint8_t ff_zigzag_direct[64] = { |
3286 } | 3289 } |
3287 for(; i<w; i++) | 3290 for(; i<w; i++) |
3288 dst[i+0] += src[i+0]; | 3291 dst[i+0] += src[i+0]; |
3289 } | 3292 } |
3290 | 3293 |
/**
 * Add two byte arrays element-wise, modulo 256: dst[i] = src1[i] + src2[i].
 *
 * Most of the data is processed sizeof(long) bytes at a time; whatever is
 * left over is handled one byte at a time.
 *
 * NOTE(review): the word loop accesses the buffers through unsigned long
 * pointers, exactly like the original did through long pointers, so it
 * relies on the platform tolerating such (possibly unaligned) accesses.
 *
 * @param dst  output buffer; indices 0..w-1 are written
 * @param src1 first input; indices 0..w-1 are read
 * @param src2 second input; indices 0..w-1 are read
 * @param w    number of bytes to process; nothing is done when w <= 0
 */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* Per-byte masks derived from the width of long, so they are valid for
     * both 32-bit and 64-bit long (0x7f7f... and 0x8080...). */
    const unsigned long pb_7f = ~0UL/255 * 0x7f;
    const unsigned long pb_80 = ~0UL/255 * 0x80;
    int i;

    /* sizeof yields an unsigned type: without the cast, w < sizeof(long)
     * makes the bound wrap around and the loop runs far out of bounds. */
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
        /* Unsigned lanes: the masked sum may exceed LONG_MAX, which would be
         * undefined behaviour with signed long. */
        unsigned long a = *(unsigned long*)(src1+i);
        unsigned long b = *(unsigned long*)(src2+i);
        /* Add the low 7 bits of every byte (no carry can cross into the
         * neighbouring byte), then fix up each byte's top bit with xor. */
        *(unsigned long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    /* Remaining bytes that do not fill a whole word. */
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}
3304 | |
3291 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | 3305 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
3292 int i; | 3306 int i; |
3293 for(i=0; i+7<w; i+=8){ | 3307 for(i=0; i+7<w; i+=8){ |
3294 dst[i+0] = src1[i+0]-src2[i+0]; | 3308 dst[i+0] = src1[i+0]-src2[i+0]; |
3295 dst[i+1] = src1[i+1]-src2[i+1]; | 3309 dst[i+1] = src1[i+1]-src2[i+1]; |
4230 #endif | 4244 #endif |
4231 | 4245 |
4232 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; | 4246 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; |
4233 | 4247 |
4234 c->add_bytes= add_bytes_c; | 4248 c->add_bytes= add_bytes_c; |
4249 c->add_bytes_l2= add_bytes_l2_c; | |
4235 c->diff_bytes= diff_bytes_c; | 4250 c->diff_bytes= diff_bytes_c; |
4236 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; | 4251 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; |
4237 c->bswap_buf= bswap_buf; | 4252 c->bswap_buf= bswap_buf; |
4253 #ifdef CONFIG_PNG_DECODER | |
4254 c->add_png_paeth_prediction= ff_add_png_paeth_prediction; | |
4255 #endif | |
4238 | 4256 |
4239 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; | 4257 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; |
4240 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; | 4258 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; |
4241 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; | 4259 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; |
4242 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; | 4260 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; |