Mercurial > libavcodec.hg
comparison dsputil.h @ 5737:efa3c1f9259a libavcodec
sse2 version of compute_autocorr().
4x faster than c (somehow, even though doubles only allow 2x simd).
overall flac encoding: 15-50% faster on core2, 4-11% on k8, 3-13% on p4.
author | lorenm |
---|---|
date | Sat, 29 Sep 2007 22:31:18 +0000 |
parents | d7970c9e3049 |
children | 09f99af1db40 |
comparison
equal
deleted
inserted
replaced
5736:810067f2c33d | 5737:efa3c1f9259a |
---|---|
326 | 326 |
327 void (*h261_loop_filter)(uint8_t *src, int stride); | 327 void (*h261_loop_filter)(uint8_t *src, int stride); |
328 | 328 |
329 /* assume len is a multiple of 4, and arrays are 16-byte aligned */ | 329 /* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
330 void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); | 330 void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); |
 | 331 /* no alignment needed */ |
 | 332 void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); |
331 /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | 333 /* assume len is a multiple of 8, and arrays are 16-byte aligned */ |
332 void (*vector_fmul)(float *dst, const float *src, int len); | 334 void (*vector_fmul)(float *dst, const float *src, int len); |
333 void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); | 335 void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); |
334 /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ | 336 /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ |
335 void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step); | 337 void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step); |