# HG changeset patch
# User nickols_k
# Date 998323295 0
# Node ID 9e8ae8222ddc888b87930d278d43a45874808afa
# Parent 0b09bd08ef4b92213e995b14fb55183d3627e459
Aligned malloc. Another 10% of speedup.

diff -r 0b09bd08ef4b -r 9e8ae8222ddc utils.c
--- a/utils.c	Wed Aug 15 22:33:03 2001 +0000
+++ b/utils.c	Mon Aug 20 16:01:35 2001 +0000
@@ -20,6 +20,11 @@
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
+#include <limits.h> /* __GLIBC__ and __GLIBC_MINOR__ are defined here */
+#if __GLIBC__ >=2 && __GLIBC_MINOR__ >= 1 /* Fixme about glibc-2.0 */
+#define HAVE_MEMALIGN 1
+#include <malloc.h>
+#endif
 #include "common.h"
 #include "dsputil.h"
 #include "avcodec.h"
@@ -28,7 +33,33 @@
 void *av_mallocz(int size)
 {
     void *ptr;
+#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN )
+/*
+   From glibc-2.1.x manuals:
+   -------------------------
+   The address of a block returned by `malloc' or `realloc' in the GNU
+system is always a multiple of eight (or sixteen on 64-bit systems).
+If you need a block whose address is a multiple of a higher power of
+two than that, use `memalign' or `valloc'.  These functions are
+declared in `stdlib.h'.
+
+   With the GNU library, you can use `free' to free the blocks that
+`memalign' and `valloc' return.  That does not work in BSD,
+however--BSD does not provide any way to free such blocks.
+*/
+    ptr = memalign(64,size);
+    /* Why 64?
+       Indeed, we should align it:
+         on 4 for 386
+         on 16 for 486
+         on 32 for 586, PPro - k6-III
+         on 64 for K7 (maybe for P3 too).
+       Because L1 and L2 caches are aligned on those values.
+       But I don't want to code such logic here!
+     */
+#else
     ptr = malloc(size);
+#endif
     if (!ptr)
         return NULL;
     memset(ptr, 0, size);