changeset 77:9e8ae8222ddc libavcodec

Aligned malloc. Another 10% speedup.
author nickols_k
date Mon, 20 Aug 2001 16:01:35 +0000
parents 0b09bd08ef4b
children 92ed1c4f94fa
files utils.c
diffstat 1 files changed, 31 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/utils.c	Wed Aug 15 22:33:03 2001 +0000
+++ b/utils.c	Mon Aug 20 16:01:35 2001 +0000
@@ -20,6 +20,11 @@
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
+#include <limits.h> /* __GLIBC__ and __GLIBC_MINOR__ are defined here */
+#if __GLIBC__ >=2 && __GLIBC_MINOR__ >= 1 /* Fixme about glibc-2.0 */
+#define HAVE_MEMALIGN 1
+#include <malloc.h>
+#endif
 #include "common.h"
 #include "dsputil.h"
 #include "avcodec.h"
@@ -28,7 +33,33 @@
 void *av_mallocz(int size)
 {
     void *ptr;
+#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN )
+/*
+   From glibc-2.1.x manuals:
+   -------------------------
+   The address of a block returned by `malloc' or `realloc' in the GNU
+system is always a multiple of eight (or sixteen on 64-bit systems).
+If you need a block whose address is a multiple of a higher power of
+two than that, use `memalign' or `valloc'.  These functions are
+declared in `stdlib.h'.
+
+   With the GNU library, you can use `free' to free the blocks that
+`memalign' and `valloc' return.  That does not work in BSD,
+however--BSD does not provide any way to free such blocks.
+*/
+    ptr = memalign(64,size);
+    /* Why 64?
+       Ideally, the alignment should depend on the CPU:
+         4 for 386
+         16 for 486
+         32 for 586, PPro - K6-III
+         64 for K7 (maybe for P3 too),
+       because L1 and L2 caches are aligned on those values.
+       But I don't want to code such logic here!
+     */
+#else
     ptr = malloc(size);
+#endif
     if (!ptr)
         return NULL;
     memset(ptr, 0, size);