changeset 677:0ed44dd02bbf libavcodec

fixing memalign
author michaelni
date Sun, 15 Sep 2002 10:02:15 +0000
parents c3bdb00a98a9
children 9c7a661a9fbe
files mem.c
diffstat 1 files changed, 20 insertions(+), 2 deletions(-)
--- a/mem.c	Sat Sep 14 22:07:35 2002 +0000
+++ b/mem.c	Sun Sep 15 10:02:15 2002 +0000
@@ -29,8 +29,8 @@
 void *av_malloc(int size)
 {
     void *ptr;
-#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN ) && 0
-    ptr = memalign(64,size);
+#if defined (HAVE_MEMALIGN)
+    ptr = memalign(16,size);
     /* Why 64? 
        Indeed, we should align it:
          on 4 for 386
@@ -40,11 +40,29 @@
        Because L1 and L2 caches are aligned on those values.
        But I don't want to code such logic here!
      */
+     /* Why 16?
+        Because some CPUs need alignment, for example SSE2 on the P4 and most RISC
+        CPUs: an unaligned access will either trigger an exception and be emulated
+        in the exception handler, or it will simply segfault (SSE2 on the P4).
+        Why not larger? Because I didn't see a difference in benchmarks ...
+     */
+     /* benchmarks with p3
+        memalign(64)+1		3071,3051,3032
+        memalign(64)+2		3051,3032,3041
+        memalign(64)+4		2911,2896,2915
+        memalign(64)+8		2545,2554,2550
+        memalign(64)+16		2543,2572,2563
+        memalign(64)+32		2546,2545,2571
+        memalign(64)+64		2570,2533,2558
+        
+        by the way, malloc seems to do 8-byte alignment by default here
+     */
 #else
     ptr = malloc(size);
 #endif
     if (!ptr)
         return NULL;
+//fprintf(stderr, "%X %d\n", (int)ptr, size);
     /* NOTE: this memset should not be present */
     memset(ptr, 0, size);
     return ptr;
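
The old comment notes that picking the alignment per CPU "logic" was deliberately not coded here, and the patch instead relies on memalign(16,...). As a side note, below is a minimal sketch (not part of this changeset) of how 16-byte alignment could be obtained even without memalign(), by over-allocating with malloc() and stashing the original pointer just before the aligned block. The names av_malloc_aligned and av_free_aligned are hypothetical, chosen only for illustration.

#include <stdlib.h>
#include <stdint.h>

#define ALIGN 16

void *av_malloc_aligned(size_t size)
{
    /* allocate extra room for the alignment padding plus one stored pointer */
    void *raw = malloc(size + ALIGN - 1 + sizeof(void *));
    if (!raw)
        return NULL;
    /* step past the stored pointer, then round up to the next 16-byte boundary */
    uintptr_t addr = (uintptr_t)raw + sizeof(void *);
    addr = (addr + ALIGN - 1) & ~(uintptr_t)(ALIGN - 1);
    void *ptr = (void *)addr;
    /* remember where the real allocation started so it can be freed later */
    ((void **)ptr)[-1] = raw;
    return ptr;
}

void av_free_aligned(void *ptr)
{
    if (ptr)
        free(((void **)ptr)[-1]);
}

The pointer returned by such a wrapper must only be released through the matching free wrapper, never through free() directly, which is exactly the kind of bookkeeping the original comment wanted to avoid when memalign() is available.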