Mercurial > mplayer.hg
changeset 16174:b118c000ddd2
add the liba52 amd64 changes in a separate diff file
author | aurel |
---|---|
date | Fri, 05 Aug 2005 13:37:32 +0000 |
parents | d6219ce521e9 |
children | 6b86089c2edd |
files | liba52/liba52_amd64_changes.diff |
diffstat | 1 files changed, 2189 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liba52/liba52_amd64_changes.diff Fri Aug 05 13:37:32 2005 +0000 @@ -0,0 +1,2189 @@ +Index: liba52/a52_internal.h +=================================================================== +RCS file: /cvsroot/mplayer/main/liba52/a52_internal.h,v +retrieving revision 1.4 +diff -u -r1.4 a52_internal.h +--- liba52/a52_internal.h 22 Mar 2005 23:27:18 -0000 1.4 ++++ liba52/a52_internal.h 31 Jul 2005 21:20:09 -0000 +@@ -41,6 +41,20 @@ + #define DELTA_BIT_NONE (2) + #define DELTA_BIT_RESERVED (3) + ++#ifdef ARCH_X86_64 ++# define REG_a "rax" ++# define REG_d "rdx" ++# define REG_S "rsi" ++# define REG_D "rdi" ++# define REG_BP "rbp" ++#else ++# define REG_a "eax" ++# define REG_d "edx" ++# define REG_S "esi" ++# define REG_D "edi" ++# define REG_BP "ebp" ++#endif ++ + void bit_allocate (a52_state_t * state, a52_ba_t * ba, int bndstart, + int start, int end, int fastleak, int slowleak, + uint8_t * exp, int8_t * bap); +Index: liba52/downmix.c +=================================================================== +RCS file: /cvsroot/mplayer/main/liba52/downmix.c,v +retrieving revision 1.17 +diff -u -r1.17 downmix.c +--- liba52/downmix.c 22 Mar 2005 23:27:18 -0000 1.17 ++++ liba52/downmix.c 31 Jul 2005 21:20:09 -0000 +@@ -56,7 +56,7 @@ + { + upmix= upmix_C; + downmix= downmix_C; +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX; + if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE; + if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow; +@@ -684,27 +684,27 @@ + } + } + +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) + { + asm volatile( + "movlps %2, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps (%0, %%esi), %%xmm0 \n\t" +- "movaps 16(%0, %%esi), %%xmm1 \n\t" +- "addps (%1, %%esi), %%xmm0 \n\t" +- "addps 16(%1, %%esi), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps (%1, %%"REG_S"), %%xmm0 \n\t" ++ "addps 16(%1, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm7, %%xmm1 \n\t" +- "movaps %%xmm0, (%1, %%esi) \n\t" +- "movaps %%xmm1, 16(%1, %%esi) \n\t" +- "addl $32, %%esi \n\t" ++ "movaps %%xmm0, (%1, %%"REG_S") \n\t" ++ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" ++ "add $32, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (src+256), "r" (dest+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -713,19 +713,19 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps (%0, %%esi), %%xmm0 \n\t" +- "movaps 1024(%0, %%esi), %%xmm1 \n\t" +- "addps 2048(%0, %%esi), %%xmm0 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm1, %%xmm0 \n\t" +- "movaps %%xmm0, (%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm0, (%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -734,20 +734,20 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps (%0, %%esi), %%xmm0 \n\t" +- "movaps 1024(%0, %%esi), %%xmm1 \n\t" +- "addps 2048(%0, %%esi), %%xmm0 \n\t" +- "addps 3072(%0, %%esi), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm1, %%xmm0 \n\t" +- "movaps %%xmm0, (%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm0, (%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -756,21 +756,21 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps (%0, %%esi), %%xmm0 \n\t" +- "movaps 1024(%0, %%esi), %%xmm1 \n\t" +- "addps 2048(%0, %%esi), %%xmm0 \n\t" +- "addps 3072(%0, %%esi), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" +- "addps 4096(%0, %%esi), %%xmm1 \n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm1, %%xmm0 \n\t" +- "movaps %%xmm0, (%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm0, (%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -779,21 +779,21 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 1024(%0, %%esi), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common +- "movaps (%0, %%esi), %%xmm1 \n\t" +- "movaps 2048(%0, %%esi), %%xmm2 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" +- "movaps %%xmm1, (%0, %%esi) \n\t" +- "movaps %%xmm2, 1024(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm1, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -802,21 +802,21 @@ + asm volatile( + "movlps %2, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 1024(%1, %%esi), %%xmm0 \n\t" ++ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" //common +- "movaps (%0, %%esi), %%xmm1 \n\t" +- "movaps (%1, %%esi), %%xmm2 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps (%1, %%"REG_S"), %%xmm2 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" +- "movaps %%xmm1, (%0, %%esi) \n\t" +- "movaps %%xmm2, (%1, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm1, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm2, (%1, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (left+256), "r" (right+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -825,22 +825,22 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround +- "movaps (%0, %%esi), %%xmm1 \n\t" +- "movaps 1024(%0, %%esi), %%xmm2 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" + "subps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" +- "movaps %%xmm1, (%0, %%esi) \n\t" +- "movaps %%xmm2, 1024(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm1, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -849,22 +849,22 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 1024(%0, %%esi), %%xmm0 \n\t" +- "addps 3072(%0, %%esi), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common +- "movaps (%0, %%esi), %%xmm1 \n\t" +- "movaps 2048(%0, %%esi), %%xmm2 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" +- "movaps %%xmm1, (%0, %%esi) \n\t" +- "movaps %%xmm2, 1024(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm1, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -873,24 +873,24 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 1024(%0, %%esi), %%xmm0 \n\t" +- "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround + "addps %%xmm7, %%xmm0 \n\t" // common +- "movaps (%0, %%esi), %%xmm1 \n\t" +- "movaps 2048(%0, %%esi), %%xmm2 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" + "subps %%xmm3, %%xmm1 \n\t" + "addps %%xmm3, %%xmm2 \n\t" +- "movaps %%xmm1, (%0, %%esi) \n\t" +- "movaps %%xmm2, 1024(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm1, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -899,23 +899,23 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 2048(%0, %%esi), %%xmm0 \n\t" +- "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround +- "movaps (%0, %%esi), %%xmm1 \n\t" +- "movaps 1024(%0, %%esi), %%xmm2 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm1 \n\t" + "addps %%xmm7, %%xmm2 \n\t" + "subps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm2 \n\t" +- "movaps %%xmm1, (%0, %%esi) \n\t" +- "movaps %%xmm2, 1024(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm1, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -924,22 +924,22 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 1024(%0, %%esi), %%xmm0 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common + "movaps %%xmm0, %%xmm1 \n\t" // common +- "addps (%0, %%esi), %%xmm0 \n\t" +- "addps 2048(%0, %%esi), %%xmm1 \n\t" +- "addps 3072(%0, %%esi), %%xmm0 \n\t" +- "addps 4096(%0, %%esi), %%xmm1 \n\t" +- "movaps %%xmm0, (%0, %%esi) \n\t" +- "movaps %%xmm1, 1024(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "addps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" ++ "movaps %%xmm0, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -948,25 +948,25 @@ + asm volatile( + "movlps %1, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps 1024(%0, %%esi), %%xmm0 \n\t" +- "movaps 3072(%0, %%esi), %%xmm2 \n\t" ++ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" + "addps %%xmm7, %%xmm0 \n\t" // common +- "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround +- "movaps (%0, %%esi), %%xmm1 \n\t" +- "movaps 2048(%0, %%esi), %%xmm3 \n\t" ++ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" ++ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" + "subps %%xmm2, %%xmm1 \n\t" + "addps %%xmm2, %%xmm3 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "addps %%xmm0, %%xmm3 \n\t" +- "movaps %%xmm1, (%0, %%esi) \n\t" +- "movaps %%xmm3, 1024(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm1, (%0, %%"REG_S") \n\t" ++ "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -975,40 +975,40 @@ + asm volatile( + "movlps %2, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movaps (%0, %%esi), %%xmm0 \n\t" +- "movaps 16(%0, %%esi), %%xmm1 \n\t" +- "addps 1024(%0, %%esi), %%xmm0 \n\t" +- "addps 1040(%0, %%esi), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" ++ "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" ++ "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" + "addps %%xmm7, %%xmm0 \n\t" + "addps %%xmm7, %%xmm1 \n\t" +- "movaps %%xmm0, (%1, %%esi) \n\t" +- "movaps %%xmm1, 16(%1, %%esi) \n\t" +- "addl $32, %%esi \n\t" ++ "movaps %%xmm0, (%1, %%"REG_S") \n\t" ++ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" ++ "add $32, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (src+256), "r" (dest+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + + static void zero_MMX(sample_t * samples) + { + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "pxor %%mm0, %%mm0 \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq %%mm0, (%0, %%esi) \n\t" +- "movq %%mm0, 8(%0, %%esi) \n\t" +- "movq %%mm0, 16(%0, %%esi) \n\t" +- "movq %%mm0, 24(%0, %%esi) \n\t" +- "addl $32, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_S") \n\t" ++ "movq %%mm0, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm0, 16(%0, %%"REG_S") \n\t" ++ "movq %%mm0, 24(%0, %%"REG_S") \n\t" ++ "add $32, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms" + :: "r" (samples+256) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1257,29 +1257,29 @@ + asm volatile( + "movd %2, %%mm7 \n\t" + "punpckldq %2, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq (%0, %%esi), %%mm0 \n\t" +- "movq 8(%0, %%esi), %%mm1 \n\t" +- "movq 16(%0, %%esi), %%mm2 \n\t" +- "movq 24(%0, %%esi), %%mm3 \n\t" +- "pfadd (%1, %%esi), %%mm0 \n\t" +- "pfadd 8(%1, %%esi), %%mm1 \n\t" +- "pfadd 16(%1, %%esi), %%mm2 \n\t" +- "pfadd 24(%1, %%esi), %%mm3 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" ++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 24(%0, %%"REG_S"), %%mm3 \n\t" ++ "pfadd (%1, %%"REG_S"), %%mm0 \n\t" ++ "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" ++ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" ++ "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" + "pfadd %%mm7, %%mm2 \n\t" + "pfadd %%mm7, %%mm3 \n\t" +- "movq %%mm0, (%1, %%esi) \n\t" +- "movq %%mm1, 8(%1, %%esi) \n\t" +- "movq %%mm2, 16(%1, %%esi) \n\t" +- "movq %%mm3, 24(%1, %%esi) \n\t" +- "addl $32, %%esi \n\t" ++ "movq %%mm0, (%1, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%1, %%"REG_S") \n\t" ++ "movq %%mm2, 16(%1, %%"REG_S") \n\t" ++ "movq %%mm3, 24(%1, %%"REG_S") \n\t" ++ "add $32, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (src+256), "r" (dest+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1288,25 +1288,25 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq (%0, %%esi), %%mm0 \n\t" +- "movq 8(%0, %%esi), %%mm1 \n\t" +- "movq 1024(%0, %%esi), %%mm2 \n\t" +- "movq 1032(%0, %%esi), %%mm3 \n\t" +- "pfadd 2048(%0, %%esi), %%mm0 \n\t" +- "pfadd 2056(%0, %%esi), %%mm1 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm3\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" + "pfadd %%mm2, %%mm0 \n\t" + "pfadd %%mm3, %%mm1 \n\t" +- "movq %%mm0, (%0, %%esi) \n\t" +- "movq %%mm1, 8(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1315,27 +1315,27 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq (%0, %%esi), %%mm0 \n\t" +- "movq 8(%0, %%esi), %%mm1 \n\t" +- "movq 1024(%0, %%esi), %%mm2 \n\t" +- "movq 1032(%0, %%esi), %%mm3 \n\t" +- "pfadd 2048(%0, %%esi), %%mm0 \n\t" +- "pfadd 2056(%0, %%esi), %%mm1 \n\t" +- "pfadd 3072(%0, %%esi), %%mm2 \n\t" +- "pfadd 3080(%0, %%esi), %%mm3 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm3\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" + "pfadd %%mm2, %%mm0 \n\t" + "pfadd %%mm3, %%mm1 \n\t" +- "movq %%mm0, (%0, %%esi) \n\t" +- "movq %%mm1, 8(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1344,29 +1344,29 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq (%0, %%esi), %%mm0 \n\t" +- "movq 8(%0, %%esi), %%mm1 \n\t" +- "movq 1024(%0, %%esi), %%mm2 \n\t" +- "movq 1032(%0, %%esi), %%mm3 \n\t" +- "pfadd 2048(%0, %%esi), %%mm0 \n\t" +- "pfadd 2056(%0, %%esi), %%mm1 \n\t" +- "pfadd 3072(%0, %%esi), %%mm2 \n\t" +- "pfadd 3080(%0, %%esi), %%mm3 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm2\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm3\n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" +- "pfadd 4096(%0, %%esi), %%mm2 \n\t" +- "pfadd 4104(%0, %%esi), %%mm3 \n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm2, %%mm0 \n\t" + "pfadd %%mm3, %%mm1 \n\t" +- "movq %%mm0, (%0, %%esi) \n\t" +- "movq %%mm1, 8(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1375,29 +1375,29 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq 1024(%0, %%esi), %%mm0 \n\t" +- "movq 1032(%0, %%esi), %%mm1 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" //common + "pfadd %%mm7, %%mm1 \n\t" //common +- "movq (%0, %%esi), %%mm2 \n\t" +- "movq 8(%0, %%esi), %%mm3 \n\t" +- "movq 2048(%0, %%esi), %%mm4 \n\t" +- "movq 2056(%0, %%esi), %%mm5 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 2056(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm4 \n\t" + "pfadd %%mm1, %%mm5 \n\t" +- "movq %%mm2, (%0, %%esi) \n\t" +- "movq %%mm3, 8(%0, %%esi) \n\t" +- "movq %%mm4, 1024(%0, %%esi) \n\t" +- "movq %%mm5, 1032(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm2, (%0, %%"REG_S") \n\t" ++ "movq %%mm3, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" ++ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1406,29 +1406,29 @@ + asm volatile( + "movd %2, %%mm7 \n\t" + "punpckldq %2, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq 1024(%1, %%esi), %%mm0 \n\t" +- "movq 1032(%1, %%esi), %%mm1 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 1032(%1, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" //common + "pfadd %%mm7, %%mm1 \n\t" //common +- "movq (%0, %%esi), %%mm2 \n\t" +- "movq 8(%0, %%esi), %%mm3 \n\t" +- "movq (%1, %%esi), %%mm4 \n\t" +- "movq 8(%1, %%esi), %%mm5 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" ++ "movq (%1, %%"REG_S"), %%mm4 \n\t" ++ "movq 8(%1, %%"REG_S"), %%mm5 \n\t" + "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm4 \n\t" + "pfadd %%mm1, %%mm5 \n\t" +- "movq %%mm2, (%0, %%esi) \n\t" +- "movq %%mm3, 8(%0, %%esi) \n\t" +- "movq %%mm4, (%1, %%esi) \n\t" +- "movq %%mm5, 8(%1, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm2, (%0, %%"REG_S") \n\t" ++ "movq %%mm3, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm4, (%1, %%"REG_S") \n\t" ++ "movq %%mm5, 8(%1, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (left+256), "r" (right+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1437,15 +1437,15 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq 2048(%0, %%esi), %%mm0 \n\t" // surround +- "movq 2056(%0, %%esi), %%mm1 \n\t" // surround +- "movq (%0, %%esi), %%mm2 \n\t" +- "movq 8(%0, %%esi), %%mm3 \n\t" +- "movq 1024(%0, %%esi), %%mm4 \n\t" +- "movq 1032(%0, %%esi), %%mm5 \n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround ++ "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm7, %%mm2 \n\t" + "pfadd %%mm7, %%mm3 \n\t" + "pfadd %%mm7, %%mm4 \n\t" +@@ -1454,14 +1454,14 @@ + "pfsub %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm4 \n\t" + "pfadd %%mm1, %%mm5 \n\t" +- "movq %%mm2, (%0, %%esi) \n\t" +- "movq %%mm3, 8(%0, %%esi) \n\t" +- "movq %%mm4, 1024(%0, %%esi) \n\t" +- "movq %%mm5, 1032(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm2, (%0, %%"REG_S") \n\t" ++ "movq %%mm3, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" ++ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1470,31 +1470,31 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq 1024(%0, %%esi), %%mm0 \n\t" +- "movq 1032(%0, %%esi), %%mm1 \n\t" +- "pfadd 3072(%0, %%esi), %%mm0 \n\t" +- "pfadd 3080(%0, %%esi), %%mm1 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common +- "movq (%0, %%esi), %%mm2 \n\t" +- "movq 8(%0, %%esi), %%mm3 \n\t" +- "movq 2048(%0, %%esi), %%mm4 \n\t" +- "movq 2056(%0, %%esi), %%mm5 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 2056(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm4 \n\t" + "pfadd %%mm1, %%mm5 \n\t" +- "movq %%mm2, (%0, %%esi) \n\t" +- "movq %%mm3, 8(%0, %%esi) \n\t" +- "movq %%mm4, 1024(%0, %%esi) \n\t" +- "movq %%mm5, 1032(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm2, (%0, %%"REG_S") \n\t" ++ "movq %%mm3, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" ++ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1503,35 +1503,35 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq 1024(%0, %%esi), %%mm0 \n\t" +- "movq 1032(%0, %%esi), %%mm1 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common +- "movq (%0, %%esi), %%mm2 \n\t" +- "movq 8(%0, %%esi), %%mm3 \n\t" +- "movq 2048(%0, %%esi), %%mm4 \n\t" +- "movq 2056(%0, %%esi), %%mm5 \n\t" ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 2056(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm0, %%mm2 \n\t" + "pfadd %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm4 \n\t" + "pfadd %%mm1, %%mm5 \n\t" +- "movq 3072(%0, %%esi), %%mm0 \n\t" // surround +- "movq 3080(%0, %%esi), %%mm1 \n\t" // surround ++ "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround ++ "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround + "pfsub %%mm0, %%mm2 \n\t" + "pfsub %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm4 \n\t" + "pfadd %%mm1, %%mm5 \n\t" +- "movq %%mm2, (%0, %%esi) \n\t" +- "movq %%mm3, 8(%0, %%esi) \n\t" +- "movq %%mm4, 1024(%0, %%esi) \n\t" +- "movq %%mm5, 1032(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm2, (%0, %%"REG_S") \n\t" ++ "movq %%mm3, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" ++ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1540,17 +1540,17 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq 2048(%0, %%esi), %%mm0 \n\t" +- "movq 2056(%0, %%esi), %%mm1 \n\t" +- "pfadd 3072(%0, %%esi), %%mm0 \n\t" // surround +- "pfadd 3080(%0, %%esi), %%mm1 \n\t" // surround +- "movq (%0, %%esi), %%mm2 \n\t" +- "movq 8(%0, %%esi), %%mm3 \n\t" +- "movq 1024(%0, %%esi), %%mm4 \n\t" +- "movq 1032(%0, %%esi), %%mm5 \n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 2056(%0, %%"REG_S"), %%mm1\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround ++ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm7, %%mm2 \n\t" + "pfadd %%mm7, %%mm3 \n\t" + "pfadd %%mm7, %%mm4 \n\t" +@@ -1559,14 +1559,14 @@ + "pfsub %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm4 \n\t" + "pfadd %%mm1, %%mm5 \n\t" +- "movq %%mm2, (%0, %%esi) \n\t" +- "movq %%mm3, 8(%0, %%esi) \n\t" +- "movq %%mm4, 1024(%0, %%esi) \n\t" +- "movq %%mm5, 1032(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm2, (%0, %%"REG_S") \n\t" ++ "movq %%mm3, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm4, 1024(%0, %%"REG_S")\n\t" ++ "movq %%mm5, 1032(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1575,31 +1575,31 @@ + asm volatile( + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq 1024(%0, %%esi), %%mm0 \n\t" +- "movq 1032(%0, %%esi), %%mm1 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common + "movq %%mm0, %%mm2 \n\t" // common + "movq %%mm1, %%mm3 \n\t" // common +- "pfadd (%0, %%esi), %%mm0 \n\t" +- "pfadd 8(%0, %%esi), %%mm1 \n\t" +- "pfadd 2048(%0, %%esi), %%mm2 \n\t" +- "pfadd 2056(%0, %%esi), %%mm3 \n\t" +- "pfadd 3072(%0, %%esi), %%mm0 \n\t" +- "pfadd 3080(%0, %%esi), %%mm1 \n\t" +- "pfadd 4096(%0, %%esi), %%mm2 \n\t" +- "pfadd 4104(%0, %%esi), %%mm3 \n\t" +- "movq %%mm0, (%0, %%esi) \n\t" +- "movq %%mm1, 8(%0, %%esi) \n\t" +- "movq %%mm2, 1024(%0, %%esi) \n\t" +- "movq %%mm3, 1032(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "pfadd (%0, %%"REG_S"), %%mm0 \n\t" ++ "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" ++ "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" ++ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" ++ "movq %%mm0, (%0, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm2, 1024(%0, %%"REG_S")\n\t" ++ "movq %%mm3, 1032(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1607,23 +1607,23 @@ + static void mix32toS_3dnow (sample_t * samples, sample_t bias) + { + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" + "movd %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t" +- "movq 1024(%0, %%esi), %%mm0 \n\t" +- "movq 1032(%0, %%esi), %%mm1 \n\t" +- "movq 3072(%0, %%esi), %%mm4 \n\t" +- "movq 3080(%0, %%esi), %%mm5 \n\t" ++ "movq 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "movq 1032(%0, %%"REG_S"), %%mm1\n\t" ++ "movq 3072(%0, %%"REG_S"), %%mm4\n\t" ++ "movq 3080(%0, %%"REG_S"), %%mm5\n\t" + "pfadd %%mm7, %%mm0 \n\t" // common + "pfadd %%mm7, %%mm1 \n\t" // common +- "pfadd 4096(%0, %%esi), %%mm4 \n\t" // surround +- "pfadd 4104(%0, %%esi), %%mm5 \n\t" // surround +- "movq (%0, %%esi), %%mm2 \n\t" +- "movq 8(%0, %%esi), %%mm3 \n\t" +- "movq 2048(%0, %%esi), %%mm6 \n\t" +- "movq 2056(%0, %%esi), %%mm7 \n\t" ++ "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround ++ "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround ++ "movq (%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm3 \n\t" ++ "movq 2048(%0, %%"REG_S"), %%mm6\n\t" ++ "movq 2056(%0, %%"REG_S"), %%mm7\n\t" + "pfsub %%mm4, %%mm2 \n\t" + "pfsub %%mm5, %%mm3 \n\t" + "pfadd %%mm4, %%mm6 \n\t" +@@ -1632,14 +1632,14 @@ + "pfadd %%mm1, %%mm3 \n\t" + "pfadd %%mm0, %%mm6 \n\t" + "pfadd %%mm1, %%mm7 \n\t" +- "movq %%mm2, (%0, %%esi) \n\t" +- "movq %%mm3, 8(%0, %%esi) \n\t" +- "movq %%mm6, 1024(%0, %%esi) \n\t" +- "movq %%mm7, 1032(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm2, (%0, %%"REG_S") \n\t" ++ "movq %%mm3, 8(%0, %%"REG_S") \n\t" ++ "movq %%mm6, 1024(%0, %%"REG_S")\n\t" ++ "movq %%mm7, 1032(%0, %%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (samples+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1648,29 +1648,29 @@ + asm volatile( + "movd %2, %%mm7 \n\t" + "punpckldq %2, %%mm7 \n\t" +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16\n\t" + "1: \n\t" +- "movq (%0, %%esi), %%mm0 \n\t" +- "movq 8(%0, %%esi), %%mm1 \n\t" +- "movq 16(%0, %%esi), %%mm2 \n\t" +- "movq 24(%0, %%esi), %%mm3 \n\t" +- "pfadd 1024(%0, %%esi), %%mm0 \n\t" +- "pfadd 1032(%0, %%esi), %%mm1 \n\t" +- "pfadd 1040(%0, %%esi), %%mm2 \n\t" +- "pfadd 1048(%0, %%esi), %%mm3 \n\t" ++ "movq (%0, %%"REG_S"), %%mm0 \n\t" ++ "movq 8(%0, %%"REG_S"), %%mm1 \n\t" ++ "movq 16(%0, %%"REG_S"), %%mm2 \n\t" ++ "movq 24(%0, %%"REG_S"), %%mm3 \n\t" ++ "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" ++ "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" ++ "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t" ++ "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t" + "pfadd %%mm7, %%mm0 \n\t" + "pfadd %%mm7, %%mm1 \n\t" + "pfadd %%mm7, %%mm2 \n\t" + "pfadd %%mm7, %%mm3 \n\t" +- "movq %%mm0, (%1, %%esi) \n\t" +- "movq %%mm1, 8(%1, %%esi) \n\t" +- "movq %%mm2, 16(%1, %%esi) \n\t" +- "movq %%mm3, 24(%1, %%esi) \n\t" +- "addl $32, %%esi \n\t" ++ "movq %%mm0, (%1, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%1, %%"REG_S") \n\t" ++ "movq %%mm2, 16(%1, %%"REG_S") \n\t" ++ "movq %%mm3, 24(%1, %%"REG_S") \n\t" ++ "add $32, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (src+256), "r" (dest+256), "m" (bias) +- : "%esi" ++ : "%"REG_S + ); + } + +@@ -1816,4 +1816,4 @@ + __asm __volatile("femms":::"memory"); + } + +-#endif //ARCH_X86 ++#endif // ARCH_X86 || ARCH_X86_64 +Index: liba52/imdct.c +=================================================================== +RCS file: /cvsroot/mplayer/main/liba52/imdct.c,v +retrieving revision 1.27 +diff -u -r1.27 imdct.c +--- liba52/imdct.c 2 Jun 2005 20:54:02 -0000 1.27 ++++ liba52/imdct.c 31 Jul 2005 21:20:09 -0000 +@@ -101,7 +101,7 @@ + 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b, + 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f}; + +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + // NOTE: SSE needs 16byte alignment or it will segfault + // + static complex_t __attribute__((aligned(16))) buf[128]; +@@ -442,8 +442,8 @@ + int k; + int p,q; + int m; +- int two_m; +- int two_m_plus_one; ++ long two_m; ++ long two_m_plus_one; + + sample_t tmp_b_i; + sample_t tmp_b_r; +@@ -747,7 +747,7 @@ + + // Stuff below this line is borrowed from libac3 + #include "srfftp.h" +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + #ifndef HAVE_3DNOW + #define HAVE_3DNOW 1 + #endif +@@ -768,9 +768,9 @@ + /* int i,k; + int p,q;*/ + int m; +- int two_m; +- int two_m_plus_one; +- int two_m_plus_one_shl3; ++ long two_m; ++ long two_m_plus_one; ++ long two_m_plus_one_shl3; + complex_t *buf_offset; + + /* sample_t tmp_a_i; +@@ -788,33 +788,33 @@ + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ + /* Bit reversed shuffling */ + asm volatile( +- "xorl %%esi, %%esi \n\t" +- "leal "MANGLE(bit_reverse_512)", %%eax \n\t" +- "movl $1008, %%edi \n\t" +- "pushl %%ebp \n\t" //use ebp without telling gcc ++ "xor %%"REG_S", %%"REG_S" \n\t" ++ "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" ++ "mov $1008, %%"REG_D" \n\t" ++ "push %%"REG_BP" \n\t" //use ebp without telling gcc + ".balign 16 \n\t" + "1: \n\t" +- "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI +- "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI +- "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi +- "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi ++ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI ++ "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI ++ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi ++ "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi + "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR +- "movaps "MANGLE(sseSinCos1c)"(%%esi), %%xmm2\n\t" ++ "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t" + "mulps %%xmm0, %%xmm2 \n\t" + "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI +- "mulps "MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t" ++ "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" + "subps %%xmm0, %%xmm2 \n\t" +- "movzbl (%%eax), %%edx \n\t" +- "movzbl 1(%%eax), %%ebp \n\t" +- "movlps %%xmm2, (%1, %%edx,8) \n\t" +- "movhps %%xmm2, (%1, %%ebp,8) \n\t" +- "addl $16, %%esi \n\t" +- "addl $2, %%eax \n\t" // avoid complex addressing for P4 crap +- "subl $16, %%edi \n\t" +- " jnc 1b \n\t" +- "popl %%ebp \n\t"//no we didnt touch ebp *g* +- :: "b" (data), "c" (buf) +- : "%esi", "%edi", "%eax", "%edx" ++ "movzb (%%"REG_a"), %%"REG_d" \n\t" ++ "movzb 1(%%"REG_a"), %%"REG_BP" \n\t" ++ "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t" ++ "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t" ++ "add $16, %%"REG_S" \n\t" ++ "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap ++ "sub $16, %%"REG_D" \n\t" ++ "jnc 1b \n\t" ++ "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g* ++ :: "r" (data), "r" (buf) ++ : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d + ); + + +@@ -850,44 +850,44 @@ + asm volatile( + "xorps %%xmm1, %%xmm1 \n\t" + "xorps %%xmm2, %%xmm2 \n\t" +- "movl %0, %%esi \n\t" ++ "mov %0, %%"REG_S" \n\t" + ".balign 16 \n\t" + "1: \n\t" +- "movlps (%%esi), %%xmm0 \n\t" //buf[p] +- "movlps 8(%%esi), %%xmm1\n\t" //buf[q] +- "movhps (%%esi), %%xmm0 \n\t" //buf[p] +- "movhps 8(%%esi), %%xmm2\n\t" //buf[q] ++ "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] ++ "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] ++ "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] ++ "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] + "addps %%xmm1, %%xmm0 \n\t" + "subps %%xmm2, %%xmm0 \n\t" +- "movaps %%xmm0, (%%esi) \n\t" +- "addl $16, %%esi \n\t" +- "cmpl %1, %%esi \n\t" ++ "movaps %%xmm0, (%%"REG_S")\n\t" ++ "add $16, %%"REG_S" \n\t" ++ "cmp %1, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "g" (buf), "r" (buf + 128) +- : "%esi" ++ : "%"REG_S + ); + + /* 2. iteration */ + // Note w[1]={{1,0}, {0,-1}} + asm volatile( + "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 +- "movl %0, %%esi \n\t" ++ "mov %0, %%"REG_S" \n\t" + ".balign 16 \n\t" + "1: \n\t" +- "movaps 16(%%esi), %%xmm2 \n\t" //r2,i2,r3,i3 ++ "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 + "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 + "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 +- "movaps (%%esi), %%xmm0 \n\t" //r0,i0,r1,i1 +- "movaps (%%esi), %%xmm1 \n\t" //r0,i0,r1,i1 ++ "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 ++ "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1 + "addps %%xmm2, %%xmm0 \n\t" + "subps %%xmm2, %%xmm1 \n\t" +- "movaps %%xmm0, (%%esi) \n\t" +- "movaps %%xmm1, 16(%%esi) \n\t" +- "addl $32, %%esi \n\t" +- "cmpl %1, %%esi \n\t" ++ "movaps %%xmm0, (%%"REG_S") \n\t" ++ "movaps %%xmm1, 16(%%"REG_S") \n\t" ++ "add $32, %%"REG_S" \n\t" ++ "cmp %1, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "g" (buf), "r" (buf + 128) +- : "%esi" ++ : "%"REG_S + ); + + /* 3. iteration */ +@@ -902,11 +902,11 @@ + "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" + "xorps %%xmm5, %%xmm5 \n\t" + "xorps %%xmm2, %%xmm2 \n\t" +- "movl %0, %%esi \n\t" ++ "mov %0, %%"REG_S" \n\t" + ".balign 16 \n\t" + "1: \n\t" +- "movaps 32(%%esi), %%xmm2 \n\t" //r4,i4,r5,i5 +- "movaps 48(%%esi), %%xmm3 \n\t" //r6,i6,r7,i7 ++ "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 ++ "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 + "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 + "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 + "mulps %%xmm2, %%xmm4 \n\t" +@@ -915,8 +915,8 @@ + "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 + "mulps %%xmm6, %%xmm3 \n\t" + "mulps %%xmm7, %%xmm2 \n\t" +- "movaps (%%esi), %%xmm0 \n\t" //r0,i0,r1,i1 +- "movaps 16(%%esi), %%xmm1 \n\t" //r2,i2,r3,i3 ++ "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 ++ "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3 + "addps %%xmm4, %%xmm2 \n\t" + "addps %%xmm5, %%xmm3 \n\t" + "movaps %%xmm2, %%xmm4 \n\t" +@@ -925,15 +925,15 @@ + "addps %%xmm1, %%xmm3 \n\t" + "subps %%xmm4, %%xmm0 \n\t" + "subps %%xmm5, %%xmm1 \n\t" +- "movaps %%xmm2, (%%esi) \n\t" +- "movaps %%xmm3, 16(%%esi) \n\t" +- "movaps %%xmm0, 32(%%esi) \n\t" +- "movaps %%xmm1, 48(%%esi) \n\t" +- "addl $64, %%esi \n\t" +- "cmpl %1, %%esi \n\t" ++ "movaps %%xmm2, (%%"REG_S") \n\t" ++ "movaps %%xmm3, 16(%%"REG_S") \n\t" ++ "movaps %%xmm0, 32(%%"REG_S") \n\t" ++ "movaps %%xmm1, 48(%%"REG_S") \n\t" ++ "add $64, %%"REG_S" \n\t" ++ "cmp %1, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "g" (buf), "r" (buf + 128) +- : "%esi" ++ : "%"REG_S + ); + + /* 4-7. iterations */ +@@ -943,52 +943,52 @@ + two_m_plus_one_shl3 = (two_m_plus_one<<3); + buf_offset = buf+128; + asm volatile( +- "movl %0, %%esi \n\t" ++ "mov %0, %%"REG_S" \n\t" + ".balign 16 \n\t" + "1: \n\t" +- "xorl %%edi, %%edi \n\t" // k +- "leal (%%esi, %3), %%edx \n\t" ++ "xor %%"REG_D", %%"REG_D" \n\t" // k ++ "lea (%%"REG_S", %3), %%"REG_d" \n\t" + "2: \n\t" +- "movaps (%%edx, %%edi), %%xmm1 \n\t" +- "movaps (%4, %%edi, 2), %%xmm2 \n\t" ++ "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" ++ "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t" + "mulps %%xmm1, %%xmm2 \n\t" + "shufps $0xB1, %%xmm1, %%xmm1 \n\t" +- "mulps 16(%4, %%edi, 2), %%xmm1 \n\t" +- "movaps (%%esi, %%edi), %%xmm0 \n\t" ++ "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t" ++ "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t" + "addps %%xmm2, %%xmm1 \n\t" + "movaps %%xmm1, %%xmm2 \n\t" + "addps %%xmm0, %%xmm1 \n\t" + "subps %%xmm2, %%xmm0 \n\t" +- "movaps %%xmm1, (%%esi, %%edi) \n\t" +- "movaps %%xmm0, (%%edx, %%edi) \n\t" +- "addl $16, %%edi \n\t" +- "cmpl %3, %%edi \n\t" //FIXME (opt) count against 0 +- " jb 2b \n\t" +- "addl %2, %%esi \n\t" +- "cmpl %1, %%esi \n\t" ++ "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t" ++ "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t" ++ "add $16, %%"REG_D" \n\t" ++ "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0 ++ "jb 2b \n\t" ++ "add %2, %%"REG_S" \n\t" ++ "cmp %1, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3), + "r" (sseW[m]) +- : "%esi", "%edi", "%edx" ++ : "%"REG_S, "%"REG_D, "%"REG_d + ); + } + + /* Post IFFT complex multiply plus IFFT complex conjugate*/ + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + ".balign 16 \n\t" + "1: \n\t" +- "movaps (%0, %%esi), %%xmm0 \n\t" +- "movaps (%0, %%esi), %%xmm1 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm0 \n\t" ++ "movaps (%0, %%"REG_S"), %%xmm1 \n\t" + "shufps $0xB1, %%xmm0, %%xmm0 \n\t" +- "mulps 1024+"MANGLE(sseSinCos1c)"(%%esi), %%xmm1\n\t" +- "mulps 1024+"MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t" ++ "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" ++ "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t" + "addps %%xmm1, %%xmm0 \n\t" +- "movaps %%xmm0, (%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movaps %%xmm0, (%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + :: "r" (buf+128) +- : "%esi" ++ : "%"REG_S + ); + + +@@ -998,54 +998,54 @@ + + /* Window and convert to real valued signal */ + asm volatile( +- "xorl %%edi, %%edi \n\t" // 0 +- "xorl %%esi, %%esi \n\t" // 0 ++ "xor %%"REG_D", %%"REG_D" \n\t" // 0 ++ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + "movss %3, %%xmm2 \n\t" // bias + "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... + ".balign 16 \n\t" + "1: \n\t" +- "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? +- "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? +- "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? +- "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? ++ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? ++ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? ++ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? ++ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? + "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A +- "mulps "MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" +- "addps (%2, %%esi), %%xmm0 \n\t" ++ "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" ++ "addps (%2, %%"REG_S"), %%xmm0 \n\t" + "addps %%xmm2, %%xmm0 \n\t" +- "movaps %%xmm0, (%1, %%esi) \n\t" +- "addl $16, %%esi \n\t" +- "subl $16, %%edi \n\t" +- "cmpl $512, %%esi \n\t" ++ "movaps %%xmm0, (%1, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" ++ "sub $16, %%"REG_D" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias) +- : "%esi", "%edi" ++ : "%"REG_S, "%"REG_D + ); + data_ptr+=128; + delay_ptr+=128; + // window_ptr+=128; + + asm volatile( +- "movl $1024, %%edi \n\t" // 512 +- "xorl %%esi, %%esi \n\t" // 0 ++ "mov $1024, %%"REG_D" \n\t" // 512 ++ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + "movss %3, %%xmm2 \n\t" // bias + "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... + ".balign 16 \n\t" + "1: \n\t" +- "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A +- "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C +- "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C +- "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A ++ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A ++ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C ++ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C ++ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A + "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A +- "mulps 512+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" +- "addps (%2, %%esi), %%xmm0 \n\t" ++ "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" ++ "addps (%2, %%"REG_S"), %%xmm0 \n\t" + "addps %%xmm2, %%xmm0 \n\t" +- "movaps %%xmm0, (%1, %%esi) \n\t" +- "addl $16, %%esi \n\t" +- "subl $16, %%edi \n\t" +- "cmpl $512, %%esi \n\t" ++ "movaps %%xmm0, (%1, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" ++ "sub $16, %%"REG_D" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias) +- : "%esi", "%edi" ++ : "%"REG_S, "%"REG_D + ); + data_ptr+=128; + // window_ptr+=128; +@@ -1054,48 +1054,48 @@ + delay_ptr = delay; + + asm volatile( +- "xorl %%edi, %%edi \n\t" // 0 +- "xorl %%esi, %%esi \n\t" // 0 ++ "xor %%"REG_D", %%"REG_D" \n\t" // 0 ++ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + ".balign 16 \n\t" + "1: \n\t" +- "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A +- "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C +- "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C +- "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A ++ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A ++ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C ++ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C ++ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A + "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A +- "mulps 1024+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" +- "movaps %%xmm0, (%1, %%esi) \n\t" +- "addl $16, %%esi \n\t" +- "subl $16, %%edi \n\t" +- "cmpl $512, %%esi \n\t" ++ "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" ++ "movaps %%xmm0, (%1, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" ++ "sub $16, %%"REG_D" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf+64), "r" (delay_ptr) +- : "%esi", "%edi" ++ : "%"REG_S, "%"REG_D + ); + delay_ptr+=128; + // window_ptr-=128; + + asm volatile( +- "movl $1024, %%edi \n\t" // 1024 +- "xorl %%esi, %%esi \n\t" // 0 ++ "mov $1024, %%"REG_D" \n\t" // 1024 ++ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + ".balign 16 \n\t" + "1: \n\t" +- "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? +- "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? +- "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? +- "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? ++ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? ++ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? ++ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? ++ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? + "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A +- "mulps 1536+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" +- "movaps %%xmm0, (%1, %%esi) \n\t" +- "addl $16, %%esi \n\t" +- "subl $16, %%edi \n\t" +- "cmpl $512, %%esi \n\t" ++ "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t" ++ "movaps %%xmm0, (%1, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" ++ "sub $16, %%"REG_D" \n\t" ++ "cmp $512, %%"REG_S" \n\t" + " jb 1b \n\t" + :: "r" (buf), "r" (delay_ptr) +- : "%esi", "%edi" ++ : "%"REG_S, "%"REG_D + ); + } +-#endif //arch_x86 ++#endif // ARCH_X86 || ARCH_X86_64 + + void + imdct_do_256(sample_t data[],sample_t delay[],sample_t bias) +@@ -1242,7 +1242,7 @@ + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); + } +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + for (i = 0; i < 128; i++) { + sseSinCos1c[2*i+0]= xcos1[i]; + sseSinCos1c[2*i+1]= -xcos1[i]; +@@ -1264,7 +1264,7 @@ + w[i][k].imag = sin (-M_PI * k / j); + } + } +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + for (i = 1; i < 7; i++) { + j = 1 << i; + for (k = 0; k < j; k+=2) { +@@ -1307,10 +1307,10 @@ + sseWindow[384 + 2*i+0]= imdct_window[126 - 2*i+1]; + sseWindow[384 + 2*i+1]= -imdct_window[126 - 2*i+0]; + } +-#endif // arch_x86 ++#endif // ARCH_X86 || ARCH_X86_64 + + imdct_512 = imdct_do_512; +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + if(mm_accel & MM_ACCEL_X86_SSE) + { + fprintf (stderr, "Using SSE optimized IMDCT transform\n"); +@@ -1329,7 +1329,7 @@ + imdct_512 = imdct_do_512_3dnow; + } + else +-#endif // arch_x86 ++#endif // ARCH_X86 || ARCH_X86_64 + #ifdef HAVE_ALTIVEC + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) + { +Index: liba52/resample.c +=================================================================== +RCS file: /cvsroot/mplayer/main/liba52/resample.c,v +retrieving revision 1.16 +diff -u -r1.16 resample.c +--- liba52/resample.c 25 Jan 2004 18:29:11 -0000 1.16 ++++ liba52/resample.c 31 Jul 2005 21:20:10 -0000 +@@ -15,7 +15,7 @@ + + #include "resample_c.c" + +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + #include "resample_mmx.c" + #endif + +@@ -26,7 +26,7 @@ + void* a52_resample_init(uint32_t mm_accel,int flags,int chans){ + void* tmp; + +-#ifdef ARCH_X86 ++#if defined(ARCH_X86) || defined(ARCH_X86_64) + if(mm_accel&MM_ACCEL_X86_MMX){ + tmp=a52_resample_MMX(flags,chans); + if(tmp){ +Index: liba52/resample_mmx.c +=================================================================== +RCS file: /cvsroot/mplayer/main/liba52/resample_mmx.c,v +retrieving revision 1.17 +diff -u -r1.17 resample_mmx.c +--- liba52/resample_mmx.c 26 Apr 2004 19:47:50 -0000 1.17 ++++ liba52/resample_mmx.c 31 Jul 2005 21:20:10 -0000 +@@ -7,6 +7,9 @@ + and it would mean (C / MMX2 / MMX / 3DNOW) versions + */ + ++#include "a52_internal.h" ++ ++ + static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; + static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; + static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; +@@ -15,36 +18,36 @@ + static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-512, %%esi \n\t" ++ "mov $-512, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "movq "MANGLE(wm1100)", %%mm3 \n\t" + "movq "MANGLE(wm0101)", %%mm4 \n\t" + "movq "MANGLE(wm1010)", %%mm5 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" +- "movq (%1, %%esi, 2), %%mm0 \n\t" +- "movq 8(%1, %%esi, 2), %%mm1 \n\t" +- "leal (%%esi, %%esi, 4), %%edi \n\t" ++ "movq (%1, %%"REG_S", 2), %%mm0 \n\t" ++ "movq 8(%1, %%"REG_S", 2), %%mm1\n\t" ++ "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "pand %%mm4, %%mm0 \n\t" + "pand %%mm5, %%mm1 \n\t" +- "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 +- "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 ++ "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0 ++ "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0 + "pand %%mm3, %%mm0 \n\t" +- "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 +- "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B ++ "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0 ++ "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B + "pand %%mm3, %%mm1 \n\t" +- "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 +- "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 +- "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 B +- "addl $8, %%esi \n\t" ++ "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0 ++ "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0 ++ "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 B ++ "add $8, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1280), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 5*256; + } +@@ -54,29 +57,29 @@ + /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it + #ifdef HAVE_SSE + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "1: \n\t" +- "cvtps2pi (%1, %%esi), %%mm0 \n\t" +- "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" ++ "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" ++ "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t" + "movq %%mm0, %%mm1 \n\t" + "punpcklwd %%mm2, %%mm0 \n\t" + "punpckhwd %%mm2, %%mm1 \n\t" +- "movq %%mm0, (%0, %%esi) \n\t" +- "movq %%mm1, 8(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+512), "r" (f+256) +- :"%esi", "memory" ++ :"%"REG_S, "memory" + );*/ + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "1: \n\t" +- "movq (%1, %%esi), %%mm0 \n\t" +- "movq 8(%1, %%esi), %%mm1 \n\t" +- "movq 1024(%1, %%esi), %%mm2 \n\t" +- "movq 1032(%1, %%esi), %%mm3 \n\t" ++ "movq (%1, %%"REG_S"), %%mm0 \n\t" ++ "movq 8(%1, %%"REG_S"), %%mm1 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm2\n\t" ++ "movq 1032(%1, %%"REG_S"), %%mm3\n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" +@@ -86,13 +89,13 @@ + "movq %%mm0, %%mm1 \n\t" + "punpcklwd %%mm2, %%mm0 \n\t" + "punpckhwd %%mm2, %%mm1 \n\t" +- "movq %%mm0, (%0, %%esi) \n\t" +- "movq %%mm1, 8(%0, %%esi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_S") \n\t" ++ "movq %%mm1, 8(%0, %%"REG_S") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+512), "r" (f+256) +- :"%esi", "memory" ++ :"%"REG_S, "memory" + ); + return 2*256; + } +@@ -100,23 +103,23 @@ + static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "movq %%mm7, %%mm5 \n\t" + "punpckldq %%mm6, %%mm5 \n\t" + "1: \n\t" +- "movd (%1, %%esi), %%mm0 \n\t" +- "punpckldq 2048(%1, %%esi), %%mm0\n\t" +- "movd 1024(%1, %%esi), %%mm1 \n\t" +- "punpckldq 4(%1, %%esi), %%mm1 \n\t" +- "movd 2052(%1, %%esi), %%mm2 \n\t" ++ "movd (%1, %%"REG_S"), %%mm0 \n\t" ++ "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" ++ "movd 1024(%1, %%"REG_S"), %%mm1\n\t" ++ "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" ++ "movd 2052(%1, %%"REG_S"), %%mm2\n\t" + "movq %%mm7, %%mm3 \n\t" +- "punpckldq 1028(%1, %%esi), %%mm3\n\t" +- "movd 8(%1, %%esi), %%mm4 \n\t" +- "punpckldq 2056(%1, %%esi), %%mm4\n\t" +- "leal (%%esi, %%esi, 4), %%edi \n\t" +- "sarl $1, %%edi \n\t" ++ "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" ++ "movd 8(%1, %%"REG_S"), %%mm4 \n\t" ++ "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" ++ "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" ++ "sar $1, %%"REG_D" \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm5, %%mm2 \n\t" +@@ -125,29 +128,28 @@ + "packssdw %%mm6, %%mm0 \n\t" + "packssdw %%mm2, %%mm1 \n\t" + "packssdw %%mm4, %%mm3 \n\t" +- "movq %%mm0, (%0, %%edi) \n\t" +- "movq %%mm1, 8(%0, %%edi) \n\t" +- "movq %%mm3, 16(%0, %%edi) \n\t" +- +- "movd 1032(%1, %%esi), %%mm1 \n\t" +- "punpckldq 12(%1, %%esi), %%mm1\n\t" +- "movd 2060(%1, %%esi), %%mm2 \n\t" ++ "movq %%mm0, (%0, %%"REG_D") \n\t" ++ "movq %%mm1, 8(%0, %%"REG_D") \n\t" ++ "movq %%mm3, 16(%0, %%"REG_D") \n\t" ++ "movd 1032(%1, %%"REG_S"), %%mm1\n\t" ++ "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" ++ "movd 2060(%1, %%"REG_S"), %%mm2\n\t" + "movq %%mm7, %%mm3 \n\t" +- "punpckldq 1036(%1, %%esi), %%mm3\n\t" ++ "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" + "pxor %%mm0, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm5, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm3, %%mm2 \n\t" +- "movq %%mm0, 24(%0, %%edi) \n\t" +- "movq %%mm2, 32(%0, %%edi) \n\t" ++ "movq %%mm0, 24(%0, %%"REG_D") \n\t" ++ "movq %%mm2, 32(%0, %%"REG_D") \n\t" + +- "addl $16, %%esi \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1280), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 5*256; + } +@@ -155,23 +157,23 @@ + static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "1: \n\t" +- "movq (%1, %%esi), %%mm0 \n\t" +- "movq 8(%1, %%esi), %%mm1 \n\t" +- "movq 1024(%1, %%esi), %%mm2 \n\t" +- "movq 1032(%1, %%esi), %%mm3 \n\t" ++ "movq (%1, %%"REG_S"), %%mm0 \n\t" ++ "movq 8(%1, %%"REG_S"), %%mm1 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm2\n\t" ++ "movq 1032(%1, %%"REG_S"), %%mm3\n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm3, %%mm2 \n\t" +- "movq 2048(%1, %%esi), %%mm3 \n\t" +- "movq 2056(%1, %%esi), %%mm4 \n\t" +- "movq 3072(%1, %%esi), %%mm5 \n\t" +- "movq 3080(%1, %%esi), %%mm6 \n\t" ++ "movq 2048(%1, %%"REG_S"), %%mm3\n\t" ++ "movq 2056(%1, %%"REG_S"), %%mm4\n\t" ++ "movq 3072(%1, %%"REG_S"), %%mm5\n\t" ++ "movq 3080(%1, %%"REG_S"), %%mm6\n\t" + "psubd %%mm7, %%mm3 \n\t" + "psubd %%mm7, %%mm4 \n\t" + "psubd %%mm7, %%mm5 \n\t" +@@ -190,15 +192,15 @@ + "punpckhdq %%mm3, %%mm2 \n\t" + "punpckldq %%mm4, %%mm1 \n\t" + "punpckhdq %%mm4, %%mm5 \n\t" +- "movq %%mm0, (%0, %%esi,2) \n\t" +- "movq %%mm2, 8(%0, %%esi,2) \n\t" +- "movq %%mm1, 16(%0, %%esi,2) \n\t" +- "movq %%mm5, 24(%0, %%esi,2) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_S",2) \n\t" ++ "movq %%mm2, 8(%0, %%"REG_S",2) \n\t" ++ "movq %%mm1, 16(%0, %%"REG_S",2)\n\t" ++ "movq %%mm5, 24(%0, %%"REG_S",2)\n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1024), "r" (f+256) +- :"%esi", "memory" ++ :"%"REG_S, "memory" + ); + return 4*256; + } +@@ -206,23 +208,23 @@ + static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "1: \n\t" +- "movd (%1, %%esi), %%mm0 \n\t" +- "punpckldq 2048(%1, %%esi), %%mm0\n\t" +- "movd 3072(%1, %%esi), %%mm1 \n\t" +- "punpckldq 4096(%1, %%esi), %%mm1\n\t" +- "movd 1024(%1, %%esi), %%mm2 \n\t" +- "punpckldq 4(%1, %%esi), %%mm2 \n\t" +- "movd 2052(%1, %%esi), %%mm3 \n\t" +- "punpckldq 3076(%1, %%esi), %%mm3\n\t" +- "movd 4100(%1, %%esi), %%mm4 \n\t" +- "punpckldq 1028(%1, %%esi), %%mm4\n\t" +- "movd 8(%1, %%esi), %%mm5 \n\t" +- "punpckldq 2056(%1, %%esi), %%mm5\n\t" +- "leal (%%esi, %%esi, 4), %%edi \n\t" +- "sarl $1, %%edi \n\t" ++ "movd (%1, %%"REG_S"), %%mm0 \n\t" ++ "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" ++ "movd 3072(%1, %%"REG_S"), %%mm1\n\t" ++ "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t" ++ "movd 1024(%1, %%"REG_S"), %%mm2\n\t" ++ "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t" ++ "movd 2052(%1, %%"REG_S"), %%mm3\n\t" ++ "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t" ++ "movd 4100(%1, %%"REG_S"), %%mm4\n\t" ++ "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t" ++ "movd 8(%1, %%"REG_S"), %%mm5 \n\t" ++ "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t" ++ "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" ++ "sar $1, %%"REG_D" \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" +@@ -232,32 +234,32 @@ + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm3, %%mm2 \n\t" + "packssdw %%mm5, %%mm4 \n\t" +- "movq %%mm0, (%0, %%edi) \n\t" +- "movq %%mm2, 8(%0, %%edi) \n\t" +- "movq %%mm4, 16(%0, %%edi) \n\t" ++ "movq %%mm0, (%0, %%"REG_D") \n\t" ++ "movq %%mm2, 8(%0, %%"REG_D") \n\t" ++ "movq %%mm4, 16(%0, %%"REG_D") \n\t" + +- "movd 3080(%1, %%esi), %%mm0 \n\t" +- "punpckldq 4104(%1, %%esi), %%mm0\n\t" +- "movd 1032(%1, %%esi), %%mm1 \n\t" +- "punpckldq 12(%1, %%esi), %%mm1\n\t" +- "movd 2060(%1, %%esi), %%mm2 \n\t" +- "punpckldq 3084(%1, %%esi), %%mm2\n\t" +- "movd 4108(%1, %%esi), %%mm3 \n\t" +- "punpckldq 1036(%1, %%esi), %%mm3\n\t" ++ "movd 3080(%1, %%"REG_S"), %%mm0\n\t" ++ "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t" ++ "movd 1032(%1, %%"REG_S"), %%mm1\n\t" ++ "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" ++ "movd 2060(%1, %%"REG_S"), %%mm2\n\t" ++ "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t" ++ "movd 4108(%1, %%"REG_S"), %%mm3\n\t" ++ "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm3, %%mm2 \n\t" +- "movq %%mm0, 24(%0, %%edi) \n\t" +- "movq %%mm2, 32(%0, %%edi) \n\t" ++ "movq %%mm0, 24(%0, %%"REG_D") \n\t" ++ "movq %%mm2, 32(%0, %%"REG_D") \n\t" + +- "addl $16, %%esi \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1280), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 5*256; + } +@@ -265,14 +267,14 @@ + static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" +- "movq 1024(%1, %%esi), %%mm0 \n\t" +- "movq 1032(%1, %%esi), %%mm1 \n\t" +- "movq (%1, %%esi), %%mm2 \n\t" +- "movq 8(%1, %%esi), %%mm3 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 1032(%1, %%"REG_S"), %%mm1\n\t" ++ "movq (%1, %%"REG_S"), %%mm2 \n\t" ++ "movq 8(%1, %%"REG_S"), %%mm3 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" +@@ -282,22 +284,22 @@ + "movq %%mm0, %%mm1 \n\t" + "punpcklwd %%mm2, %%mm0 \n\t" + "punpckhwd %%mm2, %%mm1 \n\t" +- "leal (%%esi, %%esi, 2), %%edi \n\t" +- "movq %%mm6, (%0, %%edi) \n\t" +- "movd %%mm0, 8(%0, %%edi) \n\t" ++ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" ++ "movq %%mm6, (%0, %%"REG_D") \n\t" ++ "movd %%mm0, 8(%0, %%"REG_D") \n\t" + "punpckhdq %%mm0, %%mm0 \n\t" +- "movq %%mm6, 12(%0, %%edi) \n\t" +- "movd %%mm0, 20(%0, %%edi) \n\t" +- "movq %%mm6, 24(%0, %%edi) \n\t" +- "movd %%mm1, 32(%0, %%edi) \n\t" ++ "movq %%mm6, 12(%0, %%"REG_D") \n\t" ++ "movd %%mm0, 20(%0, %%"REG_D") \n\t" ++ "movq %%mm6, 24(%0, %%"REG_D") \n\t" ++ "movd %%mm1, 32(%0, %%"REG_D") \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" +- "movq %%mm6, 36(%0, %%edi) \n\t" +- "movd %%mm1, 44(%0, %%edi) \n\t" +- "addl $16, %%esi \n\t" ++ "movq %%mm6, 36(%0, %%"REG_D") \n\t" ++ "movd %%mm1, 44(%0, %%"REG_D") \n\t" ++ "add $16, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 6*256; + } +@@ -305,17 +307,17 @@ + static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" +- "movq 1024(%1, %%esi), %%mm0 \n\t" +- "movq 2048(%1, %%esi), %%mm1 \n\t" +- "movq (%1, %%esi), %%mm5 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 2048(%1, %%"REG_S"), %%mm1\n\t" ++ "movq (%1, %%"REG_S"), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm5 \n\t" +- "leal (%%esi, %%esi, 2), %%edi \n\t" ++ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" + + "pxor %%mm4, %%mm4 \n\t" + "packssdw %%mm5, %%mm0 \n\t" // FfAa +@@ -327,15 +329,15 @@ + "punpckldq %%mm6, %%mm0 \n\t" // 00ba + "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 + +- "movq %%mm0, (%0, %%edi) \n\t" // 00ba ++ "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba + "punpckhdq %%mm4, %%mm0 \n\t" // F000 +- "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 +- "movq %%mm0, 16(%0, %%edi) \n\t" // F000 +- "addl $8, %%esi \n\t" ++ "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0 ++ "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000 ++ "add $8, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 6*256; + } +@@ -343,19 +345,19 @@ + static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" +- "movq 1024(%1, %%esi), %%mm0 \n\t" +- "movq 3072(%1, %%esi), %%mm1 \n\t" +- "movq 2048(%1, %%esi), %%mm4 \n\t" +- "movq (%1, %%esi), %%mm5 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 3072(%1, %%"REG_S"), %%mm1\n\t" ++ "movq 2048(%1, %%"REG_S"), %%mm4\n\t" ++ "movq (%1, %%"REG_S"), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm4 \n\t" + "psubd %%mm7, %%mm5 \n\t" +- "leal (%%esi, %%esi, 2), %%edi \n\t" ++ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" + + "packssdw %%mm4, %%mm0 \n\t" // EeAa + "packssdw %%mm5, %%mm1 \n\t" // FfBb +@@ -366,16 +368,16 @@ + "punpckldq %%mm6, %%mm0 \n\t" // 00ba + "punpckhdq %%mm1, %%mm1 \n\t" // BABA + +- "movq %%mm0, (%0, %%edi) \n\t" ++ "movq %%mm0, (%0, %%"REG_D") \n\t" + "punpckhdq %%mm2, %%mm0 \n\t" // FE00 + "punpckldq %%mm1, %%mm2 \n\t" // BAfe +- "movq %%mm2, 8(%0, %%edi) \n\t" +- "movq %%mm0, 16(%0, %%edi) \n\t" +- "addl $8, %%esi \n\t" ++ "movq %%mm2, 8(%0, %%"REG_D") \n\t" ++ "movq %%mm0, 16(%0, %%"REG_D") \n\t" ++ "add $8, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 6*256; + } +@@ -383,21 +385,21 @@ + static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + // "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" +- "movq 1024(%1, %%esi), %%mm0 \n\t" +- "movq 2048(%1, %%esi), %%mm1 \n\t" +- "movq 3072(%1, %%esi), %%mm2 \n\t" +- "movq 4096(%1, %%esi), %%mm3 \n\t" +- "movq (%1, %%esi), %%mm5 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 2048(%1, %%"REG_S"), %%mm1\n\t" ++ "movq 3072(%1, %%"REG_S"), %%mm2\n\t" ++ "movq 4096(%1, %%"REG_S"), %%mm3\n\t" ++ "movq (%1, %%"REG_S"), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "psubd %%mm7, %%mm5 \n\t" +- "leal (%%esi, %%esi, 2), %%edi \n\t" ++ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" + + "packssdw %%mm2, %%mm0 \n\t" // CcAa + "packssdw %%mm3, %%mm1 \n\t" // DdBb +@@ -414,14 +416,14 @@ + "punpckldq %%mm1, %%mm4 \n\t" // BAf0 + "punpckhdq %%mm3, %%mm2 \n\t" // F0DC + +- "movq %%mm0, (%0, %%edi) \n\t" +- "movq %%mm4, 8(%0, %%edi) \n\t" +- "movq %%mm2, 16(%0, %%edi) \n\t" +- "addl $8, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_D") \n\t" ++ "movq %%mm4, 8(%0, %%"REG_D") \n\t" ++ "movq %%mm2, 16(%0, %%"REG_D") \n\t" ++ "add $8, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 6*256; + } +@@ -429,23 +431,23 @@ + static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( +- "movl $-1024, %%esi \n\t" ++ "mov $-1024, %%"REG_S" \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + // "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" +- "movq 1024(%1, %%esi), %%mm0 \n\t" +- "movq 3072(%1, %%esi), %%mm1 \n\t" +- "movq 4096(%1, %%esi), %%mm2 \n\t" +- "movq 5120(%1, %%esi), %%mm3 \n\t" +- "movq 2048(%1, %%esi), %%mm4 \n\t" +- "movq (%1, %%esi), %%mm5 \n\t" ++ "movq 1024(%1, %%"REG_S"), %%mm0\n\t" ++ "movq 3072(%1, %%"REG_S"), %%mm1\n\t" ++ "movq 4096(%1, %%"REG_S"), %%mm2\n\t" ++ "movq 5120(%1, %%"REG_S"), %%mm3\n\t" ++ "movq 2048(%1, %%"REG_S"), %%mm4\n\t" ++ "movq (%1, %%"REG_S"), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "psubd %%mm7, %%mm4 \n\t" + "psubd %%mm7, %%mm5 \n\t" +- "leal (%%esi, %%esi, 2), %%edi \n\t" ++ "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" + + "packssdw %%mm2, %%mm0 \n\t" // CcAa + "packssdw %%mm3, %%mm1 \n\t" // DdBb +@@ -462,14 +464,14 @@ + "punpckldq %%mm1, %%mm4 \n\t" // BAfe + "punpckhdq %%mm3, %%mm2 \n\t" // FEDC + +- "movq %%mm0, (%0, %%edi) \n\t" +- "movq %%mm4, 8(%0, %%edi) \n\t" +- "movq %%mm2, 16(%0, %%edi) \n\t" +- "addl $8, %%esi \n\t" ++ "movq %%mm0, (%0, %%"REG_D") \n\t" ++ "movq %%mm4, 8(%0, %%"REG_D") \n\t" ++ "movq %%mm2, 16(%0, %%"REG_D") \n\t" ++ "add $8, %%"REG_S" \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) +- :"%esi", "%edi", "memory" ++ :"%"REG_S, "%"REG_D, "memory" + ); + return 6*256; + }