comparison mp3lib/decode_MMX.c @ 23342:e070d7f61e9a

Rewrite the generic code in decode_MMX.c in C to make an AMD64 port easier. The result is slightly faster than the original assembly.
author zuxy
date Mon, 21 May 2007 01:47:27 +0000
parents 8092494fc92c
children ccb70d86d797
comparison
equal deleted inserted replaced
23341:74f5109611e2 23342:e070d7f61e9a
9 */ 9 */
10 #include "config.h" 10 #include "config.h"
11 #include "mangle.h" 11 #include "mangle.h"
12 #define real float /* ugly - but only way */ 12 #define real float /* ugly - but only way */
13 13
14 extern short mp3lib_decwins[];
15 extern void (*dct64_MMX_func)(short*, short*, real*);
14 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL; 16 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
15 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL; 17 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
16 unsigned long __attribute__((aligned(16))) costab_mmx[] = 18 unsigned long __attribute__((aligned(16))) costab_mmx[] =
17 { 19 {
18 1056974725, 20 1056974725,
46 1057655764, 48 1057655764,
47 1067924853, 49 1067924853,
48 1060439283, 50 1060439283,
49 }; 51 };
50 52
51 static int temp; // buggy gcc 3.x fails if this is moved into the function :( 53 int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
52 void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
53 short *buffs, int *bo)
54 { 54 {
55 55 static short buffs[2][2][0x110] __attribute__((aligned(8)));
56 static int bo = 1;
57 short *b0, (*buf)[0x110], *a, *b;
58 short* window;
59 int bo1, i = 8;
60
61 if (channel == 0) {
62 bo = (bo - 1) & 0xf;
63 buf = buffs[1];
64 } else {
65 samples++;
66 buf = buffs[0];
67 }
68
69 if (bo & 1) {
70 b0 = buf[1];
71 bo1 = bo + 1;
72 a = buf[0] + bo;
73 b = buf[1] + ((bo + 1) & 0xf);
74 } else {
75 b0 = buf[0];
76 bo1 = bo;
77 b = buf[0] + bo;
78 a = buf[1] + ((bo + 1) & 0xf);
79 }
80
81 dct64_MMX_func(a, b, bandPtr);
82 window = mp3lib_decwins + 16 - bo1;
83 //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);
56 __asm __volatile( 84 __asm __volatile(
57 "movl %1,%%ecx\n\t"
58 "movl %2,%%edi\n\t"
59 "movl $15,%%ebx\n\t"
60 "movl %4,%%edx\n\t"
61 "leal (%%edi,%%ecx,2),%%edi\n\t"
62 "decl %%ecx\n\t"
63 "movl %3,%%esi\n\t"
64 "movl (%%edx),%%eax\n\t"
65 "jecxz .L01\n\t"
66 "decl %%eax\n\t"
67 "andl %%ebx,%%eax\n\t"
68 "leal 1088(%%esi),%%esi\n\t"
69 "movl %%eax,(%%edx)\n\t"
70 ".L01:\n\t"
71 "leal (%%esi,%%eax,2),%%edx\n\t"
72 "movl %%eax,%5\n\t"
73 "incl %%eax\n\t"
74 "andl %%ebx,%%eax\n\t"
75 "leal 544(%%esi,%%eax,2),%%ecx\n\t"
76 "incl %%ebx\n\t"
77 "testl $1, %%eax\n\t"
78 "jnz .L02\n\t"
79 "xchgl %%edx,%%ecx\n\t"
80 "incl %5\n\t"
81 "leal 544(%%esi),%%esi\n\t"
82 ".L02:\n\t"
83 "emms\n\t"
84 "pushl %0\n\t"
85 "pushl %%edx\n\t"
86 "pushl %%ecx\n\t"
87 "call *"MANGLE(dct64_MMX_func)"\n\t"
88 "addl $12, %%esp\n\t"
89 "leal 1(%%ebx), %%ecx\n\t"
90 "subl %5,%%ebx\n\t"
91 "pushl %%ecx\n\t"
92 "leal "MANGLE(mp3lib_decwins)"(%%ebx,%%ebx,1), %%edx\n\t"
93 "shrl $1, %%ecx\n\t"
94 ASMALIGN(4) 85 ASMALIGN(4)
95 ".L03:\n\t" 86 ".L03:\n\t"
96 "movq (%%edx),%%mm0\n\t" 87 "movq (%%edx),%%mm0\n\t"
97 "movq 64(%%edx),%%mm4\n\t" 88 "movq 64(%%edx),%%mm4\n\t"
98 "pmaddwd (%%esi),%%mm0\n\t" 89 "pmaddwd (%%esi),%%mm0\n\t"
137 "leal 128(%%edx),%%edx\n\t" 128 "leal 128(%%edx),%%edx\n\t"
138 "leal 8(%%edi),%%edi\n\t" 129 "leal 8(%%edi),%%edi\n\t"
139 130
140 "decl %%ecx\n\t" 131 "decl %%ecx\n\t"
141 "jnz .L03\n\t" 132 "jnz .L03\n\t"
142
143 "popl %%ecx\n\t"
144 "andl $1, %%ecx\n\t"
145 "jecxz .next_loop\n\t"
146 133
147 "movq (%%edx),%%mm0\n\t" 134 "movq (%%edx),%%mm0\n\t"
148 "pmaddwd (%%esi),%%mm0\n\t" 135 "pmaddwd (%%esi),%%mm0\n\t"
149 "movq 8(%%edx),%%mm1\n\t" 136 "movq 8(%%edx),%%mm1\n\t"
150 "pmaddwd 8(%%esi),%%mm1\n\t" 137 "pmaddwd 8(%%esi),%%mm1\n\t"
164 "movw %%ax, (%%edi)\n\t" 151 "movw %%ax, (%%edi)\n\t"
165 "leal 32(%%esi),%%esi\n\t" 152 "leal 32(%%esi),%%esi\n\t"
166 "leal 64(%%edx),%%edx\n\t" 153 "leal 64(%%edx),%%edx\n\t"
167 "leal 4(%%edi),%%edi\n\t" 154 "leal 4(%%edi),%%edi\n\t"
168 155
169 ".next_loop:\n\t"
170 "subl $64,%%esi\n\t" 156 "subl $64,%%esi\n\t"
171 "movl $7,%%ecx\n\t" 157 "movl $7,%%ecx\n\t"
172 ASMALIGN(4) 158 ASMALIGN(4)
173 ".L04:\n\t" 159 ".L04:\n\t"
174 "movq (%%edx),%%mm0\n\t" 160 "movq (%%edx),%%mm0\n\t"
240 "psubd %%mm0,%%mm0\n\t" 226 "psubd %%mm0,%%mm0\n\t"
241 "psubsw %%mm1,%%mm0\n\t" 227 "psubsw %%mm1,%%mm0\n\t"
242 "movd %%mm0,%%eax\n\t" 228 "movd %%mm0,%%eax\n\t"
243 "movw %%ax,(%%edi)\n\t" 229 "movw %%ax,(%%edi)\n\t"
244 "emms\n\t" 230 "emms\n\t"
245 : 231 :"+c"(i), "+d"(window), "+S"(b0), "+D"(samples)
246 :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo), "m"(temp) 232 :
247 :"memory","%edi","%esi","%eax","%ebx","%ecx","%edx","%esp"); 233 :"memory", "%eax");
234 return 0;
248 } 235 }
236