Mercurial > mplayer.hg
comparison mp3lib/decode_MMX.c @ 23342:e070d7f61e9a
Rewrite the generic code in decode_MMX.c in C to ease an AMD64 port. The result is slightly faster than the original assembly.
author | zuxy |
---|---|
date | Mon, 21 May 2007 01:47:27 +0000 |
parents | 8092494fc92c |
children | ccb70d86d797 |
comparison (legend: equal, deleted, inserted, replaced)
23341:74f5109611e2 | 23342:e070d7f61e9a |
---|---|
9 */ | 9 */ |
10 #include "config.h" | 10 #include "config.h" |
11 #include "mangle.h" | 11 #include "mangle.h" |
12 #define real float /* ugly - but only way */ | 12 #define real float /* ugly - but only way */ |
13 | 13 |
14 extern short mp3lib_decwins[]; | |
15 extern void (*dct64_MMX_func)(short*, short*, real*); | |
14 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL; | 16 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL; |
15 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL; | 17 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL; |
16 unsigned long __attribute__((aligned(16))) costab_mmx[] = | 18 unsigned long __attribute__((aligned(16))) costab_mmx[] = |
17 { | 19 { |
18 1056974725, | 20 1056974725, |
46 1057655764, | 48 1057655764, |
47 1067924853, | 49 1067924853, |
48 1060439283, | 50 1060439283, |
49 }; | 51 }; |
50 | 52 |
51 static int temp; // buggy gcc 3.x fails if this is moved into the function :( | 53 int synth_1to1_MMX(real *bandPtr, int channel, short *samples) |
52 void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples, | |
53 short *buffs, int *bo) | |
54 { | 54 { |
55 | 55 static short buffs[2][2][0x110] __attribute__((aligned(8))); |
56 static int bo = 1; | |
57 short *b0, (*buf)[0x110], *a, *b; | |
58 short* window; | |
59 int bo1, i = 8; | |
60 | |
61 if (channel == 0) { | |
62 bo = (bo - 1) & 0xf; | |
63 buf = buffs[1]; | |
64 } else { | |
65 samples++; | |
66 buf = buffs[0]; | |
67 } | |
68 | |
69 if (bo & 1) { | |
70 b0 = buf[1]; | |
71 bo1 = bo + 1; | |
72 a = buf[0] + bo; | |
73 b = buf[1] + ((bo + 1) & 0xf); | |
74 } else { | |
75 b0 = buf[0]; | |
76 bo1 = bo; | |
77 b = buf[0] + bo; | |
78 a = buf[1] + ((bo + 1) & 0xf); | |
79 } | |
80 | |
81 dct64_MMX_func(a, b, bandPtr); | |
82 window = mp3lib_decwins + 16 - bo1; | |
83 //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1); | |
56 __asm __volatile( | 84 __asm __volatile( |
57 "movl %1,%%ecx\n\t" | |
58 "movl %2,%%edi\n\t" | |
59 "movl $15,%%ebx\n\t" | |
60 "movl %4,%%edx\n\t" | |
61 "leal (%%edi,%%ecx,2),%%edi\n\t" | |
62 "decl %%ecx\n\t" | |
63 "movl %3,%%esi\n\t" | |
64 "movl (%%edx),%%eax\n\t" | |
65 "jecxz .L01\n\t" | |
66 "decl %%eax\n\t" | |
67 "andl %%ebx,%%eax\n\t" | |
68 "leal 1088(%%esi),%%esi\n\t" | |
69 "movl %%eax,(%%edx)\n\t" | |
70 ".L01:\n\t" | |
71 "leal (%%esi,%%eax,2),%%edx\n\t" | |
72 "movl %%eax,%5\n\t" | |
73 "incl %%eax\n\t" | |
74 "andl %%ebx,%%eax\n\t" | |
75 "leal 544(%%esi,%%eax,2),%%ecx\n\t" | |
76 "incl %%ebx\n\t" | |
77 "testl $1, %%eax\n\t" | |
78 "jnz .L02\n\t" | |
79 "xchgl %%edx,%%ecx\n\t" | |
80 "incl %5\n\t" | |
81 "leal 544(%%esi),%%esi\n\t" | |
82 ".L02:\n\t" | |
83 "emms\n\t" | |
84 "pushl %0\n\t" | |
85 "pushl %%edx\n\t" | |
86 "pushl %%ecx\n\t" | |
87 "call *"MANGLE(dct64_MMX_func)"\n\t" | |
88 "addl $12, %%esp\n\t" | |
89 "leal 1(%%ebx), %%ecx\n\t" | |
90 "subl %5,%%ebx\n\t" | |
91 "pushl %%ecx\n\t" | |
92 "leal "MANGLE(mp3lib_decwins)"(%%ebx,%%ebx,1), %%edx\n\t" | |
93 "shrl $1, %%ecx\n\t" | |
94 ASMALIGN(4) | 85 ASMALIGN(4) |
95 ".L03:\n\t" | 86 ".L03:\n\t" |
96 "movq (%%edx),%%mm0\n\t" | 87 "movq (%%edx),%%mm0\n\t" |
97 "movq 64(%%edx),%%mm4\n\t" | 88 "movq 64(%%edx),%%mm4\n\t" |
98 "pmaddwd (%%esi),%%mm0\n\t" | 89 "pmaddwd (%%esi),%%mm0\n\t" |
137 "leal 128(%%edx),%%edx\n\t" | 128 "leal 128(%%edx),%%edx\n\t" |
138 "leal 8(%%edi),%%edi\n\t" | 129 "leal 8(%%edi),%%edi\n\t" |
139 | 130 |
140 "decl %%ecx\n\t" | 131 "decl %%ecx\n\t" |
141 "jnz .L03\n\t" | 132 "jnz .L03\n\t" |
142 | |
143 "popl %%ecx\n\t" | |
144 "andl $1, %%ecx\n\t" | |
145 "jecxz .next_loop\n\t" | |
146 | 133 |
147 "movq (%%edx),%%mm0\n\t" | 134 "movq (%%edx),%%mm0\n\t" |
148 "pmaddwd (%%esi),%%mm0\n\t" | 135 "pmaddwd (%%esi),%%mm0\n\t" |
149 "movq 8(%%edx),%%mm1\n\t" | 136 "movq 8(%%edx),%%mm1\n\t" |
150 "pmaddwd 8(%%esi),%%mm1\n\t" | 137 "pmaddwd 8(%%esi),%%mm1\n\t" |
164 "movw %%ax, (%%edi)\n\t" | 151 "movw %%ax, (%%edi)\n\t" |
165 "leal 32(%%esi),%%esi\n\t" | 152 "leal 32(%%esi),%%esi\n\t" |
166 "leal 64(%%edx),%%edx\n\t" | 153 "leal 64(%%edx),%%edx\n\t" |
167 "leal 4(%%edi),%%edi\n\t" | 154 "leal 4(%%edi),%%edi\n\t" |
168 | 155 |
169 ".next_loop:\n\t" | |
170 "subl $64,%%esi\n\t" | 156 "subl $64,%%esi\n\t" |
171 "movl $7,%%ecx\n\t" | 157 "movl $7,%%ecx\n\t" |
172 ASMALIGN(4) | 158 ASMALIGN(4) |
173 ".L04:\n\t" | 159 ".L04:\n\t" |
174 "movq (%%edx),%%mm0\n\t" | 160 "movq (%%edx),%%mm0\n\t" |
240 "psubd %%mm0,%%mm0\n\t" | 226 "psubd %%mm0,%%mm0\n\t" |
241 "psubsw %%mm1,%%mm0\n\t" | 227 "psubsw %%mm1,%%mm0\n\t" |
242 "movd %%mm0,%%eax\n\t" | 228 "movd %%mm0,%%eax\n\t" |
243 "movw %%ax,(%%edi)\n\t" | 229 "movw %%ax,(%%edi)\n\t" |
244 "emms\n\t" | 230 "emms\n\t" |
245 : | 231 :"+c"(i), "+d"(window), "+S"(b0), "+D"(samples) |
246 :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo), "m"(temp) | 232 : |
247 :"memory","%edi","%esi","%eax","%ebx","%ecx","%edx","%esp"); | 233 :"memory", "%eax"); |
234 return 0; | |
248 } | 235 } |
236 |