4142
|
/*
 * This code comes under the GPL.
 * This code was taken from http://www.mpg123.org
 * See the ChangeLog of mpg123-0.59s-pre.1 for details.
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loop unrolling and removal of MOVW instructions from loops
 */
|
|
#define real float /* ugly - but only way */

/*
 * 64-bit masks used by the inline assembly in this file when merging freshly
 * computed samples into the output buffer:
 *   null_one keeps the low  16 bits of each 32-bit half of a quadword,
 *   one_null keeps the high 16 bits of each 32-bit half.
 *
 * They are referenced only by symbol name from inside an asm string
 * ("pand one_null" / "pand null_one"), which the compiler cannot see.
 * __attribute__((used)) forces them to be emitted; without it an optimising
 * compiler may drop these apparently unreferenced statics and the link fails.
 */
static unsigned long long __attribute__((used, aligned(8))) null_one = 0x0000ffff0000ffffULL;
static unsigned long long __attribute__((used, aligned(8))) one_null = 0xffff0000ffff0000ULL;
|
|
/*
 * Table of 31 constants, exported for use outside this file (nothing in the
 * visible code reads it).
 *
 * NOTE(review): the values look like IEEE-754 single-precision bit patterns
 * stored as integers (e.g. 1056974725 == 0x3F002785, roughly 0.5006), and the
 * 16/8/4/2/1 grouping below is only a guess at the layout based on the entry
 * count -- confirm against the consumer of this table.
 */
unsigned long costab_mmx[] __attribute__((aligned(8))) =
{
    /* entries 0..15 */
    1056974725, 1057056395, 1057223771, 1057485416,
    1057855544, 1058356026, 1059019886, 1059897405,
    1061067246, 1062657950, 1064892987, 1066774581,
    1069414683, 1073984175, 1079645762, 1092815430,

    /* entries 16..23 */
    1057005197, 1057342072, 1058087743, 1059427869,
    1061799040, 1065862217, 1071413542, 1084439708,

    /* entries 24..27 */
    1057128951, 1058664893, 1063675095, 1076102863,

    /* entries 28..29 */
    1057655764, 1067924853,

    /* entry 30 */
    1060439283,
};
|
|
48
|
|
49 void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
|
|
50 short *buffs, int *bo)
|
|
51 {
|
|
52
|
|
53 __asm __volatile(
|
|
54 "movl %1,%%ecx\n\t"
|
|
55 "movl %2,%%edi\n\t"
|
|
56 "movl $15,%%ebx\n\t"
|
|
57 "movl %4,%%edx\n\t"
|
|
58 "leal (%%edi,%%ecx,2),%%edi\n\t"
|
|
59 "decl %%ecx\n\t"
|
|
60 "movl %3,%%esi\n\t"
|
|
61 "movl (%%edx),%%eax\n\t"
|
|
62 "jecxz .L1\n\t"
|
|
63 "decl %%eax\n\t"
|
|
64 "andl %%ebx,%%eax\n\t"
|
|
65 "leal 1088(%%esi),%%esi\n\t"
|
|
66 "movl %%eax,(%%edx)\n\t"
|
|
67 ".L1:\n\t"
|
|
68 "leal (%%esi,%%eax,2),%%edx\n\t"
|
|
69 "movl %%eax,%%ebp\n\t"
|
|
70 "incl %%eax\n\t"
|
|
71 "pushl %0\n\t"
|
|
72 "andl %%ebx,%%eax\n\t"
|
|
73 "leal 544(%%esi,%%eax,2),%%ecx\n\t"
|
|
74 "incl %%ebx\n\t"
|
|
75 "testl $1, %%eax\n\t"
|
|
76 "jnz .L2\n\t"
|
|
77 "xchgl %%edx,%%ecx\n\t"
|
|
78 "incl %%ebp\n\t"
|
|
79 "leal 544(%%esi),%%esi\n\t"
|
|
80 ".L2:\n\t"
|
|
81 "emms\n\t"
|
|
82 "pushl %%edx\n\t"
|
|
83 "pushl %%ecx\n\t"
|
|
84 "call *dct64_MMX_func\n\t"
|
|
85 "leal 1(%%ebx), %%ecx\n\t"
|
|
86 "subl %%ebp,%%ebx\n\t"
|
|
87 "pushl %%ecx\n\t"
|
|
88 "leal decwins(%%ebx,%%ebx,1), %%edx\n\t"
|
|
89 "shrl $1, %%ecx\n\t"
|
|
90 ".align 16\n\t"
|
|
91 ".L3:\n\t"
|
|
92 "movq (%%edx),%%mm0\n\t"
|
|
93 "movq 64(%%edx),%%mm4\n\t"
|
|
94 "pmaddwd (%%esi),%%mm0\n\t"
|
|
95 "pmaddwd 32(%%esi),%%mm4\n\t"
|
|
96 "movq 8(%%edx),%%mm1\n\t"
|
|
97 "movq 72(%%edx),%%mm5\n\t"
|
|
98 "pmaddwd 8(%%esi),%%mm1\n\t"
|
|
99 "pmaddwd 40(%%esi),%%mm5\n\t"
|
|
100 "movq 16(%%edx),%%mm2\n\t"
|
|
101 "movq 80(%%edx),%%mm6\n\t"
|
|
102 "pmaddwd 16(%%esi),%%mm2\n\t"
|
|
103 "pmaddwd 48(%%esi),%%mm6\n\t"
|
|
104 "movq 24(%%edx),%%mm3\n\t"
|
|
105 "movq 88(%%edx),%%mm7\n\t"
|
|
106 "pmaddwd 24(%%esi),%%mm3\n\t"
|
|
107 "pmaddwd 56(%%esi),%%mm7\n\t"
|
|
108 "paddd %%mm1,%%mm0\n\t"
|
|
109 "paddd %%mm5,%%mm4\n\t"
|
|
110 "paddd %%mm2,%%mm0\n\t"
|
|
111 "paddd %%mm6,%%mm4\n\t"
|
|
112 "paddd %%mm3,%%mm0\n\t"
|
|
113 "paddd %%mm7,%%mm4\n\t"
|
|
114 "movq %%mm0,%%mm1\n\t"
|
|
115 "movq %%mm4,%%mm5\n\t"
|
|
116 "psrlq $32,%%mm1\n\t"
|
|
117 "psrlq $32,%%mm5\n\t"
|
|
118 "paddd %%mm1,%%mm0\n\t"
|
|
119 "paddd %%mm5,%%mm4\n\t"
|
|
120 "psrad $13,%%mm0\n\t"
|
|
121 "psrad $13,%%mm4\n\t"
|
|
122 "packssdw %%mm0,%%mm0\n\t"
|
|
123 "packssdw %%mm4,%%mm4\n\t"
|
|
124
|
|
125 "movq (%%edi), %%mm1\n\t"
|
|
126 "punpckldq %%mm4, %%mm0\n\t"
|
|
127 "pand one_null, %%mm1\n\t"
|
|
128 "pand null_one, %%mm0\n\t"
|
|
129 "por %%mm0, %%mm1\n\t"
|
|
130 "movq %%mm1,(%%edi)\n\t"
|
|
131
|
|
132 "leal 64(%%esi),%%esi\n\t"
|
|
133 "leal 128(%%edx),%%edx\n\t"
|
|
134 "leal 8(%%edi),%%edi\n\t"
|
|
135
|
|
136 "decl %%ecx\n\t"
|
|
137 "jnz .L3\n\t"
|
|
138
|
|
139 "popl %%ecx\n\t"
|
|
140 "andl $1, %%ecx\n\t"
|
|
141 "jecxz .next_loop\n\t"
|
|
142
|
|
143 "movq (%%edx),%%mm0\n\t"
|
|
144 "pmaddwd (%%esi),%%mm0\n\t"
|
|
145 "movq 8(%%edx),%%mm1\n\t"
|
|
146 "pmaddwd 8(%%esi),%%mm1\n\t"
|
|
147 "movq 16(%%edx),%%mm2\n\t"
|
|
148 "pmaddwd 16(%%esi),%%mm2\n\t"
|
|
149 "movq 24(%%edx),%%mm3\n\t"
|
|
150 "pmaddwd 24(%%esi),%%mm3\n\t"
|
|
151 "paddd %%mm1,%%mm0\n\t"
|
|
152 "paddd %%mm2,%%mm0\n\t"
|
|
153 "paddd %%mm3,%%mm0\n\t"
|
|
154 "movq %%mm0,%%mm1\n\t"
|
|
155 "psrlq $32,%%mm1\n\t"
|
|
156 "paddd %%mm1,%%mm0\n\t"
|
|
157 "psrad $13,%%mm0\n\t"
|
|
158 "packssdw %%mm0,%%mm0\n\t"
|
|
159 "movd %%mm0,%%eax\n\t"
|
|
160 "movw %%ax, (%%edi)\n\t"
|
|
161 "leal 32(%%esi),%%esi\n\t"
|
|
162 "leal 64(%%edx),%%edx\n\t"
|
|
163 "leal 4(%%edi),%%edi\n\t"
|
|
164
|
|
165 ".next_loop:\n\t"
|
|
166 "subl $64,%%esi\n\t"
|
|
167 "movl $7,%%ecx\n\t"
|
|
168 ".align 16\n\t"
|
|
169 ".L4:\n\t"
|
|
170 "movq (%%edx),%%mm0\n\t"
|
|
171 "movq 64(%%edx),%%mm4\n\t"
|
|
172 "pmaddwd (%%esi),%%mm0\n\t"
|
|
173 "pmaddwd -32(%%esi),%%mm4\n\t"
|
|
174 "movq 8(%%edx),%%mm1\n\t"
|
|
175 "movq 72(%%edx),%%mm5\n\t"
|
|
176 "pmaddwd 8(%%esi),%%mm1\n\t"
|
|
177 "pmaddwd -24(%%esi),%%mm5\n\t"
|
|
178 "movq 16(%%edx),%%mm2\n\t"
|
|
179 "movq 80(%%edx),%%mm6\n\t"
|
|
180 "pmaddwd 16(%%esi),%%mm2\n\t"
|
|
181 "pmaddwd -16(%%esi),%%mm6\n\t"
|
|
182 "movq 24(%%edx),%%mm3\n\t"
|
|
183 "movq 88(%%edx),%%mm7\n\t"
|
|
184 "pmaddwd 24(%%esi),%%mm3\n\t"
|
|
185 "pmaddwd -8(%%esi),%%mm7\n\t"
|
|
186 "paddd %%mm1,%%mm0\n\t"
|
|
187 "paddd %%mm5,%%mm4\n\t"
|
|
188 "paddd %%mm2,%%mm0\n\t"
|
|
189 "paddd %%mm6,%%mm4\n\t"
|
|
190 "paddd %%mm3,%%mm0\n\t"
|
|
191 "paddd %%mm7,%%mm4\n\t"
|
|
192 "movq %%mm0,%%mm1\n\t"
|
|
193 "movq %%mm4,%%mm5\n\t"
|
|
194 "psrlq $32,%%mm1\n\t"
|
|
195 "psrlq $32,%%mm5\n\t"
|
|
196 "paddd %%mm0,%%mm1\n\t"
|
|
197 "paddd %%mm4,%%mm5\n\t"
|
|
198 "psrad $13,%%mm1\n\t"
|
|
199 "psrad $13,%%mm5\n\t"
|
|
200 "packssdw %%mm1,%%mm1\n\t"
|
|
201 "packssdw %%mm5,%%mm5\n\t"
|
|
202 "psubd %%mm0,%%mm0\n\t"
|
|
203 "psubd %%mm4,%%mm4\n\t"
|
|
204 "psubsw %%mm1,%%mm0\n\t"
|
|
205 "psubsw %%mm5,%%mm4\n\t"
|
|
206
|
|
207 "movq (%%edi), %%mm1\n\t"
|
|
208 "punpckldq %%mm4, %%mm0\n\t"
|
|
209 "pand one_null, %%mm1\n\t"
|
|
210 "pand null_one, %%mm0\n\t"
|
|
211 "por %%mm0, %%mm1\n\t"
|
|
212 "movq %%mm1,(%%edi)\n\t"
|
|
213
|
|
214 "subl $64,%%esi\n\t"
|
|
215 "addl $128,%%edx\n\t"
|
|
216 "leal 8(%%edi),%%edi\n\t"
|
|
217 "decl %%ecx\n\t"
|
|
218 "jnz .L4\n\t"
|
|
219
|
|
220 "movq (%%edx),%%mm0\n\t"
|
|
221 "pmaddwd (%%esi),%%mm0\n\t"
|
|
222 "movq 8(%%edx),%%mm1\n\t"
|
|
223 "pmaddwd 8(%%esi),%%mm1\n\t"
|
|
224 "movq 16(%%edx),%%mm2\n\t"
|
|
225 "pmaddwd 16(%%esi),%%mm2\n\t"
|
|
226 "movq 24(%%edx),%%mm3\n\t"
|
|
227 "pmaddwd 24(%%esi),%%mm3\n\t"
|
|
228 "paddd %%mm1,%%mm0\n\t"
|
|
229 "paddd %%mm2,%%mm0\n\t"
|
|
230 "paddd %%mm3,%%mm0\n\t"
|
|
231 "movq %%mm0,%%mm1\n\t"
|
|
232 "psrlq $32,%%mm1\n\t"
|
|
233 "paddd %%mm0,%%mm1\n\t"
|
|
234 "psrad $13,%%mm1\n\t"
|
|
235 "packssdw %%mm1,%%mm1\n\t"
|
|
236 "psubd %%mm0,%%mm0\n\t"
|
|
237 "psubsw %%mm1,%%mm0\n\t"
|
|
238 "movd %%mm0,%%eax\n\t"
|
|
239 "movw %%ax,(%%edi)\n\t"
|
|
240 "emms\n\t"
|
|
241 :
|
|
242 :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo)
|
|
243 :"memory","%ebp","%edi","%esi","%ebx");
|
|
244 }
|