annotate liba52/resample_mmx.c @ 8763:19e96e60a3d0

Speed optimizations (runs twice as fast) and bugfixes (wrong cutoff frequency, buffer overrun noise, and garbled output when the input format is wrong)
author anders
date Sat, 04 Jan 2003 06:19:25 +0000
parents 9fc45fe0d444
children f881c918739b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3412
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
1
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
2 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL)
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
3
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
4 /* optimization TODO / NOTES
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
5 movntq is slightly faster (0.5% with the current test.c benchmark)
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
6 (but that's just test.c, so that needs to be tested in reality)
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
7 and it would mean (C / MMX2 / MMX / 3DNOW) versions
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
8 */
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
9
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
10 static uint64_t __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL;
8600f40003de mmx opt
michael
parents: 3569
diff changeset
11 static uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL;
8600f40003de mmx opt
michael
parents: 3569
diff changeset
12 static uint64_t __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL;
8600f40003de mmx opt
michael
parents: 3569
diff changeset
13 static uint64_t __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL;
3412
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
14
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
15 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
3412
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
16 int32_t * f = (int32_t *) _f;
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
17 asm volatile(
8600f40003de mmx opt
michael
parents: 3569
diff changeset
18 "movl $-512, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
19 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
20 "movq "MANGLE(wm1100)", %%mm3 \n\t"
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
21 "movq "MANGLE(wm0101)", %%mm4 \n\t"
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
22 "movq "MANGLE(wm1010)", %%mm5 \n\t"
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
23 "pxor %%mm6, %%mm6 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
24 "1: \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
25 "movq (%1, %%esi, 2), %%mm0 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
26 "movq 8(%1, %%esi, 2), %%mm1 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
27 "leal (%%esi, %%esi, 4), %%edi \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
28 "psubd %%mm7, %%mm0 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
29 "psubd %%mm7, %%mm1 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
30 "packssdw %%mm1, %%mm0 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
31 "movq %%mm0, %%mm1 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
32 "pand %%mm4, %%mm0 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
33 "pand %%mm5, %%mm1 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
34 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0
8600f40003de mmx opt
michael
parents: 3569
diff changeset
35 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0
8600f40003de mmx opt
michael
parents: 3569
diff changeset
36 "pand %%mm3, %%mm0 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
37 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0
8600f40003de mmx opt
michael
parents: 3569
diff changeset
38 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B
8600f40003de mmx opt
michael
parents: 3569
diff changeset
39 "pand %%mm3, %%mm1 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
40 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0
8600f40003de mmx opt
michael
parents: 3569
diff changeset
41 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0
8600f40003de mmx opt
michael
parents: 3569
diff changeset
42 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D
8600f40003de mmx opt
michael
parents: 3569
diff changeset
43 "addl $8, %%esi \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
44 " jnz 1b \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
45 "emms \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
46 :: "r" (s16+1280), "r" (f+256)
8600f40003de mmx opt
michael
parents: 3569
diff changeset
47 :"%esi", "%edi", "memory"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
48 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
49 return 5*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
50 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
51
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
52 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
53 int32_t * f = (int32_t *) _f;
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
54 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
55 #ifdef HAVE_SSE
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
56 asm volatile(
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
57 "movl $-1024, %%esi \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
58 "1: \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
59 "cvtps2pi (%1, %%esi), %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
60 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
61 "movq %%mm0, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
62 "punpcklwd %%mm2, %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
63 "punpckhwd %%mm2, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
64 "movq %%mm0, (%0, %%esi) \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
65 "movq %%mm1, 8(%0, %%esi) \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
66 "addl $16, %%esi \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
67 " jnz 1b \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
68 "emms \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
69 :: "r" (s16+512), "r" (f+256)
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
70 :"%esi", "memory"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
71 );*/
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
72 asm volatile(
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
73 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
74 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
75 "1: \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
76 "movq (%1, %%esi), %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
77 "movq 8(%1, %%esi), %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
78 "movq 1024(%1, %%esi), %%mm2 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
79 "movq 1032(%1, %%esi), %%mm3 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
80 "psubd %%mm7, %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
81 "psubd %%mm7, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
82 "psubd %%mm7, %%mm2 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
83 "psubd %%mm7, %%mm3 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
84 "packssdw %%mm1, %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
85 "packssdw %%mm3, %%mm2 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
86 "movq %%mm0, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
87 "punpcklwd %%mm2, %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
88 "punpckhwd %%mm2, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
89 "movq %%mm0, (%0, %%esi) \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
90 "movq %%mm1, 8(%0, %%esi) \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
91 "addl $16, %%esi \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
92 " jnz 1b \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
93 "emms \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
94 :: "r" (s16+512), "r" (f+256)
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
95 :"%esi", "memory"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
96 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
97 return 2*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
98 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
99
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
100 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
101 int32_t * f = (int32_t *) _f;
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
102 asm volatile(
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
103 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
104 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
105 "pxor %%mm6, %%mm6 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
106 "movq %%mm7, %%mm5 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
107 "punpckldq %%mm6, %%mm5 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
108 "1: \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
109 "movd (%1, %%esi), %%mm0 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
110 "punpckldq 2048(%1, %%esi), %%mm0\n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
111 "movd 1024(%1, %%esi), %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
112 "punpckldq 4(%1, %%esi), %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
113 "movd 2052(%1, %%esi), %%mm2 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
114 "movq %%mm7, %%mm3 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
115 "punpckldq 1028(%1, %%esi), %%mm3\n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
116 "movd 8(%1, %%esi), %%mm4 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
117 "punpckldq 2056(%1, %%esi), %%mm4\n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
118 "leal (%%esi, %%esi, 4), %%edi \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
119 "sarl $1, %%edi \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
120 "psubd %%mm7, %%mm0 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
121 "psubd %%mm7, %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
122 "psubd %%mm5, %%mm2 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
123 "psubd %%mm7, %%mm3 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
124 "psubd %%mm7, %%mm4 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
125 "packssdw %%mm6, %%mm0 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
126 "packssdw %%mm2, %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
127 "packssdw %%mm4, %%mm3 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
128 "movq %%mm0, (%0, %%edi) \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
129 "movq %%mm1, 8(%0, %%edi) \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
130 "movq %%mm3, 16(%0, %%edi) \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
131
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
132 "movd 1032(%1, %%esi), %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
133 "punpckldq 12(%1, %%esi), %%mm1\n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
134 "movd 2060(%1, %%esi), %%mm2 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
135 "movq %%mm7, %%mm3 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
136 "punpckldq 1036(%1, %%esi), %%mm3\n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
137 "pxor %%mm0, %%mm0 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
138 "psubd %%mm7, %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
139 "psubd %%mm5, %%mm2 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
140 "psubd %%mm7, %%mm3 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
141 "packssdw %%mm1, %%mm0 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
142 "packssdw %%mm3, %%mm2 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
143 "movq %%mm0, 24(%0, %%edi) \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
144 "movq %%mm2, 32(%0, %%edi) \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
145
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
146 "addl $16, %%esi \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
147 " jnz 1b \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
148 "emms \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
149 :: "r" (s16+1280), "r" (f+256)
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
150 :"%esi", "%edi", "memory"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
151 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
152 return 5*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
153 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
154
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
155 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
156 int32_t * f = (int32_t *) _f;
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
157 asm volatile(
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
158 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
159 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
160 "1: \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
161 "movq (%1, %%esi), %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
162 "movq 8(%1, %%esi), %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
163 "movq 1024(%1, %%esi), %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
164 "movq 1032(%1, %%esi), %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
165 "psubd %%mm7, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
166 "psubd %%mm7, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
167 "psubd %%mm7, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
168 "psubd %%mm7, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
169 "packssdw %%mm1, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
170 "packssdw %%mm3, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
171 "movq 2048(%1, %%esi), %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
172 "movq 2056(%1, %%esi), %%mm4 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
173 "movq 3072(%1, %%esi), %%mm5 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
174 "movq 3080(%1, %%esi), %%mm6 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
175 "psubd %%mm7, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
176 "psubd %%mm7, %%mm4 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
177 "psubd %%mm7, %%mm5 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
178 "psubd %%mm7, %%mm6 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
179 "packssdw %%mm4, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
180 "packssdw %%mm6, %%mm5 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
181 "movq %%mm0, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
182 "movq %%mm3, %%mm4 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
183 "punpcklwd %%mm2, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
184 "punpckhwd %%mm2, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
185 "punpcklwd %%mm5, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
186 "punpckhwd %%mm5, %%mm4 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
187 "movq %%mm0, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
188 "movq %%mm1, %%mm5 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
189 "punpckldq %%mm3, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
190 "punpckhdq %%mm3, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
191 "punpckldq %%mm4, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
192 "punpckhdq %%mm4, %%mm5 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
193 "movq %%mm0, (%0, %%esi,2) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
194 "movq %%mm2, 8(%0, %%esi,2) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
195 "movq %%mm1, 16(%0, %%esi,2) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
196 "movq %%mm5, 24(%0, %%esi,2) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
197 "addl $16, %%esi \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
198 " jnz 1b \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
199 "emms \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
200 :: "r" (s16+1024), "r" (f+256)
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
201 :"%esi", "memory"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
202 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
203 return 4*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
204 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
205
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
206 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
207 int32_t * f = (int32_t *) _f;
3653
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
208 asm volatile(
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
209 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
210 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3653
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
211 "1: \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
212 "movd (%1, %%esi), %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
213 "punpckldq 2048(%1, %%esi), %%mm0\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
214 "movd 3072(%1, %%esi), %%mm1 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
215 "punpckldq 4096(%1, %%esi), %%mm1\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
216 "movd 1024(%1, %%esi), %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
217 "punpckldq 4(%1, %%esi), %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
218 "movd 2052(%1, %%esi), %%mm3 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
219 "punpckldq 3076(%1, %%esi), %%mm3\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
220 "movd 4100(%1, %%esi), %%mm4 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
221 "punpckldq 1028(%1, %%esi), %%mm4\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
222 "movd 8(%1, %%esi), %%mm5 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
223 "punpckldq 2056(%1, %%esi), %%mm5\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
224 "leal (%%esi, %%esi, 4), %%edi \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
225 "sarl $1, %%edi \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
226 "psubd %%mm7, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
227 "psubd %%mm7, %%mm1 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
228 "psubd %%mm7, %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
229 "psubd %%mm7, %%mm3 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
230 "psubd %%mm7, %%mm4 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
231 "psubd %%mm7, %%mm5 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
232 "packssdw %%mm1, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
233 "packssdw %%mm3, %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
234 "packssdw %%mm5, %%mm4 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
235 "movq %%mm0, (%0, %%edi) \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
236 "movq %%mm2, 8(%0, %%edi) \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
237 "movq %%mm4, 16(%0, %%edi) \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
238
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
239 "movd 3080(%1, %%esi), %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
240 "punpckldq 4104(%1, %%esi), %%mm0\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
241 "movd 1032(%1, %%esi), %%mm1 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
242 "punpckldq 12(%1, %%esi), %%mm1\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
243 "movd 2060(%1, %%esi), %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
244 "punpckldq 3084(%1, %%esi), %%mm2\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
245 "movd 4108(%1, %%esi), %%mm3 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
246 "punpckldq 1036(%1, %%esi), %%mm3\n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
247 "psubd %%mm7, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
248 "psubd %%mm7, %%mm1 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
249 "psubd %%mm7, %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
250 "psubd %%mm7, %%mm3 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
251 "packssdw %%mm1, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
252 "packssdw %%mm3, %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
253 "movq %%mm0, 24(%0, %%edi) \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
254 "movq %%mm2, 32(%0, %%edi) \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
255
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
256 "addl $16, %%esi \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
257 " jnz 1b \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
258 "emms \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
259 :: "r" (s16+1280), "r" (f+256)
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
260 :"%esi", "%edi", "memory"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
261 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
262 return 5*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
263 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
264
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
265 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
266 int32_t * f = (int32_t *) _f;
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
267 asm volatile(
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
268 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
269 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
270 "pxor %%mm6, %%mm6 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
271 "1: \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
272 "movq 1024(%1, %%esi), %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
273 "movq 1032(%1, %%esi), %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
274 "movq (%1, %%esi), %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
275 "movq 8(%1, %%esi), %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
276 "psubd %%mm7, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
277 "psubd %%mm7, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
278 "psubd %%mm7, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
279 "psubd %%mm7, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
280 "packssdw %%mm1, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
281 "packssdw %%mm3, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
282 "movq %%mm0, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
283 "punpcklwd %%mm2, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
284 "punpckhwd %%mm2, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
285 "leal (%%esi, %%esi, 2), %%edi \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
286 "movq %%mm6, (%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
287 "movd %%mm0, 8(%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
288 "punpckhdq %%mm0, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
289 "movq %%mm6, 12(%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
290 "movd %%mm0, 20(%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
291 "movq %%mm6, 24(%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
292 "movd %%mm1, 32(%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
293 "punpckhdq %%mm1, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
294 "movq %%mm6, 36(%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
295 "movd %%mm1, 44(%0, %%edi) \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
296 "addl $16, %%esi \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
297 " jnz 1b \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
298 "emms \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
299 :: "r" (s16+1536), "r" (f+256)
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
300 :"%esi", "%edi", "memory"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
301 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
302 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
303 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
304
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * MMX conversion of one 256-sample block to interleaved 6-channel int16.
 *
 * The input is read as 32-bit integers, 256 per plane, plane k starting at
 * f[256*k]. magicF2W is subtracted from every 32-bit word (psubd) and the
 * result is narrowed to int16 with signed saturation (packssdw).
 * NOTE(review): presumably the floats arrive pre-biased so that this integer
 * subtraction yields the sample value - confirm against the decoder setup.
 *
 * Output frame layout produced by the shuffles below:
 *   slot 0 = plane 1, slot 1 = plane 2, slots 2-4 = 0, slot 5 = plane 0
 * (plane 0 is presumably the LFE plane, going by the function name).
 *
 * The lane comments list the four 16-bit words of a register from most to
 * least significant; lower case is the first sample of the pair, upper case
 * the second, 0 a zero word and X a stale word.
 *
 * Each pass reads 2 samples per plane and stores 2 frames (24 bytes); the
 * 128 passes fill s16[0] .. s16[1535].
 * NOTE(review): esi/edi are hard-coded as index registers, which looks
 * 32-bit x86 specific - confirm before building for other targets.
 *
 * Returns 6*256, the number of int16 values written.
 */
305 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
306 int32_t * f = (int32_t *) _f;
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
307 asm volatile(
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// esi: byte offset into each input plane, counts from -1024 up to 0
308 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
// mm7: constant subtracted from every input word
309 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// mm6: all-zero register, source of the zero words in the first store
310 "pxor %%mm6, %%mm6 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
311 "1: \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// %1 points at the end of plane 0, so 1024(%1,esi) is plane 1,
// 2048(%1,esi) is plane 2 and (%1,esi) is plane 0
312 "movq 1024(%1, %%esi), %%mm0 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
313 "movq 2048(%1, %%esi), %%mm1 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
314 "movq (%1, %%esi), %%mm5 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
315 "psubd %%mm7, %%mm0 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
316 "psubd %%mm7, %%mm1 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
317 "psubd %%mm7, %%mm5 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// edi = 3*esi: output byte offset (24 output bytes per 8 input bytes)
318 "leal (%%esi, %%esi, 2), %%edi \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
319
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
320 "pxor %%mm4, %%mm4 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
321 "packssdw %%mm5, %%mm0 \n\t" // FfAa
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
322 "packssdw %%mm4, %%mm1 \n\t" // 00Bb
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
323 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
324 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
325 "movq %%mm0, %%mm1 \n\t" // BAba
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// mm3 is not initialised before this point; only its low half (XX) is
// stale and that half is discarded by the punpckhdq on mm3 below
326 "punpckldq %%mm4, %%mm3 \n\t" // f0XX
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
327 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
328 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
329
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// three 8-byte stores = two complete 6-channel frames
330 "movq %%mm0, (%0, %%edi) \n\t" // 00ba
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
331 "punpckhdq %%mm4, %%mm0 \n\t" // F000
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
332 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
333 "movq %%mm0, 16(%0, %%edi) \n\t" // F000
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// jnz uses the zero flag set by addl: the loop ends when esi reaches 0
334 "addl $8, %%esi \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
335 " jnz 1b \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// emms empties the MMX state so that x87 code can run afterwards
336 "emms \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
// %0 = one past the last output sample, %1 = end of input plane 0
337 :: "r" (s16+1536), "r" (f+256)
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
338 :"%esi", "%edi", "memory"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
339 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
340 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
341 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
342
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * MMX conversion of one 256-sample block to interleaved 6-channel int16.
 *
 * The input is read as 32-bit integers, 256 per plane, plane k starting at
 * f[256*k]. magicF2W is subtracted from every 32-bit word (psubd) and the
 * result is narrowed to int16 with signed saturation (packssdw).
 * NOTE(review): presumably the floats arrive pre-biased so that this integer
 * subtraction yields the sample value - confirm against the decoder setup.
 *
 * Output frame layout produced by the shuffles below:
 *   slot 0 = plane 1, slot 1 = plane 3, slots 2-3 = 0,
 *   slot 4 = plane 2, slot 5 = plane 0
 * (plane 0 is presumably the LFE plane, going by the function name).
 *
 * The lane comments list the four 16-bit words of a register from most to
 * least significant; lower case is the first sample of the pair, upper case
 * the second and 0 a zero word.
 *
 * Each pass reads 2 samples per plane and stores 2 frames (24 bytes); the
 * 128 passes fill s16[0] .. s16[1535].
 * NOTE(review): esi/edi are hard-coded as index registers, which looks
 * 32-bit x86 specific - confirm before building for other targets.
 *
 * Returns 6*256, the number of int16 values written.
 */
343 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
344 int32_t * f = (int32_t *) _f;
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
345 asm volatile(
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// esi: byte offset into each input plane, counts from -1024 up to 0
346 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
// mm7: constant subtracted from every input word
347 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// mm6: all-zero register, source of the zero words for slots 2 and 3
348 "pxor %%mm6, %%mm6 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
349 "1: \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// %1 points at the end of plane 0, so N*1024(%1,esi) addresses plane N
// and (%1,esi) addresses plane 0
350 "movq 1024(%1, %%esi), %%mm0 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
351 "movq 3072(%1, %%esi), %%mm1 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
352 "movq 2048(%1, %%esi), %%mm4 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
353 "movq (%1, %%esi), %%mm5 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
354 "psubd %%mm7, %%mm0 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
355 "psubd %%mm7, %%mm1 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
356 "psubd %%mm7, %%mm4 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
357 "psubd %%mm7, %%mm5 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// edi = 3*esi: output byte offset (24 output bytes per 8 input bytes)
358 "leal (%%esi, %%esi, 2), %%edi \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
359
79759c05911e mmx opt
michael
parents: 3577
diff changeset
360 "packssdw %%mm4, %%mm0 \n\t" // EeAa
79759c05911e mmx opt
michael
parents: 3577
diff changeset
361 "packssdw %%mm5, %%mm1 \n\t" // FfBb
79759c05911e mmx opt
michael
parents: 3577
diff changeset
362 "movq %%mm0, %%mm2 \n\t" // EeAa
79759c05911e mmx opt
michael
parents: 3577
diff changeset
363 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
79759c05911e mmx opt
michael
parents: 3577
diff changeset
364 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe
79759c05911e mmx opt
michael
parents: 3577
diff changeset
365 "movq %%mm0, %%mm1 \n\t" // BAba
79759c05911e mmx opt
michael
parents: 3577
diff changeset
366 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
79759c05911e mmx opt
michael
parents: 3577
diff changeset
367 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
79759c05911e mmx opt
michael
parents: 3577
diff changeset
368
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// three 8-byte stores = two complete 6-channel frames; the first store
// writes 00ba before mm0 is reused to build the third quadword
369 "movq %%mm0, (%0, %%edi) \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
370 "punpckhdq %%mm2, %%mm0 \n\t" // FE00
79759c05911e mmx opt
michael
parents: 3577
diff changeset
371 "punpckldq %%mm1, %%mm2 \n\t" // BAfe
79759c05911e mmx opt
michael
parents: 3577
diff changeset
372 "movq %%mm2, 8(%0, %%edi) \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
373 "movq %%mm0, 16(%0, %%edi) \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// jnz uses the zero flag set by addl: the loop ends when esi reaches 0
374 "addl $8, %%esi \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
375 " jnz 1b \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// emms empties the MMX state so that x87 code can run afterwards
376 "emms \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
// %0 = one past the last output sample, %1 = end of input plane 0
377 :: "r" (s16+1536), "r" (f+256)
79759c05911e mmx opt
michael
parents: 3577
diff changeset
378 :"%esi", "%edi", "memory"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
379 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
380 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
381 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
382
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * MMX conversion of one 256-sample block to interleaved 6-channel int16.
 *
 * The input is read as 32-bit integers, 256 per plane, plane k starting at
 * f[256*k]. magicF2W is subtracted from every 32-bit word (psubd) and the
 * result is narrowed to int16 with signed saturation (packssdw).
 * NOTE(review): presumably the floats arrive pre-biased so that this integer
 * subtraction yields the sample value - confirm against the decoder setup.
 *
 * Output frame layout produced by the shuffles below:
 *   slot 0 = plane 1, slot 1 = plane 2, slot 2 = plane 3,
 *   slot 3 = plane 4, slot 4 = 0, slot 5 = plane 0
 * (plane 0 is presumably the LFE plane, going by the function name).
 *
 * The lane comments list the four 16-bit words of a register from most to
 * least significant; lower case is the first sample of the pair, upper case
 * the second and 0 a zero word.
 *
 * Each pass reads 2 samples per plane and stores 2 frames (24 bytes); the
 * 128 passes fill s16[0] .. s16[1535].
 * NOTE(review): esi/edi are hard-coded as index registers, which looks
 * 32-bit x86 specific - confirm before building for other targets.
 *
 * Returns 6*256, the number of int16 values written.
 */
383 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
384 int32_t * f = (int32_t *) _f;
3577
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
385 asm volatile(
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// esi: byte offset into each input plane, counts from -1024 up to 0
386 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
// mm7: constant subtracted from every input word
387 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3577
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// mm6 is not used in this routine; the zero words come from mm4 instead
388 // "pxor %%mm6, %%mm6 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
389 "1: \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// %1 points at the end of plane 0, so N*1024(%1,esi) addresses plane N
// and (%1,esi) addresses plane 0
390 "movq 1024(%1, %%esi), %%mm0 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
391 "movq 2048(%1, %%esi), %%mm1 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
392 "movq 3072(%1, %%esi), %%mm2 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
393 "movq 4096(%1, %%esi), %%mm3 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
394 "movq (%1, %%esi), %%mm5 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
395 "psubd %%mm7, %%mm0 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
396 "psubd %%mm7, %%mm1 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
397 "psubd %%mm7, %%mm2 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
398 "psubd %%mm7, %%mm3 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
399 "psubd %%mm7, %%mm5 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// edi = 3*esi: output byte offset (24 output bytes per 8 input bytes)
400 "leal (%%esi, %%esi, 2), %%edi \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
401
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
402 "packssdw %%mm2, %%mm0 \n\t" // CcAa
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
403 "packssdw %%mm3, %%mm1 \n\t" // DdBb
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
404 "packssdw %%mm5, %%mm5 \n\t" // FfFf
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
405 "movq %%mm0, %%mm2 \n\t" // CcAa
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
406 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
407 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// mm4 is cleared every pass because it is overwritten further down
408 "pxor %%mm4, %%mm4 \n\t" // 0000
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
409 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
410 "movq %%mm0, %%mm1 \n\t" // BAba
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
411 "movq %%mm4, %%mm3 \n\t" // F0f0
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
412 "punpckldq %%mm2, %%mm0 \n\t" // dcba
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
413 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
414 "punpckldq %%mm1, %%mm4 \n\t" // BAf0
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
415 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
416
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// three 8-byte stores = two complete 6-channel frames
417 "movq %%mm0, (%0, %%edi) \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
418 "movq %%mm4, 8(%0, %%edi) \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
419 "movq %%mm2, 16(%0, %%edi) \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// jnz uses the zero flag set by addl: the loop ends when esi reaches 0
420 "addl $8, %%esi \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
421 " jnz 1b \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// emms empties the MMX state so that x87 code can run afterwards
422 "emms \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
// %0 = one past the last output sample, %1 = end of input plane 0
423 :: "r" (s16+1536), "r" (f+256)
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
424 :"%esi", "%edi", "memory"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
425 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
426 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
427 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
428
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * MMX conversion of one 256-sample block to interleaved 6-channel int16.
 *
 * The input is read as 32-bit integers, 256 per plane, plane k starting at
 * f[256*k]. magicF2W is subtracted from every 32-bit word (psubd) and the
 * result is narrowed to int16 with signed saturation (packssdw).
 * NOTE(review): presumably the floats arrive pre-biased so that this integer
 * subtraction yields the sample value - confirm against the decoder setup.
 *
 * Output frame layout produced by the shuffles below:
 *   slot 0 = plane 1, slot 1 = plane 3, slot 2 = plane 4,
 *   slot 3 = plane 5, slot 4 = plane 2, slot 5 = plane 0
 * (plane 0 is presumably the LFE plane, going by the function name).
 *
 * The lane comments list the four 16-bit words of a register from most to
 * least significant; lower case is the first sample of the pair, upper case
 * the second.
 *
 * Each pass reads 2 samples per plane and stores 2 frames (24 bytes); the
 * 128 passes fill s16[0] .. s16[1535].
 * NOTE(review): esi/edi are hard-coded as index registers, which looks
 * 32-bit x86 specific - confirm before building for other targets.
 *
 * Returns 6*256, the number of int16 values written.
 */
429 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
430 int32_t * f = (int32_t *) _f;
3575
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
431 asm volatile(
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// esi: byte offset into each input plane, counts from -1024 up to 0
432 "movl $-1024, %%esi \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
// mm7: constant subtracted from every input word
433 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3575
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// no zero register is needed here: all six output slots carry data
434 // "pxor %%mm6, %%mm6 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
435 "1: \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// %1 points at the end of plane 0, so N*1024(%1,esi) addresses plane N
// and (%1,esi) addresses plane 0
436 "movq 1024(%1, %%esi), %%mm0 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
437 "movq 3072(%1, %%esi), %%mm1 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
438 "movq 4096(%1, %%esi), %%mm2 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
439 "movq 5120(%1, %%esi), %%mm3 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
440 "movq 2048(%1, %%esi), %%mm4 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
441 "movq (%1, %%esi), %%mm5 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
442 "psubd %%mm7, %%mm0 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
443 "psubd %%mm7, %%mm1 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
444 "psubd %%mm7, %%mm2 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
445 "psubd %%mm7, %%mm3 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
446 "psubd %%mm7, %%mm4 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
447 "psubd %%mm7, %%mm5 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// edi = 3*esi: output byte offset (24 output bytes per 8 input bytes)
448 "leal (%%esi, %%esi, 2), %%edi \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
449
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
450 "packssdw %%mm2, %%mm0 \n\t" // CcAa
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
451 "packssdw %%mm3, %%mm1 \n\t" // DdBb
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
452 "packssdw %%mm4, %%mm4 \n\t" // EeEe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
453 "packssdw %%mm5, %%mm5 \n\t" // FfFf
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
454 "movq %%mm0, %%mm2 \n\t" // CcAa
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
455 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
456 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
457 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
458 "movq %%mm0, %%mm1 \n\t" // BAba
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
459 "movq %%mm4, %%mm3 \n\t" // FEfe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
460 "punpckldq %%mm2, %%mm0 \n\t" // dcba
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
461 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
462 "punpckldq %%mm1, %%mm4 \n\t" // BAfe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
463 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
464
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// three 8-byte stores = two complete 6-channel frames
465 "movq %%mm0, (%0, %%edi) \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
466 "movq %%mm4, 8(%0, %%edi) \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
467 "movq %%mm2, 16(%0, %%edi) \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// jnz uses the zero flag set by addl: the loop ends when esi reaches 0
468 "addl $8, %%esi \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
469 " jnz 1b \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// emms empties the MMX state so that x87 code can run afterwards
470 "emms \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
// %0 = one past the last output sample, %1 = end of input plane 0
471 :: "r" (s16+1536), "r" (f+256)
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
472 :"%esi", "%edi", "memory"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
473 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
474 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
475 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
476
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
477
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * Select the MMX resample routine for a channel configuration.
 *
 * flags: compared for exact equality against the A52_* case labels below
 *        (optionally OR-ed with A52_LFE); any other value yields NULL.
 * ch:    compared against the single channel count each configuration is
 *        served for (the number in the _to_N_ part of the routine name).
 *
 * Returns the matching a52_resample_*_MMX function as a void pointer, or
 * NULL when no case matches the (flags, ch) pair.
 * NOTE(review): the caller presumably converts the result back to a function
 * pointer and uses another implementation on NULL - confirm at the call site.
 */
478 static void* a52_resample_MMX(int flags, int ch){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
479 switch (flags) {
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
480 case A52_MONO:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
481 if(ch==5) return a52_resample_MONO_to_5_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
482 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
// A52_CHANNEL, A52_STEREO and A52_DOLBY share the same 2-channel routine
483 case A52_CHANNEL:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
484 case A52_STEREO:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
485 case A52_DOLBY:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
486 if(ch==2) return a52_resample_STEREO_to_2_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
487 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
488 case A52_3F:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
489 if(ch==5) return a52_resample_3F_to_5_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
490 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
491 case A52_2F2R:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
492 if(ch==4) return a52_resample_2F_2R_to_4_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
493 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
494 case A52_3F2R:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
495 if(ch==5) return a52_resample_3F_2R_to_5_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
496 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
// configurations with A52_LFE set are only served for 6 output channels
497 case A52_MONO | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
498 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
499 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
500 case A52_CHANNEL | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
501 case A52_STEREO | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
502 case A52_DOLBY | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
503 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
504 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
505 case A52_3F | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
506 if(ch==6) return a52_resample_3F_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
507 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
508 case A52_2F2R | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
509 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
510 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
511 case A52_3F2R | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
512 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX;
3412
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
513 break;
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
514 }
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
// reached when flags matched no case or ch did not match that case
515 return NULL;
3626
e22ff7ebdc05 runtime cpu detection for the resample stuff
michael
parents: 3578
diff changeset
516 }
e22ff7ebdc05 runtime cpu detection for the resample stuff
michael
parents: 3578
diff changeset
517
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
518