annotate liba52/resample_mmx.c @ 30017:7119354805e7

Use on-stack subtitle struct for temporary storage for passing subtitles on for rendering by libass. This avoids mangling the static subtitle struct that is supposed to contain the subtitles that will actually be displayed and it also minimally reduces memory usage by freeing the subtitle lines again as early as possible.
author reimar
date Fri, 18 Dec 2009 19:29:33 +0000
parents 25337a2147e7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25483
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
1 /*
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
2 * resample_mmx.c
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
3 * Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
4 *
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
5 * This file is part of a52dec, a free ATSC A-52 stream decoder.
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
6 * See http://liba52.sourceforge.net/ for updates.
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
7 *
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
8 * File added for use with MPlayer and not part of original a52dec.
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
9 *
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
10 * a52dec is free software; you can redistribute it and/or modify
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
11 * it under the terms of the GNU General Public License as published by
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
12 * the Free Software Foundation; either version 2 of the License, or
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
13 * (at your option) any later version.
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
14 *
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
15 * a52dec is distributed in the hope that it will be useful,
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
18 * GNU General Public License for more details.
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
19 *
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
20 * You should have received a copy of the GNU General Public License
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
21 * along with this program; if not, write to the Free Software
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
170fc6d9dfa1 Add proper copyright/license headers.
diego
parents: 25480
diff changeset
23 */
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
24
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
25 /* optimization TODO / NOTES
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
26 * movntq is slightly faster (0.5% with the current test.c benchmark)
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
27 * (but that is just test.c so that needs to be tested in reality)
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
28 * and it would mean (C / MMX2 / MMX / 3DNOW) versions.
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
29 */
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
30
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
31 #include "a52_internal.h"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
32
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
33
12303
f881c918739b attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents: 8123
diff changeset
34 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; // 0x43c00000 (the IEEE-754 single-precision bit pattern of 384.0f) in both 32-bit halves; subtracted from raw float bit patterns with psubd
f881c918739b attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents: 8123
diff changeset
35 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; // word mask: keeps 16-bit words 1 and 3 (word 0 = least significant)
f881c918739b attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents: 8123
diff changeset
36 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; // word mask: keeps 16-bit words 0 and 2
f881c918739b attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents: 8123
diff changeset
37 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; // word mask: keeps the upper 32 bits (16-bit words 2 and 3)
3412
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
38
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * Convert one plane of 256 samples at _f into 5-channel interleaved int16
 * output at s16: every input sample produces four zero words followed by
 * the converted sample.
 *
 * Each float is handled as its raw 32-bit pattern: magicF2W is subtracted
 * (psubd) and the result is narrowed to int16 with signed saturation
 * (packssdw).
 * NOTE(review): presumably the caller delivers samples biased by 384.0f so
 * that this integer subtraction leaves the sample value -- confirm.
 *
 * In the trailing comments A B C D are the four samples converted in one
 * loop iteration, listed from the least significant 16-bit word upwards.
 *
 * Returns 5*256, which equals the number of int16 values stored.
 */
39 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
3412
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
40 int32_t * f = (int32_t *) _f; // the asm below operates on the raw 32-bit patterns of the floats
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
41 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
42 "mov $-512, %%"REG_S" \n\t" // REG_S: negative index counting up to 0; (%1, REG_S, 2) walks the 1024 input bytes below f+256
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
43 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
44 "movq "MANGLE(wm1100)", %%mm3 \n\t"
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
45 "movq "MANGLE(wm0101)", %%mm4 \n\t"
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
46 "movq "MANGLE(wm1010)", %%mm5 \n\t"
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
47 "pxor %%mm6, %%mm6 \n\t" // mm6 = 0, source of the zero words
8600f40003de mmx opt
michael
parents: 3569
diff changeset
48 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
49 "movq (%1, %%"REG_S", 2), %%mm0 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
50 "movq 8(%1, %%"REG_S", 2), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
51 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" // REG_D = 5*REG_S: output byte offset relative to s16+1280
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
52 "psubd %%mm7, %%mm0 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
53 "psubd %%mm7, %%mm1 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
54 "packssdw %%mm1, %%mm0 \n\t" // A B C D
8600f40003de mmx opt
michael
parents: 3569
diff changeset
55 "movq %%mm0, %%mm1 \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
56 "pand %%mm4, %%mm0 \n\t" // A 0 C 0
8600f40003de mmx opt
michael
parents: 3569
diff changeset
57 "pand %%mm5, %%mm1 \n\t" // 0 B 0 D
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
58 "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
59 "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
60 "pand %%mm3, %%mm0 \n\t" // 0 0 C 0
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
61 "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
62 "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
63 "pand %%mm3, %%mm1 \n\t" // 0 0 0 D
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
64 "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
65 "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
66 "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 D
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
67 "add $8, %%"REG_S" \n\t" // index is scaled by 2 in the loads: 16 input bytes = 4 samples per iteration
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
68 " jnz 1b \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
69 "emms \n\t"
8600f40003de mmx opt
michael
parents: 3569
diff changeset
70 :: "r" (s16+1280), "r" (f+256) // both operands point one past the end of the data; the negative index walks up to them
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
71 :"%"REG_S, "%"REG_D, "memory"
3574
8600f40003de mmx opt
michael
parents: 3569
diff changeset
72 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
73 return 5*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
74 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
75
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * Convert two planes of 256 samples each (the 32-bit words at f[0..255]
 * and f[256..511]) into 2-channel interleaved int16 output at s16: output
 * pair i is (plane 0 sample i, plane 1 sample i).
 *
 * Each float is handled as its raw 32-bit pattern: magicF2W is subtracted
 * (psubd) and the result is narrowed to int16 with signed saturation
 * (packssdw).
 * NOTE(review): presumably the caller delivers samples biased by 384.0f so
 * that this integer subtraction leaves the sample value -- confirm.
 *
 * An SSE (cvtps2pi) variant is kept commented out at the top of the body.
 *
 * Returns 2*256, which equals the number of int16 values stored.
 */
76 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
77 int32_t * f = (int32_t *) _f; // the asm below operates on the raw 32-bit patterns of the floats
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
78 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
79 #if HAVE_SSE
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
80 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
81 "mov $-1024, %%"REG_S" \n\t"
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
82 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
83 "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
84 "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t"
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
85 "movq %%mm0, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
86 "punpcklwd %%mm2, %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
87 "punpckhwd %%mm2, %%mm1 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
88 "movq %%mm0, (%0, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
89 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
90 "add $16, %%"REG_S" \n\t"
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
91 " jnz 1b \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
92 "emms \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
93 :: "r" (s16+512), "r" (f+256)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
94 :"%"REG_S, "memory"
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
95 );*/
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
96 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
97 "mov $-1024, %%"REG_S" \n\t" // REG_S: negative byte offset counting up to 0, applied to both input and output
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
98 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
99 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
100 "movq (%1, %%"REG_S"), %%mm0 \n\t" // plane 0, samples i and i+1
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
101 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" // plane 0, samples i+2 and i+3
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
102 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" // plane 1 (1024 bytes further on), samples i and i+1
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
103 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" // plane 1, samples i+2 and i+3
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
104 "psubd %%mm7, %%mm0 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
105 "psubd %%mm7, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
106 "psubd %%mm7, %%mm2 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
107 "psubd %%mm7, %%mm3 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
108 "packssdw %%mm1, %%mm0 \n\t" // mm0 = four int16 samples of plane 0
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
109 "packssdw %%mm3, %%mm2 \n\t" // mm2 = four int16 samples of plane 1
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
110 "movq %%mm0, %%mm1 \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
111 "punpcklwd %%mm2, %%mm0 \n\t" // interleaved pairs for samples i and i+1
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
112 "punpckhwd %%mm2, %%mm1 \n\t" // interleaved pairs for samples i+2 and i+3
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
113 "movq %%mm0, (%0, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
114 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
115 "add $16, %%"REG_S" \n\t" // 16 bytes = 4 samples per plane read, 16 bytes of output written
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
116 " jnz 1b \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
117 "emms \n\t"
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
118 :: "r" (s16+512), "r" (f+256) // end of the output and end of plane 0; the negative offset walks up to them
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
119 :"%"REG_S, "memory"
3567
9e1e88b3ca18 mmx opt
michael
parents: 3412
diff changeset
120 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
121 return 2*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
122 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
123
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * Convert three planes of 256 samples each (p0 = f[0..255],
 * p1 = f[256..511], p2 = f[512..767]) into 5-channel interleaved int16
 * output at s16. Every output sample is five words in the order
 * p0, p2, 0, 0, p1.
 * NOTE(review): presumably p0/p1/p2 are left/centre/right -- confirm
 * against the caller.
 *
 * Each float is handled as its raw 32-bit pattern: magicF2W is subtracted
 * (psubd) and the result is narrowed to int16 with signed saturation
 * (packssdw). Zero words are produced either from mm6 or by preloading a
 * lane with the magic value so that the subtraction yields 0.
 *
 * Four samples (16 input bytes per plane, 40 output bytes) are handled per
 * loop iteration; i in the trailing comments is the first of them.
 *
 * Returns 5*256, which equals the number of int16 values stored.
 */
124 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
125 int32_t * f = (int32_t *) _f; // the asm below operates on the raw 32-bit patterns of the floats
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
126 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
127 "mov $-1024, %%"REG_S" \n\t" // REG_S: negative input byte offset counting up to 0
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
128 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
129 "pxor %%mm6, %%mm6 \n\t" // mm6 = 0
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
130 "movq %%mm7, %%mm5 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
131 "punpckldq %%mm6, %%mm5 \n\t" // mm5 = (magic, 0): subtracts the magic from the low dword only
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
132 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
133 "movd (%1, %%"REG_S"), %%mm0 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
134 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" // mm0 = p0[i], p2[i]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
135 "movd 1024(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
136 "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" // mm1 = p1[i], p0[i+1]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
137 "movd 2052(%1, %%"REG_S"), %%mm2\n\t" // mm2 = p2[i+1], 0
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
138 "movq %%mm7, %%mm3 \n\t" // low dword preloaded with the magic so the psubd below turns it into 0
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
139 "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" // mm3 = magic, p1[i+1]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
140 "movd 8(%1, %%"REG_S"), %%mm4 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
141 "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" // mm4 = p0[i+2], p2[i+2]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
142 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" // REG_D = 5*REG_S
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
143 "sar $1, %%"REG_D" \n\t" // REG_D = 5*REG_S/2: output byte offset relative to s16+1280
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
144 "psubd %%mm7, %%mm0 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
145 "psubd %%mm7, %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
146 "psubd %%mm5, %%mm2 \n\t" // high dword stays 0
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
147 "psubd %%mm7, %%mm3 \n\t" // low dword becomes 0
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
148 "psubd %%mm7, %%mm4 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
149 "packssdw %%mm6, %%mm0 \n\t" // p0[i] p2[i] 0 0
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
150 "packssdw %%mm2, %%mm1 \n\t" // p1[i] p0[i+1] p2[i+1] 0
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
151 "packssdw %%mm4, %%mm3 \n\t" // 0 p1[i+1] p0[i+2] p2[i+2]
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
152 "movq %%mm0, (%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
153 "movq %%mm1, 8(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
154 "movq %%mm3, 16(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
155 "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
156 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" // mm1 = p1[i+2], p0[i+3]
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
157 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" // mm2 = p2[i+3], 0
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
158 "movq %%mm7, %%mm3 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
159 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" // mm3 = magic, p1[i+3]
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
160 "pxor %%mm0, %%mm0 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
161 "psubd %%mm7, %%mm1 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
162 "psubd %%mm5, %%mm2 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
163 "psubd %%mm7, %%mm3 \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
164 "packssdw %%mm1, %%mm0 \n\t" // 0 0 p1[i+2] p0[i+3]
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
165 "packssdw %%mm3, %%mm2 \n\t" // p2[i+3] 0 0 p1[i+3]
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
166 "movq %%mm0, 24(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
167 "movq %%mm2, 32(%0, %%"REG_D") \n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
168
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
169 "add $16, %%"REG_S" \n\t"
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
170 " jnz 1b \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
171 "emms \n\t"
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
172 :: "r" (s16+1280), "r" (f+256) // end of the output and end of plane 0; the negative offsets walk up to them
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
173 :"%"REG_S, "%"REG_D, "memory"
3654
33c3cff374a1 sse optimize of 3F
michael
parents: 3653
diff changeset
174 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
175 return 5*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
176 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
177
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
/*
 * Convert four planes of 256 samples each (p0 = f[0..255],
 * p1 = f[256..511], p2 = f[512..767], p3 = f[768..1023]) into 4-channel
 * interleaved int16 output at s16. Every output sample is four words in
 * plane order: p0, p1, p2, p3.
 *
 * Each float is handled as its raw 32-bit pattern: magicF2W is subtracted
 * (psubd) and the result is narrowed to int16 with signed saturation
 * (packssdw). The word/dword unpack sequence then transposes four samples
 * of four planes into four interleaved output samples.
 * NOTE(review): presumably the caller delivers samples biased by 384.0f so
 * that this integer subtraction leaves the sample value -- confirm.
 *
 * Returns 4*256, which equals the number of int16 values stored.
 */
178 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
179 int32_t * f = (int32_t *) _f; // the asm below operates on the raw 32-bit patterns of the floats
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
180 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
181 "mov $-1024, %%"REG_S" \n\t" // REG_S: negative input byte offset counting up to 0
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
182 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
183 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
184 "movq (%1, %%"REG_S"), %%mm0 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
185 "movq 8(%1, %%"REG_S"), %%mm1 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
186 "movq 1024(%1, %%"REG_S"), %%mm2\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
187 "movq 1032(%1, %%"REG_S"), %%mm3\n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
188 "psubd %%mm7, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
189 "psubd %%mm7, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
190 "psubd %%mm7, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
191 "psubd %%mm7, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
192 "packssdw %%mm1, %%mm0 \n\t" // mm0 = four int16 samples of p0
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
193 "packssdw %%mm3, %%mm2 \n\t" // mm2 = four int16 samples of p1
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
194 "movq 2048(%1, %%"REG_S"), %%mm3\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
195 "movq 2056(%1, %%"REG_S"), %%mm4\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
196 "movq 3072(%1, %%"REG_S"), %%mm5\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
197 "movq 3080(%1, %%"REG_S"), %%mm6\n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
198 "psubd %%mm7, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
199 "psubd %%mm7, %%mm4 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
200 "psubd %%mm7, %%mm5 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
201 "psubd %%mm7, %%mm6 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
202 "packssdw %%mm4, %%mm3 \n\t" // mm3 = four int16 samples of p2
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
203 "packssdw %%mm6, %%mm5 \n\t" // mm5 = four int16 samples of p3
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
204 "movq %%mm0, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
205 "movq %%mm3, %%mm4 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
206 "punpcklwd %%mm2, %%mm0 \n\t" // p0/p1 pairs for samples i, i+1
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
207 "punpckhwd %%mm2, %%mm1 \n\t" // p0/p1 pairs for samples i+2, i+3
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
208 "punpcklwd %%mm5, %%mm3 \n\t" // p2/p3 pairs for samples i, i+1
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
209 "punpckhwd %%mm5, %%mm4 \n\t" // p2/p3 pairs for samples i+2, i+3
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
210 "movq %%mm0, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
211 "movq %%mm1, %%mm5 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
212 "punpckldq %%mm3, %%mm0 \n\t" // sample i: p0 p1 p2 p3
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
213 "punpckhdq %%mm3, %%mm2 \n\t" // sample i+1: p0 p1 p2 p3
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
214 "punpckldq %%mm4, %%mm1 \n\t" // sample i+2: p0 p1 p2 p3
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
215 "punpckhdq %%mm4, %%mm5 \n\t" // sample i+3: p0 p1 p2 p3
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
216 "movq %%mm0, (%0, %%"REG_S",2) \n\t" // output offset is 2*REG_S: 32 output bytes per 16 input bytes
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
217 "movq %%mm2, 8(%0, %%"REG_S",2) \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
218 "movq %%mm1, 16(%0, %%"REG_S",2)\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
219 "movq %%mm5, 24(%0, %%"REG_S",2)\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
220 "add $16, %%"REG_S" \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
221 " jnz 1b \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
222 "emms \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
223 :: "r" (s16+1024), "r" (f+256) // end of the output and end of plane 0; the negative offset walks up to them
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
224 :"%"REG_S, "memory"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
225 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
226 return 4*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
227 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
228
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
229 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
230 int32_t * f = (int32_t *) _f;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
231 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
232 "mov $-1024, %%"REG_S" \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
233 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3653
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
234 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
235 "movd (%1, %%"REG_S"), %%mm0 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
236 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
237 "movd 3072(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
238 "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
239 "movd 1024(%1, %%"REG_S"), %%mm2\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
240 "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
241 "movd 2052(%1, %%"REG_S"), %%mm3\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
242 "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
243 "movd 4100(%1, %%"REG_S"), %%mm4\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
244 "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
245 "movd 8(%1, %%"REG_S"), %%mm5 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
246 "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
247 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
248 "sar $1, %%"REG_D" \n\t"
3653
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
249 "psubd %%mm7, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
250 "psubd %%mm7, %%mm1 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
251 "psubd %%mm7, %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
252 "psubd %%mm7, %%mm3 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
253 "psubd %%mm7, %%mm4 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
254 "psubd %%mm7, %%mm5 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
255 "packssdw %%mm1, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
256 "packssdw %%mm3, %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
257 "packssdw %%mm5, %%mm4 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
258 "movq %%mm0, (%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
259 "movq %%mm2, 8(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
260 "movq %%mm4, 16(%0, %%"REG_D") \n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
261
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
262 "movd 3080(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
263 "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
264 "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
265 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
266 "movd 2060(%1, %%"REG_S"), %%mm2\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
267 "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
268 "movd 4108(%1, %%"REG_S"), %%mm3\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
269 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t"
3653
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
270 "psubd %%mm7, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
271 "psubd %%mm7, %%mm1 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
272 "psubd %%mm7, %%mm2 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
273 "psubd %%mm7, %%mm3 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
274 "packssdw %%mm1, %%mm0 \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
275 "packssdw %%mm3, %%mm2 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
276 "movq %%mm0, 24(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
277 "movq %%mm2, 32(%0, %%"REG_D") \n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
278
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
279 "add $16, %%"REG_S" \n\t"
3653
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
280 " jnz 1b \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
281 "emms \n\t"
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
282 :: "r" (s16+1280), "r" (f+256)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
283 :"%"REG_S, "%"REG_D, "memory"
3653
b11b15df02ed 3F2R sse optimized
michael
parents: 3626
diff changeset
284 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
285 return 5*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
286 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
287
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
288 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
289 int32_t * f = (int32_t *) _f;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
290 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
291 "mov $-1024, %%"REG_S" \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
292 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
293 "pxor %%mm6, %%mm6 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
294 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
295 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
296 "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
297 "movq (%1, %%"REG_S"), %%mm2 \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
298 "movq 8(%1, %%"REG_S"), %%mm3 \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
299 "psubd %%mm7, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
300 "psubd %%mm7, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
301 "psubd %%mm7, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
302 "psubd %%mm7, %%mm3 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
303 "packssdw %%mm1, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
304 "packssdw %%mm3, %%mm2 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
305 "movq %%mm0, %%mm1 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
306 "punpcklwd %%mm2, %%mm0 \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
307 "punpckhwd %%mm2, %%mm1 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
308 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
309 "movq %%mm6, (%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
310 "movd %%mm0, 8(%0, %%"REG_D") \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
311 "punpckhdq %%mm0, %%mm0 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
312 "movq %%mm6, 12(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
313 "movd %%mm0, 20(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
314 "movq %%mm6, 24(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
315 "movd %%mm1, 32(%0, %%"REG_D") \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
316 "punpckhdq %%mm1, %%mm1 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
317 "movq %%mm6, 36(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
318 "movd %%mm1, 44(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
319 "add $16, %%"REG_S" \n\t"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
320 " jnz 1b \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
321 "emms \n\t"
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
322 :: "r" (s16+1536), "r" (f+256)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
323 :"%"REG_S, "%"REG_D, "memory"
3569
d14494d84c29 MMX opt
michael
parents: 3567
diff changeset
324 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
325 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
326 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
327
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
328 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
329 int32_t * f = (int32_t *) _f;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
330 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
331 "mov $-1024, %%"REG_S" \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
332 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
333 "pxor %%mm6, %%mm6 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
334 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
335 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
336 "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
337 "movq (%1, %%"REG_S"), %%mm5 \n\t"
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
338 "psubd %%mm7, %%mm0 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
339 "psubd %%mm7, %%mm1 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
340 "psubd %%mm7, %%mm5 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
341 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
342
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
343 "pxor %%mm4, %%mm4 \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
344 "packssdw %%mm5, %%mm0 \n\t" // FfAa
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
345 "packssdw %%mm4, %%mm1 \n\t" // 00Bb
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
346 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
347 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
348 "movq %%mm0, %%mm1 \n\t" // BAba
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
349 "punpckldq %%mm4, %%mm3 \n\t" // f0XX
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
350 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
351 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
352
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
353 "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
354 "punpckhdq %%mm4, %%mm0 \n\t" // F000
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
355 "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
356 "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
357 "add $8, %%"REG_S" \n\t"
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
358 " jnz 1b \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
359 "emms \n\t"
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
360 :: "r" (s16+1536), "r" (f+256)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
361 :"%"REG_S, "%"REG_D, "memory"
3576
c282fd9e8534 mmx opt
michael
parents: 3575
diff changeset
362 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
363 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
364 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
365
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
366 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
367 int32_t * f = (int32_t *) _f;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
368 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
369 "mov $-1024, %%"REG_S" \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
370 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
371 "pxor %%mm6, %%mm6 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
372 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
373 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
374 "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
375 "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
376 "movq (%1, %%"REG_S"), %%mm5 \n\t"
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
377 "psubd %%mm7, %%mm0 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
378 "psubd %%mm7, %%mm1 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
379 "psubd %%mm7, %%mm4 \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
380 "psubd %%mm7, %%mm5 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
381 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
382
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
383 "packssdw %%mm4, %%mm0 \n\t" // EeAa
79759c05911e mmx opt
michael
parents: 3577
diff changeset
384 "packssdw %%mm5, %%mm1 \n\t" // FfBb
79759c05911e mmx opt
michael
parents: 3577
diff changeset
385 "movq %%mm0, %%mm2 \n\t" // EeAa
79759c05911e mmx opt
michael
parents: 3577
diff changeset
386 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
79759c05911e mmx opt
michael
parents: 3577
diff changeset
387 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe
79759c05911e mmx opt
michael
parents: 3577
diff changeset
388 "movq %%mm0, %%mm1 \n\t" // BAba
79759c05911e mmx opt
michael
parents: 3577
diff changeset
389 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
79759c05911e mmx opt
michael
parents: 3577
diff changeset
390 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
391
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
392 "movq %%mm0, (%0, %%"REG_D") \n\t"
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
393 "punpckhdq %%mm2, %%mm0 \n\t" // FE00
79759c05911e mmx opt
michael
parents: 3577
diff changeset
394 "punpckldq %%mm1, %%mm2 \n\t" // BAfe
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
395 "movq %%mm2, 8(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
396 "movq %%mm0, 16(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
397 "add $8, %%"REG_S" \n\t"
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
398 " jnz 1b \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
399 "emms \n\t"
79759c05911e mmx opt
michael
parents: 3577
diff changeset
400 :: "r" (s16+1536), "r" (f+256)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
401 :"%"REG_S, "%"REG_D, "memory"
3578
79759c05911e mmx opt
michael
parents: 3577
diff changeset
402 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
403 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
404 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
405
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
406 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
407 int32_t * f = (int32_t *) _f;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
408 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
409 "mov $-1024, %%"REG_S" \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
410 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3577
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
411 // "pxor %%mm6, %%mm6 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
412 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
413 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
414 "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
415 "movq 3072(%1, %%"REG_S"), %%mm2\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
416 "movq 4096(%1, %%"REG_S"), %%mm3\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
417 "movq (%1, %%"REG_S"), %%mm5 \n\t"
3577
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
418 "psubd %%mm7, %%mm0 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
419 "psubd %%mm7, %%mm1 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
420 "psubd %%mm7, %%mm2 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
421 "psubd %%mm7, %%mm3 \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
422 "psubd %%mm7, %%mm5 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
423 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
424
3577
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
425 "packssdw %%mm2, %%mm0 \n\t" // CcAa
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
426 "packssdw %%mm3, %%mm1 \n\t" // DdBb
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
427 "packssdw %%mm5, %%mm5 \n\t" // FfFf
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
428 "movq %%mm0, %%mm2 \n\t" // CcAa
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
429 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
430 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
431 "pxor %%mm4, %%mm4 \n\t" // 0000
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
432 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
433 "movq %%mm0, %%mm1 \n\t" // BAba
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
434 "movq %%mm4, %%mm3 \n\t" // F0f0
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
435 "punpckldq %%mm2, %%mm0 \n\t" // dcba
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
436 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
437 "punpckldq %%mm1, %%mm4 \n\t" // BAf0
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
438 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
439
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
440 "movq %%mm0, (%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
441 "movq %%mm4, 8(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
442 "movq %%mm2, 16(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
443 "add $8, %%"REG_S" \n\t"
3577
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
444 " jnz 1b \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
445 "emms \n\t"
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
446 :: "r" (s16+1536), "r" (f+256)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
447 :"%"REG_S, "%"REG_D, "memory"
3577
6bf4dbfb941c mmx opt
michael
parents: 3576
diff changeset
448 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
449 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
450 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
451
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
452 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
453 int32_t * f = (int32_t *) _f;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25484
diff changeset
454 __asm__ volatile(
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
455 "mov $-1024, %%"REG_S" \n\t"
4247
2dbd637ffe05 mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents: 3909
diff changeset
456 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
3575
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
457 // "pxor %%mm6, %%mm6 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
458 "1: \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
459 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
460 "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
461 "movq 4096(%1, %%"REG_S"), %%mm2\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
462 "movq 5120(%1, %%"REG_S"), %%mm3\n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
463 "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
464 "movq (%1, %%"REG_S"), %%mm5 \n\t"
3575
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
465 "psubd %%mm7, %%mm0 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
466 "psubd %%mm7, %%mm1 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
467 "psubd %%mm7, %%mm2 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
468 "psubd %%mm7, %%mm3 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
469 "psubd %%mm7, %%mm4 \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
470 "psubd %%mm7, %%mm5 \n\t"
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
471 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
472
3575
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
473 "packssdw %%mm2, %%mm0 \n\t" // CcAa
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
474 "packssdw %%mm3, %%mm1 \n\t" // DdBb
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
475 "packssdw %%mm4, %%mm4 \n\t" // EeEe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
476 "packssdw %%mm5, %%mm5 \n\t" // FfFf
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
477 "movq %%mm0, %%mm2 \n\t" // CcAa
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
478 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
479 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
480 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
481 "movq %%mm0, %%mm1 \n\t" // BAba
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
482 "movq %%mm4, %%mm3 \n\t" // FEfe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
483 "punpckldq %%mm2, %%mm0 \n\t" // dcba
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
484 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
485 "punpckldq %%mm1, %%mm4 \n\t" // BAfe
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
486 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC
25484
943f37a4323d cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents: 25483
diff changeset
487
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
488 "movq %%mm0, (%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
489 "movq %%mm4, 8(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
490 "movq %%mm2, 16(%0, %%"REG_D") \n\t"
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
491 "add $8, %%"REG_S" \n\t"
3575
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
492 " jnz 1b \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
493 "emms \n\t"
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
494 :: "r" (s16+1536), "r" (f+256)
16173
d6219ce521e9 liba52 asm optimizations ported to amd64
aurel
parents: 12303
diff changeset
495 :"%"REG_S, "%"REG_D, "memory"
3575
01a2466e035e mmx opt
michael
parents: 3574
diff changeset
496 );
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
497 return 6*256;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
498 }
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
499
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
500
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
501 static void* a52_resample_MMX(int flags, int ch){
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
502 switch (flags) {
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
503 case A52_MONO:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
504 if(ch==5) return a52_resample_MONO_to_5_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
505 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
506 case A52_CHANNEL:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
507 case A52_STEREO:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
508 case A52_DOLBY:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
509 if(ch==2) return a52_resample_STEREO_to_2_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
510 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
511 case A52_3F:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
512 if(ch==5) return a52_resample_3F_to_5_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
513 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
514 case A52_2F2R:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
515 if(ch==4) return a52_resample_2F_2R_to_4_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
516 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
517 case A52_3F2R:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
518 if(ch==5) return a52_resample_3F_2R_to_5_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
519 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
520 case A52_MONO | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
521 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
522 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
523 case A52_CHANNEL | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
524 case A52_STEREO | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
525 case A52_DOLBY | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
526 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
527 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
528 case A52_3F | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
529 if(ch==6) return a52_resample_3F_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
530 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
531 case A52_2F2R | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
532 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
533 break;
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
534 case A52_3F2R | A52_LFE:
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
535 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX;
3412
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
536 break;
21d65a4ae3c9 resample.c added - float->int conversion and channel ordering
arpi
parents:
diff changeset
537 }
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
538 return NULL;
3626
e22ff7ebdc05 runtime cpu detection for the resample stuff
michael
parents: 3578
diff changeset
539 }
e22ff7ebdc05 runtime cpu detection for the resample stuff
michael
parents: 3578
diff changeset
540
3909
ef32c8bdee81 c, mmx versions separated. a52 style runtime stuff
arpi
parents: 3908
diff changeset
541