Mercurial > mplayer.hg
comparison liba52/resample_c.c @ 3569:d14494d84c29
MMX opt
author | michael |
---|---|
date | Mon, 17 Dec 2001 21:19:57 +0000 |
parents | 9e1e88b3ca18 |
children | 8600f40003de |
comparison
equal
deleted
inserted
replaced
3568:454dfb7787d7 | 3569:d14494d84c29 |
---|---|
4 | 4 |
5 // a52_resample_init should find the requested converter (from type flags -> | 5 // a52_resample_init should find the requested converter (from type flags -> |
6 // given number of channels) and set up some function pointers... | 6 // given number of channels) and set up some function pointers... |
7 | 7 |
8 // a52_resample() should do the conversion. | 8 // a52_resample() should do the conversion. |
9 | |
10 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) | |
11 | |
12 /* optimization TODO / NOTES | |
13 movntq is slightly faster (0.5% with the current test.c benchmark) | |
14 (but that's just test.c, so that needs to be tested in reality) | |
15 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
16 */ | |
9 | 17 |
10 #include <inttypes.h> | 18 #include <inttypes.h> |
11 #include "a52.h" | 19 #include "a52.h" |
12 #include "../config.h" | 20 #include "../config.h" |
13 | 21 |
106 s16[5*i+2] = s16[5*i+3] = 0; | 114 s16[5*i+2] = s16[5*i+3] = 0; |
107 s16[5*i+4] = convert (f[i+256]); | 115 s16[5*i+4] = convert (f[i+256]); |
108 } | 116 } |
109 break; | 117 break; |
110 case A52_2F2R: | 118 case A52_2F2R: |
119 #ifdef HAVE_MMX | |
120 asm volatile( | |
121 "movl $-1024, %%esi \n\t" | |
122 "movq magicF2W, %%mm7 \n\t" | |
123 "1: \n\t" | |
124 "movq (%1, %%esi), %%mm0 \n\t" | |
125 "movq 8(%1, %%esi), %%mm1 \n\t" | |
126 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
127 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
128 "psubd %%mm7, %%mm0 \n\t" | |
129 "psubd %%mm7, %%mm1 \n\t" | |
130 "psubd %%mm7, %%mm2 \n\t" | |
131 "psubd %%mm7, %%mm3 \n\t" | |
132 "packssdw %%mm1, %%mm0 \n\t" | |
133 "packssdw %%mm3, %%mm2 \n\t" | |
134 "movq 2048(%1, %%esi), %%mm3 \n\t" | |
135 "movq 2056(%1, %%esi), %%mm4 \n\t" | |
136 "movq 3072(%1, %%esi), %%mm5 \n\t" | |
137 "movq 3080(%1, %%esi), %%mm6 \n\t" | |
138 "psubd %%mm7, %%mm3 \n\t" | |
139 "psubd %%mm7, %%mm4 \n\t" | |
140 "psubd %%mm7, %%mm5 \n\t" | |
141 "psubd %%mm7, %%mm6 \n\t" | |
142 "packssdw %%mm4, %%mm3 \n\t" | |
143 "packssdw %%mm6, %%mm5 \n\t" | |
144 "movq %%mm0, %%mm1 \n\t" | |
145 "movq %%mm3, %%mm4 \n\t" | |
146 "punpcklwd %%mm2, %%mm0 \n\t" | |
147 "punpckhwd %%mm2, %%mm1 \n\t" | |
148 "punpcklwd %%mm5, %%mm3 \n\t" | |
149 "punpckhwd %%mm5, %%mm4 \n\t" | |
150 "movq %%mm0, %%mm2 \n\t" | |
151 "movq %%mm1, %%mm5 \n\t" | |
152 "punpckldq %%mm3, %%mm0 \n\t" | |
153 "punpckhdq %%mm3, %%mm2 \n\t" | |
154 "punpckldq %%mm4, %%mm1 \n\t" | |
155 "punpckhdq %%mm4, %%mm5 \n\t" | |
156 "movq %%mm0, (%0, %%esi,2) \n\t" | |
157 "movq %%mm2, 8(%0, %%esi,2) \n\t" | |
158 "movq %%mm1, 16(%0, %%esi,2) \n\t" | |
159 "movq %%mm5, 24(%0, %%esi,2) \n\t" | |
160 "addl $16, %%esi \n\t" | |
161 " jnz 1b \n\t" | |
162 "emms \n\t" | |
163 :: "r" (s16+1024), "r" (f+256) | |
164 :"%esi", "memory" | |
165 ); | |
166 #else | |
111 for (i = 0; i < 256; i++) { | 167 for (i = 0; i < 256; i++) { |
112 s16[4*i] = convert (f[i]); | 168 s16[4*i] = convert (f[i]); |
113 s16[4*i+1] = convert (f[i+256]); | 169 s16[4*i+1] = convert (f[i+256]); |
114 s16[4*i+2] = convert (f[i+512]); | 170 s16[4*i+2] = convert (f[i+512]); |
115 s16[4*i+3] = convert (f[i+768]); | 171 s16[4*i+3] = convert (f[i+768]); |
116 } | 172 } |
173 #endif | |
117 break; | 174 break; |
118 case A52_3F2R: | 175 case A52_3F2R: |
119 for (i = 0; i < 256; i++) { | 176 for (i = 0; i < 256; i++) { |
120 s16[5*i] = convert (f[i]); | 177 s16[5*i] = convert (f[i]); |
121 s16[5*i+1] = convert (f[i+512]); | 178 s16[5*i+1] = convert (f[i+512]); |
123 s16[5*i+3] = convert (f[i+1024]); | 180 s16[5*i+3] = convert (f[i+1024]); |
124 s16[5*i+4] = convert (f[i+256]); | 181 s16[5*i+4] = convert (f[i+256]); |
125 } | 182 } |
126 break; | 183 break; |
127 case A52_MONO | A52_LFE: | 184 case A52_MONO | A52_LFE: |
185 #ifdef HAVE_MMX | |
186 asm volatile( | |
187 "movl $-1024, %%esi \n\t" | |
188 "movq magicF2W, %%mm7 \n\t" | |
189 "pxor %%mm6, %%mm6 \n\t" | |
190 "1: \n\t" | |
191 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
192 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
193 "movq (%1, %%esi), %%mm2 \n\t" | |
194 "movq 8(%1, %%esi), %%mm3 \n\t" | |
195 "psubd %%mm7, %%mm0 \n\t" | |
196 "psubd %%mm7, %%mm1 \n\t" | |
197 "psubd %%mm7, %%mm2 \n\t" | |
198 "psubd %%mm7, %%mm3 \n\t" | |
199 "packssdw %%mm1, %%mm0 \n\t" | |
200 "packssdw %%mm3, %%mm2 \n\t" | |
201 "movq %%mm0, %%mm1 \n\t" | |
202 "punpcklwd %%mm2, %%mm0 \n\t" | |
203 "punpckhwd %%mm2, %%mm1 \n\t" | |
204 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
205 "movq %%mm6, (%0, %%edi) \n\t" | |
206 "movd %%mm0, 8(%0, %%edi) \n\t" | |
207 "punpckhdq %%mm0, %%mm0 \n\t" | |
208 "movq %%mm6, 12(%0, %%edi) \n\t" | |
209 "movd %%mm0, 20(%0, %%edi) \n\t" | |
210 "movq %%mm6, 24(%0, %%edi) \n\t" | |
211 "movd %%mm1, 32(%0, %%edi) \n\t" | |
212 "punpckhdq %%mm1, %%mm1 \n\t" | |
213 "movq %%mm6, 36(%0, %%edi) \n\t" | |
214 "movd %%mm1, 44(%0, %%edi) \n\t" | |
215 "addl $16, %%esi \n\t" | |
216 " jnz 1b \n\t" | |
217 "emms \n\t" | |
218 :: "r" (s16+1536), "r" (f+256) | |
219 :"%esi", "%edi", "memory" | |
220 ); | |
221 #else | |
128 for (i = 0; i < 256; i++) { | 222 for (i = 0; i < 256; i++) { |
129 s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; | 223 s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; |
130 s16[6*i+4] = convert (f[i+256]); | 224 s16[6*i+4] = convert (f[i+256]); |
131 s16[6*i+5] = convert (f[i]); | 225 s16[6*i+5] = convert (f[i]); |
132 } | 226 } |
227 #endif | |
133 break; | 228 break; |
134 case A52_CHANNEL | A52_LFE: | 229 case A52_CHANNEL | A52_LFE: |
135 case A52_STEREO | A52_LFE: | 230 case A52_STEREO | A52_LFE: |
136 case A52_DOLBY | A52_LFE: | 231 case A52_DOLBY | A52_LFE: |
137 for (i = 0; i < 256; i++) { | 232 for (i = 0; i < 256; i++) { |