Mercurial > mplayer.hg
comparison libmpcodecs/vf_ilpack.c @ 11645:3837fd1bfa5b
mmx optimizations
author | rfelker |
---|---|
date | Mon, 15 Dec 2003 04:07:17 +0000 |
parents | 3ddfe9316ca9 |
children | 57372aa1d655 |
comparison legend: equal, deleted, inserted, replaced
11644:2e6fc0ab578f | 11645:3837fd1bfa5b |
---|---|
106 : | 106 : |
107 : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) | 107 : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) |
108 : "memory" | 108 : "memory" |
109 ); | 109 ); |
110 pack_nn_C(dst, y, u, v, (w&7)); | 110 pack_nn_C(dst, y, u, v, (w&7)); |
111 } | |
112 | |
113 static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, /* MMX routine: each loop pass reads 16 bytes of y and 8 bytes each of u and v, and writes 32 interleaved bytes to dst */ | |
114 unsigned char *u, unsigned char *v, int w, int us, int vs) /* w/16 is the asm loop count; the w&15 remainder is handed to pack_li_0_C at the end */ | |
115 { | |
116 asm volatile ("" | |
117 "pushl %%ebp \n\t" /* ebp is saved here and restored by the popl after the loop */ | |
118 "movl 4(%%edx), %%ebp \n\t" /* edx holds &us on entry; this reads the int 4 bytes past us - presumably vs, relying on the argument layout; TODO confirm */ | |
119 "movl (%%edx), %%edx \n\t" /* edx = us */ | |
120 "pxor %%mm0, %%mm0 \n\t" /* mm0 = 0, the zero source for every unpack below */ | |
121 | |
122 ".balign 16 \n\t" | |
123 ".Lli0: \n\t" | |
124 "movq (%%esi), %%mm1 \n\t" /* y bytes 0-7 loaded twice; the unpacks below leave bytes 0-3 in mm1 and 4-7 in mm2 as 16-bit words */ | |
125 "movq (%%esi), %%mm2 \n\t" | |
126 "punpcklbw %%mm0, %%mm1 \n\t" | |
127 "punpckhbw %%mm0, %%mm2 \n\t" | |
128 /* chroma, first half: far samples at u + 2*edx and v + 2*ebp go to mm4/mm6, near samples at u and v go to mm3/mm5; low four bytes of each widened to words */ | |
129 "movq (%%eax,%%edx,2), %%mm4 \n\t" | |
130 "movq (%%ebx,%%ebp,2), %%mm6 \n\t" | |
131 "punpcklbw %%mm0, %%mm4 \n\t" | |
132 "punpcklbw %%mm0, %%mm6 \n\t" | |
133 "movq (%%eax), %%mm3 \n\t" | |
134 "movq (%%ebx), %%mm5 \n\t" | |
135 "punpcklbw %%mm0, %%mm3 \n\t" | |
136 "punpcklbw %%mm0, %%mm5 \n\t" | |
137 "paddw %%mm3, %%mm4 \n\t" /* seven adds per channel: each word lane becomes far + 7*near */ | |
138 "paddw %%mm5, %%mm6 \n\t" | |
139 "paddw %%mm3, %%mm4 \n\t" | |
140 "paddw %%mm5, %%mm6 \n\t" | |
141 "paddw %%mm3, %%mm4 \n\t" | |
142 "paddw %%mm5, %%mm6 \n\t" | |
143 "paddw %%mm3, %%mm4 \n\t" | |
144 "paddw %%mm5, %%mm6 \n\t" | |
145 "paddw %%mm3, %%mm4 \n\t" | |
146 "paddw %%mm5, %%mm6 \n\t" | |
147 "paddw %%mm3, %%mm4 \n\t" | |
148 "paddw %%mm5, %%mm6 \n\t" | |
149 "paddw %%mm3, %%mm4 \n\t" | |
150 "paddw %%mm5, %%mm6 \n\t" | |
151 "psrlw $3, %%mm4 \n\t" /* divide the weighted sums by 8 */ | |
152 "psrlw $3, %%mm6 \n\t" | |
153 "movq %%mm4, %%mm3 \n\t" /* split the four blended words of each channel into two registers of 32-bit lanes */ | |
154 "movq %%mm6, %%mm5 \n\t" | |
155 "punpcklwd %%mm0, %%mm3 \n\t" | |
156 "punpckhwd %%mm0, %%mm4 \n\t" | |
157 "punpcklwd %%mm0, %%mm5 \n\t" | |
158 "punpckhwd %%mm0, %%mm6 \n\t" | |
159 "pslld $8, %%mm3 \n\t" /* u moves to byte 1 of each dword */ | |
160 "pslld $8, %%mm4 \n\t" | |
161 "pslld $24, %%mm5 \n\t" /* v moves to byte 3 of each dword */ | |
162 "pslld $24, %%mm6 \n\t" | |
163 /* merge: y words already occupy bytes 0 and 2, so each dword is stored as y, u, y, v */ | |
164 "por %%mm3, %%mm1 \n\t" | |
165 "por %%mm4, %%mm2 \n\t" | |
166 "por %%mm5, %%mm1 \n\t" | |
167 "por %%mm6, %%mm2 \n\t" | |
168 | |
169 "movq %%mm1, (%%edi) \n\t" /* output bytes 0-15 */ | |
170 "movq %%mm2, 8(%%edi) \n\t" | |
171 /* second half: y bytes 8-15, combined with the high four bytes of the same chroma quadwords */ | |
172 "movq 8(%%esi), %%mm1 \n\t" | |
173 "movq 8(%%esi), %%mm2 \n\t" | |
174 "punpcklbw %%mm0, %%mm1 \n\t" | |
175 "punpckhbw %%mm0, %%mm2 \n\t" | |
176 | |
177 "movq (%%eax,%%edx,2), %%mm4 \n\t" | |
178 "movq (%%ebx,%%ebp,2), %%mm6 \n\t" | |
179 "punpckhbw %%mm0, %%mm4 \n\t" | |
180 "punpckhbw %%mm0, %%mm6 \n\t" | |
181 "movq (%%eax), %%mm3 \n\t" | |
182 "movq (%%ebx), %%mm5 \n\t" | |
183 "punpckhbw %%mm0, %%mm3 \n\t" | |
184 "punpckhbw %%mm0, %%mm5 \n\t" | |
185 "paddw %%mm3, %%mm4 \n\t" /* same far + 7*near blend as in the first half */ | |
186 "paddw %%mm5, %%mm6 \n\t" | |
187 "paddw %%mm3, %%mm4 \n\t" | |
188 "paddw %%mm5, %%mm6 \n\t" | |
189 "paddw %%mm3, %%mm4 \n\t" | |
190 "paddw %%mm5, %%mm6 \n\t" | |
191 "paddw %%mm3, %%mm4 \n\t" | |
192 "paddw %%mm5, %%mm6 \n\t" | |
193 "paddw %%mm3, %%mm4 \n\t" | |
194 "paddw %%mm5, %%mm6 \n\t" | |
195 "paddw %%mm3, %%mm4 \n\t" | |
196 "paddw %%mm5, %%mm6 \n\t" | |
197 "paddw %%mm3, %%mm4 \n\t" | |
198 "paddw %%mm5, %%mm6 \n\t" | |
199 "psrlw $3, %%mm4 \n\t" | |
200 "psrlw $3, %%mm6 \n\t" | |
201 "movq %%mm4, %%mm3 \n\t" | |
202 "movq %%mm6, %%mm5 \n\t" | |
203 "punpcklwd %%mm0, %%mm3 \n\t" | |
204 "punpckhwd %%mm0, %%mm4 \n\t" | |
205 "punpcklwd %%mm0, %%mm5 \n\t" | |
206 "punpckhwd %%mm0, %%mm6 \n\t" | |
207 "pslld $8, %%mm3 \n\t" | |
208 "pslld $8, %%mm4 \n\t" | |
209 "pslld $24, %%mm5 \n\t" | |
210 "pslld $24, %%mm6 \n\t" | |
211 | |
212 "por %%mm3, %%mm1 \n\t" | |
213 "por %%mm4, %%mm2 \n\t" | |
214 "por %%mm5, %%mm1 \n\t" | |
215 "por %%mm6, %%mm2 \n\t" | |
216 /* advance the sources: 16 y bytes and 8 bytes each of u and v were consumed this pass */ | |
217 "addl $16, %%esi \n\t" | |
218 "addl $8, %%eax \n\t" | |
219 "addl $8, %%ebx \n\t" | |
220 | |
221 "movq %%mm1, 16(%%edi) \n\t" /* output bytes 16-31, then step dst by 32 */ | |
222 "movq %%mm2, 24(%%edi) \n\t" | |
223 "addl $32, %%edi \n\t" | |
224 | |
225 "decl %%ecx \n\t" /* the test sits at the bottom, so the body runs once before ecx is checked. NOTE(review): with w < 16 ecx starts at 0 and decl wraps instead of ending the loop - confirm callers never pass w < 16 */ | |
226 "jnz .Lli0 \n\t" | |
227 "emms \n\t" | |
228 "popl %%ebp \n\t" | |
229 : | |
230 : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16) /* NOTE(review): all six are input-only operands, yet the asm changes every one of these registers; no outputs are declared */ | |
231 : "memory" /* NOTE(review): mm0-mm6 are written above but not listed here */ | |
232 ); | |
233 pack_li_0_C(dst, y, u, v, (w&15), us, vs); /* remainder columns go to the C routine. NOTE(review): dst, y, u, v are the C variables, which the asm never writes back - confirm they refer to the unprocessed tail rather than the start of the line */ | |
234 } | |
235 | |
236 static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, /* MMX routine: each loop pass reads 16 bytes of y and 8 bytes each of u and v, and writes 32 interleaved bytes to dst */ | |
237 unsigned char *u, unsigned char *v, int w, int us, int vs) /* w/16 is the asm loop count; the w&15 remainder is handed to pack_li_1_C at the end */ | |
238 { | |
239 asm volatile ("" | |
240 "pushl %%ebp \n\t" /* ebp is saved here and restored by the popl after the loop */ | |
241 "movl 4(%%edx), %%ebp \n\t" /* edx holds &us on entry; this reads the int 4 bytes past us - presumably vs, relying on the argument layout; TODO confirm */ | |
242 "movl (%%edx), %%edx \n\t" /* edx = us */ | |
243 "pxor %%mm0, %%mm0 \n\t" /* mm0 = 0, the zero source for every unpack below */ | |
244 | |
245 ".balign 16 \n\t" | |
246 ".Lli1: \n\t" | |
247 "movq (%%esi), %%mm1 \n\t" /* y bytes 0-7 loaded twice; the unpacks below leave bytes 0-3 in mm1 and 4-7 in mm2 as 16-bit words */ | |
248 "movq (%%esi), %%mm2 \n\t" | |
249 "punpcklbw %%mm0, %%mm1 \n\t" | |
250 "punpckhbw %%mm0, %%mm2 \n\t" | |
251 /* chroma, first half: far samples at u + 2*edx and v + 2*ebp go to mm4/mm6, near samples at u and v go to mm3/mm5; low four bytes of each widened to words */ | |
252 "movq (%%eax,%%edx,2), %%mm4 \n\t" | |
253 "movq (%%ebx,%%ebp,2), %%mm6 \n\t" | |
254 "punpcklbw %%mm0, %%mm4 \n\t" | |
255 "punpcklbw %%mm0, %%mm6 \n\t" | |
256 "movq (%%eax), %%mm3 \n\t" | |
257 "movq (%%ebx), %%mm5 \n\t" | |
258 "punpcklbw %%mm0, %%mm3 \n\t" | |
259 "punpcklbw %%mm0, %%mm5 \n\t" | |
260 "movq %%mm4, %%mm7 \n\t" /* mm7 is scratch: copy, double, add the copy back, so mm4 = 3 * far u */ | |
261 "paddw %%mm4, %%mm4 \n\t" | |
262 "paddw %%mm7, %%mm4 \n\t" | |
263 "movq %%mm6, %%mm7 \n\t" /* same tripling for the far v words in mm6 */ | |
264 "paddw %%mm6, %%mm6 \n\t" | |
265 "paddw %%mm7, %%mm6 \n\t" | |
266 "paddw %%mm3, %%mm4 \n\t" /* five adds per channel: each word lane becomes 3*far + 5*near */ | |
267 "paddw %%mm5, %%mm6 \n\t" | |
268 "paddw %%mm3, %%mm4 \n\t" | |
269 "paddw %%mm5, %%mm6 \n\t" | |
270 "paddw %%mm3, %%mm4 \n\t" | |
271 "paddw %%mm5, %%mm6 \n\t" | |
272 "paddw %%mm3, %%mm4 \n\t" | |
273 "paddw %%mm5, %%mm6 \n\t" | |
274 "paddw %%mm3, %%mm4 \n\t" | |
275 "paddw %%mm5, %%mm6 \n\t" | |
276 "psrlw $3, %%mm4 \n\t" /* divide the weighted sums by 8 */ | |
277 "psrlw $3, %%mm6 \n\t" | |
278 "movq %%mm4, %%mm3 \n\t" /* split the four blended words of each channel into two registers of 32-bit lanes */ | |
279 "movq %%mm6, %%mm5 \n\t" | |
280 "punpcklwd %%mm0, %%mm3 \n\t" | |
281 "punpckhwd %%mm0, %%mm4 \n\t" | |
282 "punpcklwd %%mm0, %%mm5 \n\t" | |
283 "punpckhwd %%mm0, %%mm6 \n\t" | |
284 "pslld $8, %%mm3 \n\t" /* u moves to byte 1 of each dword */ | |
285 "pslld $8, %%mm4 \n\t" | |
286 "pslld $24, %%mm5 \n\t" /* v moves to byte 3 of each dword */ | |
287 "pslld $24, %%mm6 \n\t" | |
288 /* merge: y words already occupy bytes 0 and 2, so each dword is stored as y, u, y, v */ | |
289 "por %%mm3, %%mm1 \n\t" | |
290 "por %%mm4, %%mm2 \n\t" | |
291 "por %%mm5, %%mm1 \n\t" | |
292 "por %%mm6, %%mm2 \n\t" | |
293 | |
294 "movq %%mm1, (%%edi) \n\t" /* output bytes 0-15 */ | |
295 "movq %%mm2, 8(%%edi) \n\t" | |
296 /* second half: y bytes 8-15, combined with the high four bytes of the same chroma quadwords */ | |
297 "movq 8(%%esi), %%mm1 \n\t" | |
298 "movq 8(%%esi), %%mm2 \n\t" | |
299 "punpcklbw %%mm0, %%mm1 \n\t" | |
300 "punpckhbw %%mm0, %%mm2 \n\t" | |
301 | |
302 "movq (%%eax,%%edx,2), %%mm4 \n\t" | |
303 "movq (%%ebx,%%ebp,2), %%mm6 \n\t" | |
304 "punpckhbw %%mm0, %%mm4 \n\t" | |
305 "punpckhbw %%mm0, %%mm6 \n\t" | |
306 "movq (%%eax), %%mm3 \n\t" | |
307 "movq (%%ebx), %%mm5 \n\t" | |
308 "punpckhbw %%mm0, %%mm3 \n\t" | |
309 "punpckhbw %%mm0, %%mm5 \n\t" | |
310 "movq %%mm4, %%mm7 \n\t" /* same 3*far + 5*near blend as in the first half */ | |
311 "paddw %%mm4, %%mm4 \n\t" | |
312 "paddw %%mm7, %%mm4 \n\t" | |
313 "movq %%mm6, %%mm7 \n\t" | |
314 "paddw %%mm6, %%mm6 \n\t" | |
315 "paddw %%mm7, %%mm6 \n\t" | |
316 "paddw %%mm3, %%mm4 \n\t" | |
317 "paddw %%mm5, %%mm6 \n\t" | |
318 "paddw %%mm3, %%mm4 \n\t" | |
319 "paddw %%mm5, %%mm6 \n\t" | |
320 "paddw %%mm3, %%mm4 \n\t" | |
321 "paddw %%mm5, %%mm6 \n\t" | |
322 "paddw %%mm3, %%mm4 \n\t" | |
323 "paddw %%mm5, %%mm6 \n\t" | |
324 "paddw %%mm3, %%mm4 \n\t" | |
325 "paddw %%mm5, %%mm6 \n\t" | |
326 "psrlw $3, %%mm4 \n\t" | |
327 "psrlw $3, %%mm6 \n\t" | |
328 "movq %%mm4, %%mm3 \n\t" | |
329 "movq %%mm6, %%mm5 \n\t" | |
330 "punpcklwd %%mm0, %%mm3 \n\t" | |
331 "punpckhwd %%mm0, %%mm4 \n\t" | |
332 "punpcklwd %%mm0, %%mm5 \n\t" | |
333 "punpckhwd %%mm0, %%mm6 \n\t" | |
334 "pslld $8, %%mm3 \n\t" | |
335 "pslld $8, %%mm4 \n\t" | |
336 "pslld $24, %%mm5 \n\t" | |
337 "pslld $24, %%mm6 \n\t" | |
338 | |
339 "por %%mm3, %%mm1 \n\t" | |
340 "por %%mm4, %%mm2 \n\t" | |
341 "por %%mm5, %%mm1 \n\t" | |
342 "por %%mm6, %%mm2 \n\t" | |
343 /* advance the sources: 16 y bytes and 8 bytes each of u and v were consumed this pass */ | |
344 "addl $16, %%esi \n\t" | |
345 "addl $8, %%eax \n\t" | |
346 "addl $8, %%ebx \n\t" | |
347 | |
348 "movq %%mm1, 16(%%edi) \n\t" /* output bytes 16-31, then step dst by 32 */ | |
349 "movq %%mm2, 24(%%edi) \n\t" | |
350 "addl $32, %%edi \n\t" | |
351 | |
352 "decl %%ecx \n\t" /* the test sits at the bottom, so the body runs once before ecx is checked. NOTE(review): with w < 16 ecx starts at 0 and decl wraps instead of ending the loop - confirm callers never pass w < 16 */ | |
353 "jnz .Lli1 \n\t" | |
354 "emms \n\t" | |
355 "popl %%ebp \n\t" | |
356 : | |
357 : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16) /* NOTE(review): all six are input-only operands, yet the asm changes every one of these registers; no outputs are declared */ | |
358 : "memory" /* NOTE(review): mm0-mm7 are written above but not listed here */ | |
359 ); | |
360 pack_li_1_C(dst, y, u, v, (w&15), us, vs); /* remainder columns go to the C routine. NOTE(review): dst, y, u, v are the C variables, which the asm never writes back - confirm they refer to the unprocessed tail rather than the start of the line */ | |
111 } | 361 } |
112 #endif | 362 #endif |
113 | 363 |
114 static pack_func_t *pack_nn; | 364 static pack_func_t *pack_nn; |
115 static pack_func_t *pack_li_0; | 365 static pack_func_t *pack_li_0; |
197 | 447 |
198 pack_nn = (pack_func_t *)pack_nn_C; | 448 pack_nn = (pack_func_t *)pack_nn_C; |
199 pack_li_0 = pack_li_0_C; | 449 pack_li_0 = pack_li_0_C; |
200 pack_li_1 = pack_li_1_C; | 450 pack_li_1 = pack_li_1_C; |
201 #ifdef HAVE_MMX | 451 #ifdef HAVE_MMX |
202 if(gCpuCaps.hasMMX) pack_nn = (pack_func_t *)pack_nn_MMX; | 452 if(gCpuCaps.hasMMX) { |
453 pack_nn = (pack_func_t *)pack_nn_MMX; | |
454 pack_li_0 = pack_li_0_MMX; | |
455 pack_li_1 = pack_li_1_MMX; | |
456 } | |
203 #endif | 457 #endif |
204 | 458 |
205 switch(vf->priv->mode) { | 459 switch(vf->priv->mode) { |
206 case 0: | 460 case 0: |
207 vf->priv->pack[0] = vf->priv->pack[1] = pack_nn; | 461 vf->priv->pack[0] = vf->priv->pack[1] = pack_nn; |