comparison libmpcodecs/vf_ilpack.c @ 11645:3837fd1bfa5b

mmx optimizations
author rfelker
date Mon, 15 Dec 2003 04:07:17 +0000
parents 3ddfe9316ca9
children 57372aa1d655
comparison
equal deleted inserted replaced
11644:2e6fc0ab578f 11645:3837fd1bfa5b
106 : 106 :
107 : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) 107 : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
108 : "memory" 108 : "memory"
109 ); 109 );
110 pack_nn_C(dst, y, u, v, (w&7)); 110 pack_nn_C(dst, y, u, v, (w&7));
111 }
112
113 static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
114 unsigned char *u, unsigned char *v, int w, int us, int vs)
115 {
116 asm volatile (""
117 "pushl %%ebp \n\t"
118 "movl 4(%%edx), %%ebp \n\t"
119 "movl (%%edx), %%edx \n\t"
120 "pxor %%mm0, %%mm0 \n\t"
121
122 ".balign 16 \n\t"
123 ".Lli0: \n\t"
124 "movq (%%esi), %%mm1 \n\t"
125 "movq (%%esi), %%mm2 \n\t"
126 "punpcklbw %%mm0, %%mm1 \n\t"
127 "punpckhbw %%mm0, %%mm2 \n\t"
128
129 "movq (%%eax,%%edx,2), %%mm4 \n\t"
130 "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
131 "punpcklbw %%mm0, %%mm4 \n\t"
132 "punpcklbw %%mm0, %%mm6 \n\t"
133 "movq (%%eax), %%mm3 \n\t"
134 "movq (%%ebx), %%mm5 \n\t"
135 "punpcklbw %%mm0, %%mm3 \n\t"
136 "punpcklbw %%mm0, %%mm5 \n\t"
137 "paddw %%mm3, %%mm4 \n\t"
138 "paddw %%mm5, %%mm6 \n\t"
139 "paddw %%mm3, %%mm4 \n\t"
140 "paddw %%mm5, %%mm6 \n\t"
141 "paddw %%mm3, %%mm4 \n\t"
142 "paddw %%mm5, %%mm6 \n\t"
143 "paddw %%mm3, %%mm4 \n\t"
144 "paddw %%mm5, %%mm6 \n\t"
145 "paddw %%mm3, %%mm4 \n\t"
146 "paddw %%mm5, %%mm6 \n\t"
147 "paddw %%mm3, %%mm4 \n\t"
148 "paddw %%mm5, %%mm6 \n\t"
149 "paddw %%mm3, %%mm4 \n\t"
150 "paddw %%mm5, %%mm6 \n\t"
151 "psrlw $3, %%mm4 \n\t"
152 "psrlw $3, %%mm6 \n\t"
153 "movq %%mm4, %%mm3 \n\t"
154 "movq %%mm6, %%mm5 \n\t"
155 "punpcklwd %%mm0, %%mm3 \n\t"
156 "punpckhwd %%mm0, %%mm4 \n\t"
157 "punpcklwd %%mm0, %%mm5 \n\t"
158 "punpckhwd %%mm0, %%mm6 \n\t"
159 "pslld $8, %%mm3 \n\t"
160 "pslld $8, %%mm4 \n\t"
161 "pslld $24, %%mm5 \n\t"
162 "pslld $24, %%mm6 \n\t"
163
164 "por %%mm3, %%mm1 \n\t"
165 "por %%mm4, %%mm2 \n\t"
166 "por %%mm5, %%mm1 \n\t"
167 "por %%mm6, %%mm2 \n\t"
168
169 "movq %%mm1, (%%edi) \n\t"
170 "movq %%mm2, 8(%%edi) \n\t"
171
172 "movq 8(%%esi), %%mm1 \n\t"
173 "movq 8(%%esi), %%mm2 \n\t"
174 "punpcklbw %%mm0, %%mm1 \n\t"
175 "punpckhbw %%mm0, %%mm2 \n\t"
176
177 "movq (%%eax,%%edx,2), %%mm4 \n\t"
178 "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
179 "punpckhbw %%mm0, %%mm4 \n\t"
180 "punpckhbw %%mm0, %%mm6 \n\t"
181 "movq (%%eax), %%mm3 \n\t"
182 "movq (%%ebx), %%mm5 \n\t"
183 "punpckhbw %%mm0, %%mm3 \n\t"
184 "punpckhbw %%mm0, %%mm5 \n\t"
185 "paddw %%mm3, %%mm4 \n\t"
186 "paddw %%mm5, %%mm6 \n\t"
187 "paddw %%mm3, %%mm4 \n\t"
188 "paddw %%mm5, %%mm6 \n\t"
189 "paddw %%mm3, %%mm4 \n\t"
190 "paddw %%mm5, %%mm6 \n\t"
191 "paddw %%mm3, %%mm4 \n\t"
192 "paddw %%mm5, %%mm6 \n\t"
193 "paddw %%mm3, %%mm4 \n\t"
194 "paddw %%mm5, %%mm6 \n\t"
195 "paddw %%mm3, %%mm4 \n\t"
196 "paddw %%mm5, %%mm6 \n\t"
197 "paddw %%mm3, %%mm4 \n\t"
198 "paddw %%mm5, %%mm6 \n\t"
199 "psrlw $3, %%mm4 \n\t"
200 "psrlw $3, %%mm6 \n\t"
201 "movq %%mm4, %%mm3 \n\t"
202 "movq %%mm6, %%mm5 \n\t"
203 "punpcklwd %%mm0, %%mm3 \n\t"
204 "punpckhwd %%mm0, %%mm4 \n\t"
205 "punpcklwd %%mm0, %%mm5 \n\t"
206 "punpckhwd %%mm0, %%mm6 \n\t"
207 "pslld $8, %%mm3 \n\t"
208 "pslld $8, %%mm4 \n\t"
209 "pslld $24, %%mm5 \n\t"
210 "pslld $24, %%mm6 \n\t"
211
212 "por %%mm3, %%mm1 \n\t"
213 "por %%mm4, %%mm2 \n\t"
214 "por %%mm5, %%mm1 \n\t"
215 "por %%mm6, %%mm2 \n\t"
216
217 "addl $16, %%esi \n\t"
218 "addl $8, %%eax \n\t"
219 "addl $8, %%ebx \n\t"
220
221 "movq %%mm1, 16(%%edi) \n\t"
222 "movq %%mm2, 24(%%edi) \n\t"
223 "addl $32, %%edi \n\t"
224
225 "decl %%ecx \n\t"
226 "jnz .Lli0 \n\t"
227 "emms \n\t"
228 "popl %%ebp \n\t"
229 :
230 : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16)
231 : "memory"
232 );
233 pack_li_0_C(dst, y, u, v, (w&15), us, vs);
234 }
235
236 static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
237 unsigned char *u, unsigned char *v, int w, int us, int vs)
238 {
239 asm volatile (""
240 "pushl %%ebp \n\t"
241 "movl 4(%%edx), %%ebp \n\t"
242 "movl (%%edx), %%edx \n\t"
243 "pxor %%mm0, %%mm0 \n\t"
244
245 ".balign 16 \n\t"
246 ".Lli1: \n\t"
247 "movq (%%esi), %%mm1 \n\t"
248 "movq (%%esi), %%mm2 \n\t"
249 "punpcklbw %%mm0, %%mm1 \n\t"
250 "punpckhbw %%mm0, %%mm2 \n\t"
251
252 "movq (%%eax,%%edx,2), %%mm4 \n\t"
253 "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
254 "punpcklbw %%mm0, %%mm4 \n\t"
255 "punpcklbw %%mm0, %%mm6 \n\t"
256 "movq (%%eax), %%mm3 \n\t"
257 "movq (%%ebx), %%mm5 \n\t"
258 "punpcklbw %%mm0, %%mm3 \n\t"
259 "punpcklbw %%mm0, %%mm5 \n\t"
260 "movq %%mm4, %%mm7 \n\t"
261 "paddw %%mm4, %%mm4 \n\t"
262 "paddw %%mm7, %%mm4 \n\t"
263 "movq %%mm6, %%mm7 \n\t"
264 "paddw %%mm6, %%mm6 \n\t"
265 "paddw %%mm7, %%mm6 \n\t"
266 "paddw %%mm3, %%mm4 \n\t"
267 "paddw %%mm5, %%mm6 \n\t"
268 "paddw %%mm3, %%mm4 \n\t"
269 "paddw %%mm5, %%mm6 \n\t"
270 "paddw %%mm3, %%mm4 \n\t"
271 "paddw %%mm5, %%mm6 \n\t"
272 "paddw %%mm3, %%mm4 \n\t"
273 "paddw %%mm5, %%mm6 \n\t"
274 "paddw %%mm3, %%mm4 \n\t"
275 "paddw %%mm5, %%mm6 \n\t"
276 "psrlw $3, %%mm4 \n\t"
277 "psrlw $3, %%mm6 \n\t"
278 "movq %%mm4, %%mm3 \n\t"
279 "movq %%mm6, %%mm5 \n\t"
280 "punpcklwd %%mm0, %%mm3 \n\t"
281 "punpckhwd %%mm0, %%mm4 \n\t"
282 "punpcklwd %%mm0, %%mm5 \n\t"
283 "punpckhwd %%mm0, %%mm6 \n\t"
284 "pslld $8, %%mm3 \n\t"
285 "pslld $8, %%mm4 \n\t"
286 "pslld $24, %%mm5 \n\t"
287 "pslld $24, %%mm6 \n\t"
288
289 "por %%mm3, %%mm1 \n\t"
290 "por %%mm4, %%mm2 \n\t"
291 "por %%mm5, %%mm1 \n\t"
292 "por %%mm6, %%mm2 \n\t"
293
294 "movq %%mm1, (%%edi) \n\t"
295 "movq %%mm2, 8(%%edi) \n\t"
296
297 "movq 8(%%esi), %%mm1 \n\t"
298 "movq 8(%%esi), %%mm2 \n\t"
299 "punpcklbw %%mm0, %%mm1 \n\t"
300 "punpckhbw %%mm0, %%mm2 \n\t"
301
302 "movq (%%eax,%%edx,2), %%mm4 \n\t"
303 "movq (%%ebx,%%ebp,2), %%mm6 \n\t"
304 "punpckhbw %%mm0, %%mm4 \n\t"
305 "punpckhbw %%mm0, %%mm6 \n\t"
306 "movq (%%eax), %%mm3 \n\t"
307 "movq (%%ebx), %%mm5 \n\t"
308 "punpckhbw %%mm0, %%mm3 \n\t"
309 "punpckhbw %%mm0, %%mm5 \n\t"
310 "movq %%mm4, %%mm7 \n\t"
311 "paddw %%mm4, %%mm4 \n\t"
312 "paddw %%mm7, %%mm4 \n\t"
313 "movq %%mm6, %%mm7 \n\t"
314 "paddw %%mm6, %%mm6 \n\t"
315 "paddw %%mm7, %%mm6 \n\t"
316 "paddw %%mm3, %%mm4 \n\t"
317 "paddw %%mm5, %%mm6 \n\t"
318 "paddw %%mm3, %%mm4 \n\t"
319 "paddw %%mm5, %%mm6 \n\t"
320 "paddw %%mm3, %%mm4 \n\t"
321 "paddw %%mm5, %%mm6 \n\t"
322 "paddw %%mm3, %%mm4 \n\t"
323 "paddw %%mm5, %%mm6 \n\t"
324 "paddw %%mm3, %%mm4 \n\t"
325 "paddw %%mm5, %%mm6 \n\t"
326 "psrlw $3, %%mm4 \n\t"
327 "psrlw $3, %%mm6 \n\t"
328 "movq %%mm4, %%mm3 \n\t"
329 "movq %%mm6, %%mm5 \n\t"
330 "punpcklwd %%mm0, %%mm3 \n\t"
331 "punpckhwd %%mm0, %%mm4 \n\t"
332 "punpcklwd %%mm0, %%mm5 \n\t"
333 "punpckhwd %%mm0, %%mm6 \n\t"
334 "pslld $8, %%mm3 \n\t"
335 "pslld $8, %%mm4 \n\t"
336 "pslld $24, %%mm5 \n\t"
337 "pslld $24, %%mm6 \n\t"
338
339 "por %%mm3, %%mm1 \n\t"
340 "por %%mm4, %%mm2 \n\t"
341 "por %%mm5, %%mm1 \n\t"
342 "por %%mm6, %%mm2 \n\t"
343
344 "addl $16, %%esi \n\t"
345 "addl $8, %%eax \n\t"
346 "addl $8, %%ebx \n\t"
347
348 "movq %%mm1, 16(%%edi) \n\t"
349 "movq %%mm2, 24(%%edi) \n\t"
350 "addl $32, %%edi \n\t"
351
352 "decl %%ecx \n\t"
353 "jnz .Lli1 \n\t"
354 "emms \n\t"
355 "popl %%ebp \n\t"
356 :
357 : "S" (y), "D" (dst), "a" (u), "b" (v), "d" (&us), "c" (w/16)
358 : "memory"
359 );
360 pack_li_1_C(dst, y, u, v, (w&15), us, vs);
111 } 361 }
112 #endif 362 #endif
113 363
114 static pack_func_t *pack_nn; 364 static pack_func_t *pack_nn;
115 static pack_func_t *pack_li_0; 365 static pack_func_t *pack_li_0;
197 447
198 pack_nn = (pack_func_t *)pack_nn_C; 448 pack_nn = (pack_func_t *)pack_nn_C;
199 pack_li_0 = pack_li_0_C; 449 pack_li_0 = pack_li_0_C;
200 pack_li_1 = pack_li_1_C; 450 pack_li_1 = pack_li_1_C;
201 #ifdef HAVE_MMX 451 #ifdef HAVE_MMX
202 if(gCpuCaps.hasMMX) pack_nn = (pack_func_t *)pack_nn_MMX; 452 if(gCpuCaps.hasMMX) {
453 pack_nn = (pack_func_t *)pack_nn_MMX;
454 pack_li_0 = pack_li_0_MMX;
455 pack_li_1 = pack_li_1_MMX;
456 }
203 #endif 457 #endif
204 458
205 switch(vf->priv->mode) { 459 switch(vf->priv->mode) {
206 case 0: 460 case 0:
207 vf->priv->pack[0] = vf->priv->pack[1] = pack_nn; 461 vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;