Mercurial > mplayer.hg
comparison postproc/postprocess.c @ 2413:32e733ec8a88
optimizations (+2% speedup)
author | michael |
---|---|
date | Tue, 23 Oct 2001 10:29:48 +0000 |
parents | bc69d7c0e1dc |
children | 25e7342b5171 |
comparison
equal
deleted
inserted
replaced
2412:c2ed312a9d98 | 2413:32e733ec8a88 |
---|---|
212 static inline int isVertDC(uint8_t src[], int stride){ | 212 static inline int isVertDC(uint8_t src[], int stride){ |
213 int numEq= 0; | 213 int numEq= 0; |
214 int y; | 214 int y; |
215 src+= stride*4; // src points to begin of the 8x8 Block | 215 src+= stride*4; // src points to begin of the 8x8 Block |
216 #ifdef HAVE_MMX | 216 #ifdef HAVE_MMX |
217 asm volatile( | 217 asm volatile( |
218 "pushl %1\n\t" | 218 "leal (%1, %2), %%eax \n\t" |
219 "leal (%%eax, %2, 4), %%ebx \n\t" | |
220 // 0 1 2 3 4 5 6 7 8 9 | |
221 // %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2 | |
219 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | 222 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F |
220 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | 223 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D |
221 "movq (%1), %%mm0 \n\t" | 224 "movq (%1), %%mm0 \n\t" |
222 "addl %2, %1 \n\t" | 225 "movq (%%eax), %%mm1 \n\t" |
223 "movq (%1), %%mm1 \n\t" | |
224 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference | 226 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference |
225 "paddb %%mm7, %%mm0 \n\t" | 227 "paddb %%mm7, %%mm0 \n\t" |
226 "pcmpgtb %%mm6, %%mm0 \n\t" | 228 "pcmpgtb %%mm6, %%mm0 \n\t" |
227 | 229 |
228 "addl %2, %1 \n\t" | 230 "movq (%%eax,%2), %%mm2 \n\t" |
229 "movq (%1), %%mm2 \n\t" | |
230 "psubb %%mm2, %%mm1 \n\t" | 231 "psubb %%mm2, %%mm1 \n\t" |
231 "paddb %%mm7, %%mm1 \n\t" | 232 "paddb %%mm7, %%mm1 \n\t" |
232 "pcmpgtb %%mm6, %%mm1 \n\t" | 233 "pcmpgtb %%mm6, %%mm1 \n\t" |
233 "paddb %%mm1, %%mm0 \n\t" | 234 "paddb %%mm1, %%mm0 \n\t" |
234 | 235 |
235 "addl %2, %1 \n\t" | 236 "movq (%%eax, %2, 2), %%mm1 \n\t" |
236 "movq (%1), %%mm1 \n\t" | |
237 "psubb %%mm1, %%mm2 \n\t" | 237 "psubb %%mm1, %%mm2 \n\t" |
238 "paddb %%mm7, %%mm2 \n\t" | 238 "paddb %%mm7, %%mm2 \n\t" |
239 "pcmpgtb %%mm6, %%mm2 \n\t" | 239 "pcmpgtb %%mm6, %%mm2 \n\t" |
240 "paddb %%mm2, %%mm0 \n\t" | 240 "paddb %%mm2, %%mm0 \n\t" |
241 | 241 |
242 "addl %2, %1 \n\t" | 242 "movq (%1, %2, 4), %%mm2 \n\t" |
243 "movq (%1), %%mm2 \n\t" | |
244 "psubb %%mm2, %%mm1 \n\t" | 243 "psubb %%mm2, %%mm1 \n\t" |
245 "paddb %%mm7, %%mm1 \n\t" | 244 "paddb %%mm7, %%mm1 \n\t" |
246 "pcmpgtb %%mm6, %%mm1 \n\t" | 245 "pcmpgtb %%mm6, %%mm1 \n\t" |
247 "paddb %%mm1, %%mm0 \n\t" | 246 "paddb %%mm1, %%mm0 \n\t" |
248 | 247 |
249 "addl %2, %1 \n\t" | 248 "movq (%%ebx), %%mm1 \n\t" |
250 "movq (%1), %%mm1 \n\t" | |
251 "psubb %%mm1, %%mm2 \n\t" | 249 "psubb %%mm1, %%mm2 \n\t" |
252 "paddb %%mm7, %%mm2 \n\t" | 250 "paddb %%mm7, %%mm2 \n\t" |
253 "pcmpgtb %%mm6, %%mm2 \n\t" | 251 "pcmpgtb %%mm6, %%mm2 \n\t" |
254 "paddb %%mm2, %%mm0 \n\t" | 252 "paddb %%mm2, %%mm0 \n\t" |
255 | 253 |
256 "addl %2, %1 \n\t" | 254 "movq (%%ebx, %2), %%mm2 \n\t" |
257 "movq (%1), %%mm2 \n\t" | |
258 "psubb %%mm2, %%mm1 \n\t" | 255 "psubb %%mm2, %%mm1 \n\t" |
259 "paddb %%mm7, %%mm1 \n\t" | 256 "paddb %%mm7, %%mm1 \n\t" |
260 "pcmpgtb %%mm6, %%mm1 \n\t" | 257 "pcmpgtb %%mm6, %%mm1 \n\t" |
261 "paddb %%mm1, %%mm0 \n\t" | 258 "paddb %%mm1, %%mm0 \n\t" |
262 | 259 |
263 "addl %2, %1 \n\t" | 260 "movq (%%ebx, %2, 2), %%mm1 \n\t" |
264 "movq (%1), %%mm1 \n\t" | |
265 "psubb %%mm1, %%mm2 \n\t" | 261 "psubb %%mm1, %%mm2 \n\t" |
266 "paddb %%mm7, %%mm2 \n\t" | 262 "paddb %%mm7, %%mm2 \n\t" |
267 "pcmpgtb %%mm6, %%mm2 \n\t" | 263 "pcmpgtb %%mm6, %%mm2 \n\t" |
268 "paddb %%mm2, %%mm0 \n\t" | 264 "paddb %%mm2, %%mm0 \n\t" |
269 | 265 |
275 "psrlq $16, %%mm0 \n\t" | 271 "psrlq $16, %%mm0 \n\t" |
276 "paddb %%mm1, %%mm0 \n\t" | 272 "paddb %%mm1, %%mm0 \n\t" |
277 "movq %%mm0, %%mm1 \n\t" | 273 "movq %%mm0, %%mm1 \n\t" |
278 "psrlq $32, %%mm0 \n\t" | 274 "psrlq $32, %%mm0 \n\t" |
279 "paddb %%mm1, %%mm0 \n\t" | 275 "paddb %%mm1, %%mm0 \n\t" |
280 "popl %1\n\t" | |
281 "movd %%mm0, %0 \n\t" | 276 "movd %%mm0, %0 \n\t" |
282 : "=r" (numEq) | 277 : "=r" (numEq) |
283 : "r" (src), "r" (stride) | 278 : "r" (src), "r" (stride) |
284 ); | 279 ); |
285 // printf("%d\n", numEq); | 280 |
286 numEq= (256 - (numEq & 0xFF)) &0xFF; | 281 numEq= (256 - numEq) &0xFF; |
287 | |
288 // int asmEq= numEq; | |
289 // numEq=0; | |
290 // uint8_t *temp= src; | |
291 | 282 |
292 #else | 283 #else |
293 for(y=0; y<BLOCK_SIZE-1; y++) | 284 for(y=0; y<BLOCK_SIZE-1; y++) |
294 { | 285 { |
295 if(((src[0] - src[0+stride] + 1)&0xFFFF) < 3) numEq++; | 286 if(((src[0] - src[0+stride] + 1)&0xFFFF) < 3) numEq++; |
2489 int i; | 2480 int i; |
2490 if(levelFix) | 2481 if(levelFix) |
2491 { | 2482 { |
2492 #ifdef HAVE_MMX | 2483 #ifdef HAVE_MMX |
2493 asm volatile( | 2484 asm volatile( |
2494 "pushl %0 \n\t" | |
2495 "pushl %1 \n\t" | |
2496 "leal (%2,%2), %%eax \n\t" | 2485 "leal (%2,%2), %%eax \n\t" |
2497 "leal (%3,%3), %%ebx \n\t" | 2486 "leal (%3,%3), %%ebx \n\t" |
2498 "movq packedYOffset, %%mm2 \n\t" | 2487 "movq packedYOffset, %%mm2 \n\t" |
2499 "movq packedYScale, %%mm3 \n\t" | 2488 "movq packedYScale, %%mm3 \n\t" |
2500 "pxor %%mm4, %%mm4 \n\t" | 2489 "pxor %%mm4, %%mm4 \n\t" |
2532 "addl %%ebx, %1 \n\t" | 2521 "addl %%ebx, %1 \n\t" |
2533 SCALED_CPY | 2522 SCALED_CPY |
2534 "addl %%ebx, %1 \n\t" | 2523 "addl %%ebx, %1 \n\t" |
2535 SCALED_CPY | 2524 SCALED_CPY |
2536 | 2525 |
2537 "popl %1 \n\t" | 2526 : "+r"(src), |
2538 "popl %0 \n\t" | 2527 "+r"(dst) |
2539 : : "r" (src), | 2528 :"r" (srcStride), |
2540 "r" (dst), | |
2541 "r" (srcStride), | |
2542 "r" (dstStride) | 2529 "r" (dstStride) |
2543 : "%eax", "%ebx" | 2530 : "%eax", "%ebx" |
2544 ); | 2531 ); |
2545 #else | 2532 #else |
2546 for(i=0; i<numLines; i++) | 2533 for(i=0; i<numLines; i++) |