Mercurial > libavcodec.hg
comparison cabac.h @ 3969:fc6e0942353b libavcodec
First try of a handwritten get_cabac() for x86; this is 10-20% faster on P3, depending on whether you subtract the START/STOP_TIMER overhead
author | michael |
---|---|
date | Mon, 09 Oct 2006 14:15:14 +0000 |
parents | 01e1a12aa380 |
children | 508d759b6906 |
comparison
equal
deleted
inserted
replaced
3968:c86c7a54ba92 | 3969:fc6e0942353b |
---|---|
362 } | 362 } |
363 | 363 |
364 static int get_cabac(CABACContext *c, uint8_t * const state){ | 364 static int get_cabac(CABACContext *c, uint8_t * const state){ |
365 //FIXME gcc generates duplicate load/stores for c->low and c->range | 365 //FIXME gcc generates duplicate load/stores for c->low and c->range |
366 //START_TIMER | 366 //START_TIMER |
367 #ifdef ARCH_X86 | |
368 int bit; | |
369 | |
370 #define LOW "0" | |
371 #define RANGE "4" | |
372 #define LPS_RANGE "12" | |
373 #define LPS_STATE "12+2*66*4" | |
374 #define MPS_STATE "12+2*66*4+2*65" | |
375 #define BYTESTART "12+2*66*4+4*65" | |
376 #define BYTE "16+2*66*4+4*65" | |
377 #define BYTEEND "20+2*66*4+4*65" | |
378 | |
379 asm volatile( | |
380 "movzbl (%1), %%eax \n\t" | |
381 "movl "RANGE "(%2), %%ebx \n\t" | |
382 "movl "RANGE "(%2), %%edx \n\t" | |
383 "shrl $23, %%ebx \n\t" | |
384 "leal "LPS_RANGE"(%2, %%eax, 4), %%esi \n\t" | |
385 "movzbl (%%ebx, %%esi), %%esi \n\t" | |
386 "shll $17, %%esi \n\t" | |
387 "movl "LOW "(%2), %%ebx \n\t" | |
388 //eax:state ebx:low, edx:range, esi:RangeLPS | |
389 "subl %%esi, %%edx \n\t" | |
390 "cmpl %%edx, %%ebx \n\t" | |
391 " ja 1f \n\t" | |
392 "cmp $0x2000000, %%edx \n\t" //FIXME avoidable | |
393 "setb %%cl \n\t" | |
394 "shl %%cl, %%edx \n\t" | |
395 "shl %%cl, %%ebx \n\t" | |
396 "movb "MPS_STATE"(%2, %%eax), %%cl \n\t" | |
397 "movb %%cl, (%1) \n\t" | |
398 //eax:state ebx:low, edx:range, esi:RangeLPS | |
399 "test %%bx, %%bx \n\t" | |
400 " jnz 2f \n\t" | |
401 "movl "BYTE "(%2), %%esi \n\t" | |
402 "subl $0xFFFF, %%ebx \n\t" | |
403 "movzwl (%%esi), %%ecx \n\t" | |
404 "bswap %%ecx \n\t" | |
405 "shrl $15, %%ecx \n\t" | |
406 "addl $2, %%esi \n\t" | |
407 "addl %%ecx, %%ebx \n\t" | |
408 "movl %%esi, "BYTE "(%2) \n\t" | |
409 "jmp 2f \n\t" | |
410 "1: \n\t" | |
411 //eax:state ebx:low, edx:range, esi:RangeLPS | |
412 "subl %%edx, %%ebx \n\t" | |
413 "movl %%esi, %%edx \n\t" | |
414 "shr $19, %%esi \n\t" | |
415 "movb " MANGLE(ff_h264_norm_shift) "(%%esi), %%cl \n\t" | |
416 "shll %%cl, %%ebx \n\t" | |
417 "shll %%cl, %%edx \n\t" | |
418 "movb "LPS_STATE"(%2, %%eax), %%cl \n\t" | |
419 "movb %%cl, (%1) \n\t" | |
420 "incl %%eax \n\t" | |
421 "test %%bx, %%bx \n\t" | |
422 " jnz 2f \n\t" | |
423 | |
424 "movl "BYTE "(%2), %%ecx \n\t" | |
425 "movzwl (%%ecx), %%esi \n\t" | |
426 "bswap %%esi \n\t" | |
427 "shrl $15, %%esi \n\t" | |
428 "subl $0xFFFF, %%esi \n\t" | |
429 "addl $2, %%ecx \n\t" | |
430 "movl %%ecx, "BYTE "(%2) \n\t" | |
431 | |
432 "leal -1(%%ebx), %%ecx \n\t" | |
433 "xorl %%ebx, %%ecx \n\t" | |
434 "shrl $17, %%ecx \n\t" | |
435 "movb " MANGLE(ff_h264_norm_shift) "(%%ecx), %%cl \n\t" | |
436 "neg %%cl \n\t" | |
437 "add $7, %%cl \n\t" | |
438 | |
439 "shll %%cl , %%esi \n\t" | |
440 "addl %%esi, %%ebx \n\t" | |
441 "2: \n\t" | |
442 "movl %%edx, "RANGE "(%2) \n\t" | |
443 "movl %%ebx, "LOW "(%2) \n\t" | |
444 "andl $1, %%eax \n\t" | |
445 | |
446 :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used | |
447 :"r"(state), "r"(c) | |
448 : "%ecx", "%ebx", "%edx", "%esi" | |
449 ); | |
450 #else | |
367 int s = *state; | 451 int s = *state; |
368 int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); | 452 int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); |
369 int bit, lps_mask attribute_unused; | 453 int bit, lps_mask attribute_unused; |
370 | 454 |
371 c->range -= RangeLPS; | 455 c->range -= RangeLPS; |
415 c->range<<= lps_mask; | 499 c->range<<= lps_mask; |
416 c->low <<= lps_mask; | 500 c->low <<= lps_mask; |
417 if(!(c->low & CABAC_MASK)) | 501 if(!(c->low & CABAC_MASK)) |
418 refill2(c); | 502 refill2(c); |
419 #endif | 503 #endif |
504 #endif | |
420 //STOP_TIMER("get_cabac") | 505 //STOP_TIMER("get_cabac") |
421 return bit; | 506 return bit; |
422 } | 507 } |
423 | 508 |
424 static int get_cabac_bypass(CABACContext *c){ | 509 static int get_cabac_bypass(CABACContext *c){ |