comparison h264_cabac.c @ 11206:cb29d50bf6d4 libavcodec

Move abs() from decode_cabac_mb_mvd() to the code that writes mvd_cache, so the cache stores absolute values. This is 4-8 cycles faster.
author michael
date Thu, 18 Feb 2010 23:37:48 +0000
parents 4e2d74d7bad8
children 22ca00416118
comparison
equal deleted inserted replaced
11205:f5d50932acc0 11206:cb29d50bf6d4
908 } 908 }
909 return ref; 909 return ref;
910 } 910 }
911 911
912 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) { 912 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
913 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) + 913 int amvd = h->mvd_cache[list][scan8[n] - 1][l] +
914 abs( h->mvd_cache[list][scan8[n] - 8][l] ); 914 h->mvd_cache[list][scan8[n] - 8][l];
915 int ctxbase = (l == 0) ? 40 : 47; 915 int ctxbase = (l == 0) ? 40 : 47;
916 int mvd; 916 int mvd;
917 int ctx = (amvd>2) + (amvd>32); 917 int ctx = (amvd>2) + (amvd>32);
918 918
919 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx])) 919 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
1437 1437
1438 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 ); 1438 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
1439 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 ); 1439 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
1440 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 1440 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1441 1441
1442 mpx= abs(mpx-mx);
1443 mpy= abs(mpy-my);
1442 if(IS_SUB_8X8(sub_mb_type)){ 1444 if(IS_SUB_8X8(sub_mb_type)){
1443 mv_cache[ 1 ][0]= 1445 mv_cache[ 1 ][0]=
1444 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; 1446 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
1445 mv_cache[ 1 ][1]= 1447 mv_cache[ 1 ][1]=
1446 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; 1448 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
1447 1449
1448 mvd_cache[ 1 ][0]= 1450 mvd_cache[ 1 ][0]=
1449 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx; 1451 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mpx;
1450 mvd_cache[ 1 ][1]= 1452 mvd_cache[ 1 ][1]=
1451 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy; 1453 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= mpy;
1452 }else if(IS_SUB_8X4(sub_mb_type)){ 1454 }else if(IS_SUB_8X4(sub_mb_type)){
1453 mv_cache[ 1 ][0]= mx; 1455 mv_cache[ 1 ][0]= mx;
1454 mv_cache[ 1 ][1]= my; 1456 mv_cache[ 1 ][1]= my;
1455 1457
1456 mvd_cache[ 1 ][0]= mx - mpx; 1458 mvd_cache[ 1 ][0]= mpx;
1457 mvd_cache[ 1 ][1]= my - mpy; 1459 mvd_cache[ 1 ][1]= mpy;
1458 }else if(IS_SUB_4X8(sub_mb_type)){ 1460 }else if(IS_SUB_4X8(sub_mb_type)){
1459 mv_cache[ 8 ][0]= mx; 1461 mv_cache[ 8 ][0]= mx;
1460 mv_cache[ 8 ][1]= my; 1462 mv_cache[ 8 ][1]= my;
1461 1463
1462 mvd_cache[ 8 ][0]= mx - mpx; 1464 mvd_cache[ 8 ][0]= mpx;
1463 mvd_cache[ 8 ][1]= my - mpy; 1465 mvd_cache[ 8 ][1]= mpy;
1464 } 1466 }
1465 mv_cache[ 0 ][0]= mx; 1467 mv_cache[ 0 ][0]= mx;
1466 mv_cache[ 0 ][1]= my; 1468 mv_cache[ 0 ][1]= my;
1467 1469
1468 mvd_cache[ 0 ][0]= mx - mpx; 1470 mvd_cache[ 0 ][0]= mpx;
1469 mvd_cache[ 0 ][1]= my - mpy; 1471 mvd_cache[ 0 ][1]= mpy;
1470 } 1472 }
1471 }else{ 1473 }else{
1472 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0]; 1474 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
1473 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0]; 1475 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
1474 p[0] = p[1] = p[8] = p[9] = 0; 1476 p[0] = p[1] = p[8] = p[9] = 0;
1505 1507
1506 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 ); 1508 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
1507 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 ); 1509 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
1508 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 1510 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1509 1511
1510 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4); 1512 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(abs(mx-mpx),abs(my-mpy)), 4);
1511 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); 1513 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
1512 }else 1514 }else
1513 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); 1515 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
1514 } 1516 }
1515 } 1517 }
1537 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy); 1539 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
1538 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 ); 1540 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
1539 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 ); 1541 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
1540 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 1542 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1541 1543
1542 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4); 1544 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(abs(mx-mpx),abs(my-mpy)), 4);
1543 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); 1545 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
1544 }else{ 1546 }else{
1545 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); 1547 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
1546 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); 1548 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
1547 } 1549 }
1572 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy); 1574 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
1573 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 ); 1575 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
1574 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 ); 1576 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
1575 1577
1576 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 1578 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1577 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4); 1579 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(abs(mx-mpx),abs(my-mpy)), 4);
1578 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4); 1580 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
1579 }else{ 1581 }else{
1580 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); 1582 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
1581 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); 1583 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
1582 } 1584 }