Mercurial > libavcodec.hg
comparison h264_cabac.c @ 11277:c12d6c6c027e libavcodec
Change mvd_cache & mvd_table to 8 bits; this is overall a bit faster
for high-resolution videos.
About 20 cycles faster per MB for cathedral.
author | michael |
---|---|
date | Wed, 24 Feb 2010 20:43:06 +0000 |
parents | 63d329cd8d80 |
children | 1a349d669184 |
comparison
equal
deleted
inserted
replaced
11276:dd948aea1c20 | 11277:c12d6c6c027e |
---|---|
936 } | 936 } |
937 } | 937 } |
938 while( k-- ) { | 938 while( k-- ) { |
939 mvd += get_cabac_bypass( &h->cabac )<<k; | 939 mvd += get_cabac_bypass( &h->cabac )<<k; |
940 } | 940 } |
941 } | 941 *mvda=mvd < 70 ? mvd : 70; |
942 *mvda=mvd; | 942 }else |
943 *mvda=mvd; | |
943 return get_cabac_bypass_sign( &h->cabac, -mvd ); | 944 return get_cabac_bypass_sign( &h->cabac, -mvd ); |
944 } | 945 } |
945 | 946 |
946 #define DECODE_CABAC_MB_MVD( h, list, n )\ | 947 #define DECODE_CABAC_MB_MVD( h, list, n )\ |
947 {\ | 948 {\ |
1427 | 1428 |
1428 for(list=0; list<h->list_count; list++){ | 1429 for(list=0; list<h->list_count; list++){ |
1429 for(i=0; i<4; i++){ | 1430 for(i=0; i<4; i++){ |
1430 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]; | 1431 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]; |
1431 if(IS_DIRECT(h->sub_mb_type[i])){ | 1432 if(IS_DIRECT(h->sub_mb_type[i])){ |
1432 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4); | 1433 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 2); |
1433 continue; | 1434 continue; |
1434 } | 1435 } |
1435 | 1436 |
1436 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ | 1437 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ |
1437 const int sub_mb_type= h->sub_mb_type[i]; | 1438 const int sub_mb_type= h->sub_mb_type[i]; |
1439 for(j=0; j<sub_partition_count[i]; j++){ | 1440 for(j=0; j<sub_partition_count[i]; j++){ |
1440 int mpx, mpy; | 1441 int mpx, mpy; |
1441 int mx, my; | 1442 int mx, my; |
1442 const int index= 4*i + block_width*j; | 1443 const int index= 4*i + block_width*j; |
1443 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ]; | 1444 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ]; |
1444 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ]; | 1445 uint8_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ]; |
1445 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my); | 1446 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my); |
1446 | |
1447 DECODE_CABAC_MB_MVD( h, list, index) | 1447 DECODE_CABAC_MB_MVD( h, list, index) |
1448 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | 1448 tprintf(s->avctx, "final mv:%d %d\n", mx, my); |
1449 | 1449 |
1450 if(IS_SUB_8X8(sub_mb_type)){ | 1450 if(IS_SUB_8X8(sub_mb_type)){ |
1451 mv_cache[ 1 ][0]= | 1451 mv_cache[ 1 ][0]= |
1476 mvd_cache[ 0 ][0]= mpx; | 1476 mvd_cache[ 0 ][0]= mpx; |
1477 mvd_cache[ 0 ][1]= mpy; | 1477 mvd_cache[ 0 ][1]= mpy; |
1478 } | 1478 } |
1479 }else{ | 1479 }else{ |
1480 fill_rectangle(h->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4); | 1480 fill_rectangle(h->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4); |
1481 fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 4); | 1481 fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 2); |
1482 } | 1482 } |
1483 } | 1483 } |
1484 } | 1484 } |
1485 } else if( IS_DIRECT(mb_type) ) { | 1485 } else if( IS_DIRECT(mb_type) ) { |
1486 ff_h264_pred_direct_motion(h, &mb_type); | 1486 ff_h264_pred_direct_motion(h, &mb_type); |
1487 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); | 1487 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 2); |
1488 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); | 1488 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 2); |
1489 dct8x8_allowed &= h->sps.direct_8x8_inference_flag; | 1489 dct8x8_allowed &= h->sps.direct_8x8_inference_flag; |
1490 } else { | 1490 } else { |
1491 int list, i; | 1491 int list, i; |
1492 if(IS_16X16(mb_type)){ | 1492 if(IS_16X16(mb_type)){ |
1493 for(list=0; list<h->list_count; list++){ | 1493 for(list=0; list<h->list_count; list++){ |
1510 int mx,my,mpx,mpy; | 1510 int mx,my,mpx,mpy; |
1511 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my); | 1511 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my); |
1512 DECODE_CABAC_MB_MVD( h, list, 0) | 1512 DECODE_CABAC_MB_MVD( h, list, 0) |
1513 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | 1513 tprintf(s->avctx, "final mv:%d %d\n", mx, my); |
1514 | 1514 |
1515 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mpx,mpy), 4); | 1515 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack8to16(mpx,mpy), 2); |
1516 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); | 1516 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); |
1517 }else | 1517 }else |
1518 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); | 1518 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); |
1519 } | 1519 } |
1520 } | 1520 } |
1542 int mx,my,mpx,mpy; | 1542 int mx,my,mpx,mpy; |
1543 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my); | 1543 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my); |
1544 DECODE_CABAC_MB_MVD( h, list, 8*i) | 1544 DECODE_CABAC_MB_MVD( h, list, 8*i) |
1545 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | 1545 tprintf(s->avctx, "final mv:%d %d\n", mx, my); |
1546 | 1546 |
1547 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mpx,mpy), 4); | 1547 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack8to16(mpx,mpy), 2); |
1548 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); | 1548 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); |
1549 }else{ | 1549 }else{ |
1550 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); | 1550 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 2); |
1551 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); | 1551 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); |
1552 } | 1552 } |
1553 } | 1553 } |
1554 } | 1554 } |
1555 }else{ | 1555 }else{ |
1577 int mx,my,mpx,mpy; | 1577 int mx,my,mpx,mpy; |
1578 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); | 1578 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); |
1579 DECODE_CABAC_MB_MVD( h, list, 4*i) | 1579 DECODE_CABAC_MB_MVD( h, list, 4*i) |
1580 | 1580 |
1581 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | 1581 tprintf(s->avctx, "final mv:%d %d\n", mx, my); |
1582 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mpx,mpy), 4); | 1582 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack8to16(mpx,mpy), 2); |
1583 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4); | 1583 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4); |
1584 }else{ | 1584 }else{ |
1585 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); | 1585 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 2); |
1586 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); | 1586 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); |
1587 } | 1587 } |
1588 } | 1588 } |
1589 } | 1589 } |
1590 } | 1590 } |