comparison h264.c @ 5642:d2598034f2a9 libavcodec

Add slice-based parallel H.264 decoding. Patch by Andreas Öman % andreas A olebyn P nu %. NB: depends on having a thread library activated at config time, and on having a source encoded with multiple slices. Original threads: (1) date: May 18, 2007 11:00 PM, subject: [FFmpeg-devel] Parallelized h264 proof-of-concept; (2) date: Jun 15, 2007 10:10 PM, subject: [FFmpeg-devel] [PATCH] h264 parallelized, (was: Parallelized h264 proof-of-concept); (3) date: Jun 25, 2007 7:02 PM, subject: Re: [FFmpeg-devel] [PATCH] h264 parallelized
author gpoirier
date Wed, 05 Sep 2007 16:18:15 +0000
parents 4a26dc4ca11d
children bf02fa211648
comparison
equal deleted inserted replaced
5641:1e93e637fa21 5642:d2598034f2a9
2003 } 2003 }
2004 } 2004 }
2005 2005
2006 static void free_tables(H264Context *h){ 2006 static void free_tables(H264Context *h){
2007 int i; 2007 int i;
2008 H264Context *hx;
2008 av_freep(&h->intra4x4_pred_mode); 2009 av_freep(&h->intra4x4_pred_mode);
2009 av_freep(&h->chroma_pred_mode_table); 2010 av_freep(&h->chroma_pred_mode_table);
2010 av_freep(&h->cbp_table); 2011 av_freep(&h->cbp_table);
2011 av_freep(&h->mvd_table[0]); 2012 av_freep(&h->mvd_table[0]);
2012 av_freep(&h->mvd_table[1]); 2013 av_freep(&h->mvd_table[1]);
2013 av_freep(&h->direct_table); 2014 av_freep(&h->direct_table);
2014 av_freep(&h->non_zero_count); 2015 av_freep(&h->non_zero_count);
2015 av_freep(&h->slice_table_base); 2016 av_freep(&h->slice_table_base);
2016 av_freep(&h->top_borders[1]);
2017 av_freep(&h->top_borders[0]);
2018 h->slice_table= NULL; 2017 h->slice_table= NULL;
2019 2018
2020 av_freep(&h->mb2b_xy); 2019 av_freep(&h->mb2b_xy);
2021 av_freep(&h->mb2b8_xy); 2020 av_freep(&h->mb2b8_xy);
2022 2021
2023 av_freep(&h->s.obmc_scratchpad);
2024
2025 for(i = 0; i < MAX_SPS_COUNT; i++) 2022 for(i = 0; i < MAX_SPS_COUNT; i++)
2026 av_freep(h->sps_buffers + i); 2023 av_freep(h->sps_buffers + i);
2027 2024
2028 for(i = 0; i < MAX_PPS_COUNT; i++) 2025 for(i = 0; i < MAX_PPS_COUNT; i++)
2029 av_freep(h->pps_buffers + i); 2026 av_freep(h->pps_buffers + i);
2027
2028 for(i = 0; i < h->s.avctx->thread_count; i++) {
2029 hx = h->thread_context[i];
2030 if(!hx) continue;
2031 av_freep(&hx->top_borders[1]);
2032 av_freep(&hx->top_borders[0]);
2033 av_freep(&hx->s.obmc_scratchpad);
2034 av_freep(&hx->s.allocated_edge_emu_buffer);
2035 }
2030 } 2036 }
2031 2037
2032 static void init_dequant8_coeff_table(H264Context *h){ 2038 static void init_dequant8_coeff_table(H264Context *h){
2033 int i,q,x; 2039 int i,q,x;
2034 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly 2040 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2105 2111
2106 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) 2112 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2107 2113
2108 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) 2114 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2109 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t)) 2115 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2110 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2111 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
2112 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) 2116 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2113 2117
2114 if( h->pps.cabac ) { 2118 if( h->pps.cabac ) {
2115 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t)) 2119 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2116 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t)); 2120 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2143 fail: 2147 fail:
2144 free_tables(h); 2148 free_tables(h);
2145 return -1; 2149 return -1;
2146 } 2150 }
2147 2151
2152 /**
2153 * Mimic alloc_tables(), but for every context thread.
2154 */
2155 static void clone_tables(H264Context *dst, H264Context *src){
2156 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2157 dst->non_zero_count = src->non_zero_count;
2158 dst->slice_table = src->slice_table;
2159 dst->cbp_table = src->cbp_table;
2160 dst->mb2b_xy = src->mb2b_xy;
2161 dst->mb2b8_xy = src->mb2b8_xy;
2162 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2163 dst->mvd_table[0] = src->mvd_table[0];
2164 dst->mvd_table[1] = src->mvd_table[1];
2165 dst->direct_table = src->direct_table;
2166
2167 if(!dst->dequant4_coeff[0])
2168 init_dequant_tables(dst);
2169 dst->s.obmc_scratchpad = NULL;
2170 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2171 dst->dequant_coeff_pps= -1;
2172 }
2173
2174 /**
2175 * Init context
2176 * Allocate buffers which are not shared amongst multiple threads.
2177 */
2178 static int context_init(H264Context *h){
2179 MpegEncContext * const s = &h->s;
2180
2181 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2182 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2183
2184 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2185 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2186 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
2187 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
2188 return 0;
2189 fail:
2190 return -1; // free_tables will clean up for us
2191 }
2192
2148 static void common_init(H264Context *h){ 2193 static void common_init(H264Context *h){
2149 MpegEncContext * const s = &h->s; 2194 MpegEncContext * const s = &h->s;
2150 2195
2151 s->width = s->avctx->width; 2196 s->width = s->avctx->width;
2152 s->height = s->avctx->height; 2197 s->height = s->avctx->height;
2188 h->got_avcC = 0; 2233 h->got_avcC = 0;
2189 } else { 2234 } else {
2190 h->is_avc = 0; 2235 h->is_avc = 0;
2191 } 2236 }
2192 2237
2238 h->thread_context[0] = h;
2193 return 0; 2239 return 0;
2194 } 2240 }
2195 2241
2196 static int frame_start(H264Context *h){ 2242 static int frame_start(H264Context *h){
2197 MpegEncContext * const s = &h->s; 2243 MpegEncContext * const s = &h->s;
2214 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); 2260 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2215 } 2261 }
2216 2262
2217 /* can't be in alloc_tables because linesize isn't known there. 2263 /* can't be in alloc_tables because linesize isn't known there.
2218 * FIXME: redo bipred weight to not require extra buffer? */ 2264 * FIXME: redo bipred weight to not require extra buffer? */
2219 if(!s->obmc_scratchpad) 2265 for(i = 0; i < s->avctx->thread_count; i++)
2220 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); 2266 if(!h->thread_context[i]->s.obmc_scratchpad)
2267 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2221 2268
2222 /* some macroblocks will be accessed before they're available */ 2269 /* some macroblocks will be accessed before they're available */
2223 if(FRAME_MBAFF) 2270 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2224 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t)); 2271 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2225 2272
2226 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; 2273 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2227 return 0; 2274 return 0;
2228 } 2275 }
3451 h->field_scan_q0 = h->field_scan; 3498 h->field_scan_q0 = h->field_scan;
3452 h->field_scan8x8_q0 = h->field_scan8x8; 3499 h->field_scan8x8_q0 = h->field_scan8x8;
3453 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; 3500 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3454 } 3501 }
3455 } 3502 }
3503
3504 /**
3505 * Replicates H264 "master" context to thread contexts.
3506 */
3507 static void clone_slice(H264Context *dst, H264Context *src)
3508 {
3509 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3510 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3511 dst->s.current_picture = src->s.current_picture;
3512 dst->s.linesize = src->s.linesize;
3513 dst->s.uvlinesize = src->s.uvlinesize;
3514
3515 dst->prev_poc_msb = src->prev_poc_msb;
3516 dst->prev_poc_lsb = src->prev_poc_lsb;
3517 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3518 dst->prev_frame_num = src->prev_frame_num;
3519 dst->short_ref_count = src->short_ref_count;
3520
3521 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3522 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3523 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3524 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3525 }
3526
3456 /** 3527 /**
3457 * decodes a slice header. 3528 * decodes a slice header.
3458 * this will allso call MPV_common_init() and frame_start() as needed 3529 * this will allso call MPV_common_init() and frame_start() as needed
3530 *
3531 * @param h h264context
3532 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3533 *
3534 * @return 0 if okay, <0 if an error occured, 1 if decoding must not be multithreaded
3459 */ 3535 */
3460 static int decode_slice_header(H264Context *h){ 3536 static int decode_slice_header(H264Context *h, H264Context *h0){
3461 MpegEncContext * const s = &h->s; 3537 MpegEncContext * const s = &h->s;
3462 unsigned int first_mb_in_slice; 3538 unsigned int first_mb_in_slice;
3463 unsigned int pps_id; 3539 unsigned int pps_id;
3464 int num_ref_idx_active_override_flag; 3540 int num_ref_idx_active_override_flag;
3465 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE}; 3541 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3466 unsigned int slice_type, tmp; 3542 unsigned int slice_type, tmp, i;
3467 int default_ref_list_done = 0; 3543 int default_ref_list_done = 0;
3468 3544
3469 s->current_picture.reference= h->nal_ref_idc != 0; 3545 s->current_picture.reference= h->nal_ref_idc != 0;
3470 s->dropable= h->nal_ref_idc == 0; 3546 s->dropable= h->nal_ref_idc == 0;
3471 3547
3472 first_mb_in_slice= get_ue_golomb(&s->gb); 3548 first_mb_in_slice= get_ue_golomb(&s->gb);
3473 3549
3474 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){ 3550 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3475 h->slice_num = 0; 3551 h0->current_slice = 0;
3476 s->current_picture_ptr= NULL; 3552 s->current_picture_ptr= NULL;
3477 } 3553 }
3478 3554
3479 slice_type= get_ue_golomb(&s->gb); 3555 slice_type= get_ue_golomb(&s->gb);
3480 if(slice_type > 9){ 3556 if(slice_type > 9){
3487 }else 3563 }else
3488 h->slice_type_fixed=0; 3564 h->slice_type_fixed=0;
3489 3565
3490 slice_type= slice_type_map[ slice_type ]; 3566 slice_type= slice_type_map[ slice_type ];
3491 if (slice_type == I_TYPE 3567 if (slice_type == I_TYPE
3492 || (h->slice_num != 0 && slice_type == h->slice_type) ) { 3568 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3493 default_ref_list_done = 1; 3569 default_ref_list_done = 1;
3494 } 3570 }
3495 h->slice_type= slice_type; 3571 h->slice_type= slice_type;
3496 3572
3497 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though 3573 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3499 pps_id= get_ue_golomb(&s->gb); 3575 pps_id= get_ue_golomb(&s->gb);
3500 if(pps_id>=MAX_PPS_COUNT){ 3576 if(pps_id>=MAX_PPS_COUNT){
3501 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); 3577 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3502 return -1; 3578 return -1;
3503 } 3579 }
3504 if(!h->pps_buffers[pps_id]) { 3580 if(!h0->pps_buffers[pps_id]) {
3505 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n"); 3581 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3506 return -1; 3582 return -1;
3507 } 3583 }
3508 h->pps= *h->pps_buffers[pps_id]; 3584 h->pps= *h0->pps_buffers[pps_id];
3509 3585
3510 if(!h->sps_buffers[h->pps.sps_id]) { 3586 if(!h0->sps_buffers[h->pps.sps_id]) {
3511 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n"); 3587 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3512 return -1; 3588 return -1;
3513 } 3589 }
3514 h->sps = *h->sps_buffers[h->pps.sps_id]; 3590 h->sps = *h0->sps_buffers[h->pps.sps_id];
3515 3591
3516 if(h->dequant_coeff_pps != pps_id){ 3592 if(h->dequant_coeff_pps != pps_id){
3517 h->dequant_coeff_pps = pps_id; 3593 h->dequant_coeff_pps = pps_id;
3518 init_dequant_tables(h); 3594 init_dequant_tables(h);
3519 } 3595 }
3530 else 3606 else
3531 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck 3607 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3532 3608
3533 if (s->context_initialized 3609 if (s->context_initialized
3534 && ( s->width != s->avctx->width || s->height != s->avctx->height)) { 3610 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3611 if(h != h0)
3612 return -1; // width / height changed during parallelized decoding
3535 free_tables(h); 3613 free_tables(h);
3536 MPV_common_end(s); 3614 MPV_common_end(s);
3537 } 3615 }
3538 if (!s->context_initialized) { 3616 if (!s->context_initialized) {
3617 if(h != h0)
3618 return -1; // we cant (re-)initialize context during parallel decoding
3539 if (MPV_common_init(s) < 0) 3619 if (MPV_common_init(s) < 0)
3540 return -1; 3620 return -1;
3541 3621
3542 init_scan_tables(h); 3622 init_scan_tables(h);
3543 alloc_tables(h); 3623 alloc_tables(h);
3624
3625 for(i = 1; i < s->avctx->thread_count; i++) {
3626 H264Context *c;
3627 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3628 memcpy(c, h, sizeof(MpegEncContext));
3629 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3630 c->sps = h->sps;
3631 c->pps = h->pps;
3632 init_scan_tables(c);
3633 clone_tables(c, h);
3634 }
3635
3636 for(i = 0; i < s->avctx->thread_count; i++)
3637 if(context_init(h->thread_context[i]) < 0)
3638 return -1;
3544 3639
3545 s->avctx->width = s->width; 3640 s->avctx->width = s->width;
3546 s->avctx->height = s->height; 3641 s->avctx->height = s->height;
3547 s->avctx->sample_aspect_ratio= h->sps.sar; 3642 s->avctx->sample_aspect_ratio= h->sps.sar;
3548 if(!s->avctx->sample_aspect_ratio.den) 3643 if(!s->avctx->sample_aspect_ratio.den)
3555 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, 3650 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3556 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30); 3651 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3557 } 3652 }
3558 } 3653 }
3559 3654
3560 if(h->slice_num == 0){ 3655 if(h0->current_slice == 0){
3561 if(frame_start(h) < 0) 3656 if(frame_start(h) < 0)
3562 return -1; 3657 return -1;
3563 } 3658 }
3659 if(h != h0)
3660 clone_slice(h, h0);
3564 3661
3565 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup 3662 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
3566 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); 3663 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3567 3664
3568 h->mb_mbaff = 0; 3665 h->mb_mbaff = 0;
3665 implicit_weight_table(h); 3762 implicit_weight_table(h);
3666 else 3763 else
3667 h->use_weight = 0; 3764 h->use_weight = 0;
3668 3765
3669 if(s->current_picture.reference) 3766 if(s->current_picture.reference)
3670 decode_ref_pic_marking(h, &s->gb); 3767 decode_ref_pic_marking(h0, &s->gb);
3671 3768
3672 if(FRAME_MBAFF) 3769 if(FRAME_MBAFF)
3673 fill_mbaff_ref_list(h); 3770 fill_mbaff_ref_list(h);
3674 3771
3675 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){ 3772 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
3714 if( h->deblocking_filter ) { 3811 if( h->deblocking_filter ) {
3715 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1; 3812 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3716 h->slice_beta_offset = get_se_golomb(&s->gb) << 1; 3813 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3717 } 3814 }
3718 } 3815 }
3816
3817 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3818 h0->max_contexts = 1;
3819 if(!h0->single_decode_warning) {
3820 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3821 h0->single_decode_warning = 1;
3822 }
3823 if(h != h0)
3824 return 1; // deblocking switched inside frame
3825 }
3826
3719 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL 3827 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3720 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE) 3828 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
3721 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE) 3829 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
3722 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 3830 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3723 h->deblocking_filter= 0; 3831 h->deblocking_filter= 0;
3725 #if 0 //FMO 3833 #if 0 //FMO
3726 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) 3834 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3727 slice_group_change_cycle= get_bits(&s->gb, ?); 3835 slice_group_change_cycle= get_bits(&s->gb, ?);
3728 #endif 3836 #endif
3729 3837
3730 h->slice_num++; 3838 h0->last_slice_type = slice_type;
3839 h->slice_num = ++h0->current_slice;
3731 3840
3732 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; 3841 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3733 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width; 3842 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
3734 3843
3735 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 3844 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
6293 } 6402 }
6294 } 6403 }
6295 } 6404 }
6296 } 6405 }
6297 6406
6298 static int decode_slice(H264Context *h){ 6407 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6299 MpegEncContext * const s = &h->s; 6408 MpegEncContext * const s = &h->s;
6300 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; 6409 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6301 6410
6302 s->mb_skip_run= -1; 6411 s->mb_skip_run= -1;
6303 6412
6938 } 7047 }
6939 7048
6940 return 0; 7049 return 0;
6941 } 7050 }
6942 7051
7052 /**
7053 * Call decode_slice() for each context.
7054 *
7055 * @param h h264 master context
7056 * @param context_count number of contexts to execute
7057 */
7058 static void execute_decode_slices(H264Context *h, int context_count){
7059 MpegEncContext * const s = &h->s;
7060 AVCodecContext * const avctx= s->avctx;
7061 H264Context *hx;
7062 int i;
7063
7064 if(context_count == 1) {
7065 decode_slice(avctx, h);
7066 } else {
7067 for(i = 1; i < context_count; i++) {
7068 hx = h->thread_context[i];
7069 hx->s.error_resilience = avctx->error_resilience;
7070 hx->s.error_count = 0;
7071 }
7072
7073 avctx->execute(avctx, (void *)decode_slice,
7074 (void **)h->thread_context, NULL, context_count);
7075
7076 /* pull back stuff from slices to master context */
7077 hx = h->thread_context[context_count - 1];
7078 s->mb_x = hx->s.mb_x;
7079 s->mb_y = hx->s.mb_y;
7080 for(i = 1; i < context_count; i++)
7081 h->s.error_count += h->thread_context[i]->s.error_count;
7082 }
7083 }
7084
7085
6943 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ 7086 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
6944 MpegEncContext * const s = &h->s; 7087 MpegEncContext * const s = &h->s;
6945 AVCodecContext * const avctx= s->avctx; 7088 AVCodecContext * const avctx= s->avctx;
6946 int buf_index=0; 7089 int buf_index=0;
7090 H264Context *hx; ///< thread context
7091 int context_count = 0;
7092
7093 h->max_contexts = avctx->thread_count;
6947 #if 0 7094 #if 0
6948 int i; 7095 int i;
6949 for(i=0; i<50; i++){ 7096 for(i=0; i<50; i++){
6950 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); 7097 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
6951 } 7098 }
6952 #endif 7099 #endif
6953 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ 7100 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
6954 h->slice_num = 0; 7101 h->current_slice = 0;
6955 s->current_picture_ptr= NULL; 7102 s->current_picture_ptr= NULL;
6956 } 7103 }
6957 7104
6958 for(;;){ 7105 for(;;){
6959 int consumed; 7106 int consumed;
6960 int dst_length; 7107 int dst_length;
6961 int bit_length; 7108 int bit_length;
6962 uint8_t *ptr; 7109 uint8_t *ptr;
6963 int i, nalsize = 0; 7110 int i, nalsize = 0;
7111 int err;
6964 7112
6965 if(h->is_avc) { 7113 if(h->is_avc) {
6966 if(buf_index >= buf_size) break; 7114 if(buf_index >= buf_size) break;
6967 nalsize = 0; 7115 nalsize = 0;
6968 for(i = 0; i < h->nal_length_size; i++) 7116 for(i = 0; i < h->nal_length_size; i++)
6987 if(buf_index+3 >= buf_size) break; 7135 if(buf_index+3 >= buf_size) break;
6988 7136
6989 buf_index+=3; 7137 buf_index+=3;
6990 } 7138 }
6991 7139
6992 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index); 7140 hx = h->thread_context[context_count];
7141
7142 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
6993 if (ptr==NULL || dst_length < 0){ 7143 if (ptr==NULL || dst_length < 0){
6994 return -1; 7144 return -1;
6995 } 7145 }
6996 while(ptr[dst_length - 1] == 0 && dst_length > 0) 7146 while(ptr[dst_length - 1] == 0 && dst_length > 0)
6997 dst_length--; 7147 dst_length--;
6998 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1)); 7148 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
6999 7149
7000 if(s->avctx->debug&FF_DEBUG_STARTCODE){ 7150 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7001 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length); 7151 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7002 } 7152 }
7003 7153
7004 if (h->is_avc && (nalsize != consumed)) 7154 if (h->is_avc && (nalsize != consumed))
7005 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); 7155 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7006 7156
7008 7158
7009 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id 7159 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7010 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 7160 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7011 continue; 7161 continue;
7012 7162
7013 switch(h->nal_unit_type){ 7163 again:
7164 err = 0;
7165 switch(hx->nal_unit_type){
7014 case NAL_IDR_SLICE: 7166 case NAL_IDR_SLICE:
7167 if (h->nal_unit_type != NAL_IDR_SLICE) {
7168 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7169 return -1;
7170 }
7015 idr(h); //FIXME ensure we don't loose some frames if there is reordering 7171 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7016 case NAL_SLICE: 7172 case NAL_SLICE:
7017 init_get_bits(&s->gb, ptr, bit_length); 7173 init_get_bits(&hx->s.gb, ptr, bit_length);
7018 h->intra_gb_ptr= 7174 hx->intra_gb_ptr=
7019 h->inter_gb_ptr= &s->gb; 7175 hx->inter_gb_ptr= &hx->s.gb;
7020 s->data_partitioning = 0; 7176 hx->s.data_partitioning = 0;
7021 7177
7022 if(decode_slice_header(h) < 0){ 7178 if((err = decode_slice_header(hx, h)))
7023 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); 7179 break;
7024 break; 7180
7025 } 7181 s->current_picture_ptr->key_frame= (hx->nal_unit_type == NAL_IDR_SLICE);
7026 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE); 7182 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7027 if(h->redundant_pic_count==0 && s->hurry_up < 5 7183 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7028 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc) 7184 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7029 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) 7185 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7030 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
7031 && avctx->skip_frame < AVDISCARD_ALL) 7186 && avctx->skip_frame < AVDISCARD_ALL)
7032 decode_slice(h); 7187 context_count++;
7033 break; 7188 break;
7034 case NAL_DPA: 7189 case NAL_DPA:
7035 init_get_bits(&s->gb, ptr, bit_length); 7190 init_get_bits(&hx->s.gb, ptr, bit_length);
7036 h->intra_gb_ptr= 7191 hx->intra_gb_ptr=
7037 h->inter_gb_ptr= NULL; 7192 hx->inter_gb_ptr= NULL;
7038 s->data_partitioning = 1; 7193 hx->s.data_partitioning = 1;
7039 7194
7040 if(decode_slice_header(h) < 0){ 7195 err = decode_slice_header(hx, h);
7041 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7042 }
7043 break; 7196 break;
7044 case NAL_DPB: 7197 case NAL_DPB:
7045 init_get_bits(&h->intra_gb, ptr, bit_length); 7198 init_get_bits(&hx->intra_gb, ptr, bit_length);
7046 h->intra_gb_ptr= &h->intra_gb; 7199 hx->intra_gb_ptr= &hx->intra_gb;
7047 break; 7200 break;
7048 case NAL_DPC: 7201 case NAL_DPC:
7049 init_get_bits(&h->inter_gb, ptr, bit_length); 7202 init_get_bits(&hx->inter_gb, ptr, bit_length);
7050 h->inter_gb_ptr= &h->inter_gb; 7203 hx->inter_gb_ptr= &hx->inter_gb;
7051 7204
7052 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning 7205 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7053 && s->context_initialized 7206 && s->context_initialized
7054 && s->hurry_up < 5 7207 && s->hurry_up < 5
7055 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc) 7208 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7056 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) 7209 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7057 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE) 7210 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7058 && avctx->skip_frame < AVDISCARD_ALL) 7211 && avctx->skip_frame < AVDISCARD_ALL)
7059 decode_slice(h); 7212 context_count++;
7060 break; 7213 break;
7061 case NAL_SEI: 7214 case NAL_SEI:
7062 init_get_bits(&s->gb, ptr, bit_length); 7215 init_get_bits(&s->gb, ptr, bit_length);
7063 decode_sei(h); 7216 decode_sei(h);
7064 break; 7217 break;
7086 case NAL_AUXILIARY_SLICE: 7239 case NAL_AUXILIARY_SLICE:
7087 break; 7240 break;
7088 default: 7241 default:
7089 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length); 7242 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7090 } 7243 }
7091 } 7244
7092 7245 if(context_count == h->max_contexts) {
7246 execute_decode_slices(h, context_count);
7247 context_count = 0;
7248 }
7249
7250 if (err < 0)
7251 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7252 else if(err == 1) {
7253 /* Slice could not be decoded in parallel mode, copy down
7254 * NAL unit stuff to context 0 and restart. Note that
7255 * rbsp_buffer is not transfered, but since we no longer
7256 * run in parallel mode this should not be an issue. */
7257 h->nal_unit_type = hx->nal_unit_type;
7258 h->nal_ref_idc = hx->nal_ref_idc;
7259 hx = h;
7260 goto again;
7261 }
7262 }
7263 if(context_count)
7264 execute_decode_slices(h, context_count);
7093 return buf_index; 7265 return buf_index;
7094 } 7266 }
7095 7267
7096 /** 7268 /**
7097 * returns the number of bytes consumed for building the current frame 7269 * returns the number of bytes consumed for building the current frame