Mercurial > libavcodec.hg
comparison h264.c @ 5642:d2598034f2a9 libavcodec
Add slice-based parallel H.264 decoding
Patch by Andreas Öman % andreas A olebyn P nu %
NB: depends on having a thread library activated at config time, and on
having a source encoded with multiple slices
Original threads:
date: May 18, 2007 11:00 PM
subject: [FFmpeg-devel] Parallelized h264 proof-of-concept
date: Jun 15, 2007 10:10 PM
subject: [FFmpeg-devel] [PATCH] h264 parallelized, (was: Parallelized h264 proof-of-concept)
date: Jun 25, 2007 7:02 PM
subject: Re: [FFmpeg-devel] [PATCH] h264 parallelized
author | gpoirier |
---|---|
date | Wed, 05 Sep 2007 16:18:15 +0000 |
parents | 4a26dc4ca11d |
children | bf02fa211648 |
comparison
equal
deleted
inserted
replaced
5641:1e93e637fa21 | 5642:d2598034f2a9 |
---|---|
2003 } | 2003 } |
2004 } | 2004 } |
2005 | 2005 |
2006 static void free_tables(H264Context *h){ | 2006 static void free_tables(H264Context *h){ |
2007 int i; | 2007 int i; |
2008 H264Context *hx; | |
2008 av_freep(&h->intra4x4_pred_mode); | 2009 av_freep(&h->intra4x4_pred_mode); |
2009 av_freep(&h->chroma_pred_mode_table); | 2010 av_freep(&h->chroma_pred_mode_table); |
2010 av_freep(&h->cbp_table); | 2011 av_freep(&h->cbp_table); |
2011 av_freep(&h->mvd_table[0]); | 2012 av_freep(&h->mvd_table[0]); |
2012 av_freep(&h->mvd_table[1]); | 2013 av_freep(&h->mvd_table[1]); |
2013 av_freep(&h->direct_table); | 2014 av_freep(&h->direct_table); |
2014 av_freep(&h->non_zero_count); | 2015 av_freep(&h->non_zero_count); |
2015 av_freep(&h->slice_table_base); | 2016 av_freep(&h->slice_table_base); |
2016 av_freep(&h->top_borders[1]); | |
2017 av_freep(&h->top_borders[0]); | |
2018 h->slice_table= NULL; | 2017 h->slice_table= NULL; |
2019 | 2018 |
2020 av_freep(&h->mb2b_xy); | 2019 av_freep(&h->mb2b_xy); |
2021 av_freep(&h->mb2b8_xy); | 2020 av_freep(&h->mb2b8_xy); |
2022 | 2021 |
2023 av_freep(&h->s.obmc_scratchpad); | |
2024 | |
2025 for(i = 0; i < MAX_SPS_COUNT; i++) | 2022 for(i = 0; i < MAX_SPS_COUNT; i++) |
2026 av_freep(h->sps_buffers + i); | 2023 av_freep(h->sps_buffers + i); |
2027 | 2024 |
2028 for(i = 0; i < MAX_PPS_COUNT; i++) | 2025 for(i = 0; i < MAX_PPS_COUNT; i++) |
2029 av_freep(h->pps_buffers + i); | 2026 av_freep(h->pps_buffers + i); |
2027 | |
2028 for(i = 0; i < h->s.avctx->thread_count; i++) { | |
2029 hx = h->thread_context[i]; | |
2030 if(!hx) continue; | |
2031 av_freep(&hx->top_borders[1]); | |
2032 av_freep(&hx->top_borders[0]); | |
2033 av_freep(&hx->s.obmc_scratchpad); | |
2034 av_freep(&hx->s.allocated_edge_emu_buffer); | |
2035 } | |
2030 } | 2036 } |
2031 | 2037 |
2032 static void init_dequant8_coeff_table(H264Context *h){ | 2038 static void init_dequant8_coeff_table(H264Context *h){ |
2033 int i,q,x; | 2039 int i,q,x; |
2034 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly | 2040 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly |
2105 | 2111 |
2106 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) | 2112 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) |
2107 | 2113 |
2108 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) | 2114 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) |
2109 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t)) | 2115 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t)) |
2110 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t)) | |
2111 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t)) | |
2112 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) | 2116 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) |
2113 | 2117 |
2114 if( h->pps.cabac ) { | 2118 if( h->pps.cabac ) { |
2115 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t)) | 2119 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t)) |
2116 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t)); | 2120 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t)); |
2143 fail: | 2147 fail: |
2144 free_tables(h); | 2148 free_tables(h); |
2145 return -1; | 2149 return -1; |
2146 } | 2150 } |
2147 | 2151 |
2152 /** | |
2153 * Mimic alloc_tables(), but for every context thread. | |
2154 */ | |
2155 static void clone_tables(H264Context *dst, H264Context *src){ | |
2156 dst->intra4x4_pred_mode = src->intra4x4_pred_mode; | |
2157 dst->non_zero_count = src->non_zero_count; | |
2158 dst->slice_table = src->slice_table; | |
2159 dst->cbp_table = src->cbp_table; | |
2160 dst->mb2b_xy = src->mb2b_xy; | |
2161 dst->mb2b8_xy = src->mb2b8_xy; | |
2162 dst->chroma_pred_mode_table = src->chroma_pred_mode_table; | |
2163 dst->mvd_table[0] = src->mvd_table[0]; | |
2164 dst->mvd_table[1] = src->mvd_table[1]; | |
2165 dst->direct_table = src->direct_table; | |
2166 | |
2167 if(!dst->dequant4_coeff[0]) | |
2168 init_dequant_tables(dst); | |
2169 dst->s.obmc_scratchpad = NULL; | |
2170 ff_h264_pred_init(&dst->hpc, src->s.codec_id); | |
2171 dst->dequant_coeff_pps= -1; | |
2172 } | |
2173 | |
2174 /** | |
2175 * Init context | |
2176 * Allocate buffers which are not shared amongst multiple threads. | |
2177 */ | |
2178 static int context_init(H264Context *h){ | |
2179 MpegEncContext * const s = &h->s; | |
2180 | |
2181 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)) | |
2182 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)) | |
2183 | |
2184 // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) | |
2185 CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, | |
2186 (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance | |
2187 s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21; | |
2188 return 0; | |
2189 fail: | |
2190 return -1; // free_tables will clean up for us | |
2191 } | |
2192 | |
2148 static void common_init(H264Context *h){ | 2193 static void common_init(H264Context *h){ |
2149 MpegEncContext * const s = &h->s; | 2194 MpegEncContext * const s = &h->s; |
2150 | 2195 |
2151 s->width = s->avctx->width; | 2196 s->width = s->avctx->width; |
2152 s->height = s->avctx->height; | 2197 s->height = s->avctx->height; |
2188 h->got_avcC = 0; | 2233 h->got_avcC = 0; |
2189 } else { | 2234 } else { |
2190 h->is_avc = 0; | 2235 h->is_avc = 0; |
2191 } | 2236 } |
2192 | 2237 |
2238 h->thread_context[0] = h; | |
2193 return 0; | 2239 return 0; |
2194 } | 2240 } |
2195 | 2241 |
2196 static int frame_start(H264Context *h){ | 2242 static int frame_start(H264Context *h){ |
2197 MpegEncContext * const s = &h->s; | 2243 MpegEncContext * const s = &h->s; |
2214 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); | 2260 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); |
2215 } | 2261 } |
2216 | 2262 |
2217 /* can't be in alloc_tables because linesize isn't known there. | 2263 /* can't be in alloc_tables because linesize isn't known there. |
2218 * FIXME: redo bipred weight to not require extra buffer? */ | 2264 * FIXME: redo bipred weight to not require extra buffer? */ |
2219 if(!s->obmc_scratchpad) | 2265 for(i = 0; i < s->avctx->thread_count; i++) |
2220 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); | 2266 if(!h->thread_context[i]->s.obmc_scratchpad) |
2267 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); | |
2221 | 2268 |
2222 /* some macroblocks will be accessed before they're available */ | 2269 /* some macroblocks will be accessed before they're available */ |
2223 if(FRAME_MBAFF) | 2270 if(FRAME_MBAFF || s->avctx->thread_count > 1) |
2224 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t)); | 2271 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t)); |
2225 | 2272 |
2226 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; | 2273 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; |
2227 return 0; | 2274 return 0; |
2228 } | 2275 } |
3451 h->field_scan_q0 = h->field_scan; | 3498 h->field_scan_q0 = h->field_scan; |
3452 h->field_scan8x8_q0 = h->field_scan8x8; | 3499 h->field_scan8x8_q0 = h->field_scan8x8; |
3453 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; | 3500 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; |
3454 } | 3501 } |
3455 } | 3502 } |
3503 | |
3504 /** | |
3505 * Replicates H264 "master" context to thread contexts. | |
3506 */ | |
3507 static void clone_slice(H264Context *dst, H264Context *src) | |
3508 { | |
3509 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); | |
3510 dst->s.current_picture_ptr = src->s.current_picture_ptr; | |
3511 dst->s.current_picture = src->s.current_picture; | |
3512 dst->s.linesize = src->s.linesize; | |
3513 dst->s.uvlinesize = src->s.uvlinesize; | |
3514 | |
3515 dst->prev_poc_msb = src->prev_poc_msb; | |
3516 dst->prev_poc_lsb = src->prev_poc_lsb; | |
3517 dst->prev_frame_num_offset = src->prev_frame_num_offset; | |
3518 dst->prev_frame_num = src->prev_frame_num; | |
3519 dst->short_ref_count = src->short_ref_count; | |
3520 | |
3521 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); | |
3522 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); | |
3523 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); | |
3524 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list)); | |
3525 } | |
3526 | |
3456 /** | 3527 /** |
3457 * decodes a slice header. | 3528 * decodes a slice header. |
3458 * this will allso call MPV_common_init() and frame_start() as needed | 3529 * this will allso call MPV_common_init() and frame_start() as needed |
3530 * | |
3531 * @param h h264context | |
3532 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding) | |
3533 * | |
3534 * @return 0 if okay, <0 if an error occured, 1 if decoding must not be multithreaded | |
3459 */ | 3535 */ |
3460 static int decode_slice_header(H264Context *h){ | 3536 static int decode_slice_header(H264Context *h, H264Context *h0){ |
3461 MpegEncContext * const s = &h->s; | 3537 MpegEncContext * const s = &h->s; |
3462 unsigned int first_mb_in_slice; | 3538 unsigned int first_mb_in_slice; |
3463 unsigned int pps_id; | 3539 unsigned int pps_id; |
3464 int num_ref_idx_active_override_flag; | 3540 int num_ref_idx_active_override_flag; |
3465 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE}; | 3541 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE}; |
3466 unsigned int slice_type, tmp; | 3542 unsigned int slice_type, tmp, i; |
3467 int default_ref_list_done = 0; | 3543 int default_ref_list_done = 0; |
3468 | 3544 |
3469 s->current_picture.reference= h->nal_ref_idc != 0; | 3545 s->current_picture.reference= h->nal_ref_idc != 0; |
3470 s->dropable= h->nal_ref_idc == 0; | 3546 s->dropable= h->nal_ref_idc == 0; |
3471 | 3547 |
3472 first_mb_in_slice= get_ue_golomb(&s->gb); | 3548 first_mb_in_slice= get_ue_golomb(&s->gb); |
3473 | 3549 |
3474 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){ | 3550 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){ |
3475 h->slice_num = 0; | 3551 h0->current_slice = 0; |
3476 s->current_picture_ptr= NULL; | 3552 s->current_picture_ptr= NULL; |
3477 } | 3553 } |
3478 | 3554 |
3479 slice_type= get_ue_golomb(&s->gb); | 3555 slice_type= get_ue_golomb(&s->gb); |
3480 if(slice_type > 9){ | 3556 if(slice_type > 9){ |
3487 }else | 3563 }else |
3488 h->slice_type_fixed=0; | 3564 h->slice_type_fixed=0; |
3489 | 3565 |
3490 slice_type= slice_type_map[ slice_type ]; | 3566 slice_type= slice_type_map[ slice_type ]; |
3491 if (slice_type == I_TYPE | 3567 if (slice_type == I_TYPE |
3492 || (h->slice_num != 0 && slice_type == h->slice_type) ) { | 3568 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) { |
3493 default_ref_list_done = 1; | 3569 default_ref_list_done = 1; |
3494 } | 3570 } |
3495 h->slice_type= slice_type; | 3571 h->slice_type= slice_type; |
3496 | 3572 |
3497 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though | 3573 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though |
3499 pps_id= get_ue_golomb(&s->gb); | 3575 pps_id= get_ue_golomb(&s->gb); |
3500 if(pps_id>=MAX_PPS_COUNT){ | 3576 if(pps_id>=MAX_PPS_COUNT){ |
3501 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); | 3577 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); |
3502 return -1; | 3578 return -1; |
3503 } | 3579 } |
3504 if(!h->pps_buffers[pps_id]) { | 3580 if(!h0->pps_buffers[pps_id]) { |
3505 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n"); | 3581 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n"); |
3506 return -1; | 3582 return -1; |
3507 } | 3583 } |
3508 h->pps= *h->pps_buffers[pps_id]; | 3584 h->pps= *h0->pps_buffers[pps_id]; |
3509 | 3585 |
3510 if(!h->sps_buffers[h->pps.sps_id]) { | 3586 if(!h0->sps_buffers[h->pps.sps_id]) { |
3511 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n"); | 3587 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n"); |
3512 return -1; | 3588 return -1; |
3513 } | 3589 } |
3514 h->sps = *h->sps_buffers[h->pps.sps_id]; | 3590 h->sps = *h0->sps_buffers[h->pps.sps_id]; |
3515 | 3591 |
3516 if(h->dequant_coeff_pps != pps_id){ | 3592 if(h->dequant_coeff_pps != pps_id){ |
3517 h->dequant_coeff_pps = pps_id; | 3593 h->dequant_coeff_pps = pps_id; |
3518 init_dequant_tables(h); | 3594 init_dequant_tables(h); |
3519 } | 3595 } |
3530 else | 3606 else |
3531 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck | 3607 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck |
3532 | 3608 |
3533 if (s->context_initialized | 3609 if (s->context_initialized |
3534 && ( s->width != s->avctx->width || s->height != s->avctx->height)) { | 3610 && ( s->width != s->avctx->width || s->height != s->avctx->height)) { |
3611 if(h != h0) | |
3612 return -1; // width / height changed during parallelized decoding | |
3535 free_tables(h); | 3613 free_tables(h); |
3536 MPV_common_end(s); | 3614 MPV_common_end(s); |
3537 } | 3615 } |
3538 if (!s->context_initialized) { | 3616 if (!s->context_initialized) { |
3617 if(h != h0) | |
3618 return -1; // we cant (re-)initialize context during parallel decoding | |
3539 if (MPV_common_init(s) < 0) | 3619 if (MPV_common_init(s) < 0) |
3540 return -1; | 3620 return -1; |
3541 | 3621 |
3542 init_scan_tables(h); | 3622 init_scan_tables(h); |
3543 alloc_tables(h); | 3623 alloc_tables(h); |
3624 | |
3625 for(i = 1; i < s->avctx->thread_count; i++) { | |
3626 H264Context *c; | |
3627 c = h->thread_context[i] = av_malloc(sizeof(H264Context)); | |
3628 memcpy(c, h, sizeof(MpegEncContext)); | |
3629 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); | |
3630 c->sps = h->sps; | |
3631 c->pps = h->pps; | |
3632 init_scan_tables(c); | |
3633 clone_tables(c, h); | |
3634 } | |
3635 | |
3636 for(i = 0; i < s->avctx->thread_count; i++) | |
3637 if(context_init(h->thread_context[i]) < 0) | |
3638 return -1; | |
3544 | 3639 |
3545 s->avctx->width = s->width; | 3640 s->avctx->width = s->width; |
3546 s->avctx->height = s->height; | 3641 s->avctx->height = s->height; |
3547 s->avctx->sample_aspect_ratio= h->sps.sar; | 3642 s->avctx->sample_aspect_ratio= h->sps.sar; |
3548 if(!s->avctx->sample_aspect_ratio.den) | 3643 if(!s->avctx->sample_aspect_ratio.den) |
3555 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, | 3650 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, |
3556 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30); | 3651 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30); |
3557 } | 3652 } |
3558 } | 3653 } |
3559 | 3654 |
3560 if(h->slice_num == 0){ | 3655 if(h0->current_slice == 0){ |
3561 if(frame_start(h) < 0) | 3656 if(frame_start(h) < 0) |
3562 return -1; | 3657 return -1; |
3563 } | 3658 } |
3659 if(h != h0) | |
3660 clone_slice(h, h0); | |
3564 | 3661 |
3565 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup | 3662 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup |
3566 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); | 3663 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); |
3567 | 3664 |
3568 h->mb_mbaff = 0; | 3665 h->mb_mbaff = 0; |
3665 implicit_weight_table(h); | 3762 implicit_weight_table(h); |
3666 else | 3763 else |
3667 h->use_weight = 0; | 3764 h->use_weight = 0; |
3668 | 3765 |
3669 if(s->current_picture.reference) | 3766 if(s->current_picture.reference) |
3670 decode_ref_pic_marking(h, &s->gb); | 3767 decode_ref_pic_marking(h0, &s->gb); |
3671 | 3768 |
3672 if(FRAME_MBAFF) | 3769 if(FRAME_MBAFF) |
3673 fill_mbaff_ref_list(h); | 3770 fill_mbaff_ref_list(h); |
3674 | 3771 |
3675 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){ | 3772 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){ |
3714 if( h->deblocking_filter ) { | 3811 if( h->deblocking_filter ) { |
3715 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1; | 3812 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1; |
3716 h->slice_beta_offset = get_se_golomb(&s->gb) << 1; | 3813 h->slice_beta_offset = get_se_golomb(&s->gb) << 1; |
3717 } | 3814 } |
3718 } | 3815 } |
3816 | |
3817 if(h->deblocking_filter == 1 && h0->max_contexts > 1) { | |
3818 h0->max_contexts = 1; | |
3819 if(!h0->single_decode_warning) { | |
3820 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); | |
3821 h0->single_decode_warning = 1; | |
3822 } | |
3823 if(h != h0) | |
3824 return 1; // deblocking switched inside frame | |
3825 } | |
3826 | |
3719 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL | 3827 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL |
3720 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE) | 3828 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE) |
3721 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE) | 3829 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE) |
3722 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) | 3830 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) |
3723 h->deblocking_filter= 0; | 3831 h->deblocking_filter= 0; |
3725 #if 0 //FMO | 3833 #if 0 //FMO |
3726 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) | 3834 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) |
3727 slice_group_change_cycle= get_bits(&s->gb, ?); | 3835 slice_group_change_cycle= get_bits(&s->gb, ?); |
3728 #endif | 3836 #endif |
3729 | 3837 |
3730 h->slice_num++; | 3838 h0->last_slice_type = slice_type; |
3839 h->slice_num = ++h0->current_slice; | |
3731 | 3840 |
3732 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; | 3841 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; |
3733 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width; | 3842 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width; |
3734 | 3843 |
3735 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ | 3844 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ |
6293 } | 6402 } |
6294 } | 6403 } |
6295 } | 6404 } |
6296 } | 6405 } |
6297 | 6406 |
6298 static int decode_slice(H264Context *h){ | 6407 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){ |
6299 MpegEncContext * const s = &h->s; | 6408 MpegEncContext * const s = &h->s; |
6300 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; | 6409 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; |
6301 | 6410 |
6302 s->mb_skip_run= -1; | 6411 s->mb_skip_run= -1; |
6303 | 6412 |
6938 } | 7047 } |
6939 | 7048 |
6940 return 0; | 7049 return 0; |
6941 } | 7050 } |
6942 | 7051 |
7052 /** | |
7053 * Call decode_slice() for each context. | |
7054 * | |
7055 * @param h h264 master context | |
7056 * @param context_count number of contexts to execute | |
7057 */ | |
7058 static void execute_decode_slices(H264Context *h, int context_count){ | |
7059 MpegEncContext * const s = &h->s; | |
7060 AVCodecContext * const avctx= s->avctx; | |
7061 H264Context *hx; | |
7062 int i; | |
7063 | |
7064 if(context_count == 1) { | |
7065 decode_slice(avctx, h); | |
7066 } else { | |
7067 for(i = 1; i < context_count; i++) { | |
7068 hx = h->thread_context[i]; | |
7069 hx->s.error_resilience = avctx->error_resilience; | |
7070 hx->s.error_count = 0; | |
7071 } | |
7072 | |
7073 avctx->execute(avctx, (void *)decode_slice, | |
7074 (void **)h->thread_context, NULL, context_count); | |
7075 | |
7076 /* pull back stuff from slices to master context */ | |
7077 hx = h->thread_context[context_count - 1]; | |
7078 s->mb_x = hx->s.mb_x; | |
7079 s->mb_y = hx->s.mb_y; | |
7080 for(i = 1; i < context_count; i++) | |
7081 h->s.error_count += h->thread_context[i]->s.error_count; | |
7082 } | |
7083 } | |
7084 | |
7085 | |
6943 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ | 7086 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ |
6944 MpegEncContext * const s = &h->s; | 7087 MpegEncContext * const s = &h->s; |
6945 AVCodecContext * const avctx= s->avctx; | 7088 AVCodecContext * const avctx= s->avctx; |
6946 int buf_index=0; | 7089 int buf_index=0; |
7090 H264Context *hx; ///< thread context | |
7091 int context_count = 0; | |
7092 | |
7093 h->max_contexts = avctx->thread_count; | |
6947 #if 0 | 7094 #if 0 |
6948 int i; | 7095 int i; |
6949 for(i=0; i<50; i++){ | 7096 for(i=0; i<50; i++){ |
6950 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); | 7097 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); |
6951 } | 7098 } |
6952 #endif | 7099 #endif |
6953 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ | 7100 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ |
6954 h->slice_num = 0; | 7101 h->current_slice = 0; |
6955 s->current_picture_ptr= NULL; | 7102 s->current_picture_ptr= NULL; |
6956 } | 7103 } |
6957 | 7104 |
6958 for(;;){ | 7105 for(;;){ |
6959 int consumed; | 7106 int consumed; |
6960 int dst_length; | 7107 int dst_length; |
6961 int bit_length; | 7108 int bit_length; |
6962 uint8_t *ptr; | 7109 uint8_t *ptr; |
6963 int i, nalsize = 0; | 7110 int i, nalsize = 0; |
7111 int err; | |
6964 | 7112 |
6965 if(h->is_avc) { | 7113 if(h->is_avc) { |
6966 if(buf_index >= buf_size) break; | 7114 if(buf_index >= buf_size) break; |
6967 nalsize = 0; | 7115 nalsize = 0; |
6968 for(i = 0; i < h->nal_length_size; i++) | 7116 for(i = 0; i < h->nal_length_size; i++) |
6987 if(buf_index+3 >= buf_size) break; | 7135 if(buf_index+3 >= buf_size) break; |
6988 | 7136 |
6989 buf_index+=3; | 7137 buf_index+=3; |
6990 } | 7138 } |
6991 | 7139 |
6992 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index); | 7140 hx = h->thread_context[context_count]; |
7141 | |
7142 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index); | |
6993 if (ptr==NULL || dst_length < 0){ | 7143 if (ptr==NULL || dst_length < 0){ |
6994 return -1; | 7144 return -1; |
6995 } | 7145 } |
6996 while(ptr[dst_length - 1] == 0 && dst_length > 0) | 7146 while(ptr[dst_length - 1] == 0 && dst_length > 0) |
6997 dst_length--; | 7147 dst_length--; |
6998 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1)); | 7148 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1)); |
6999 | 7149 |
7000 if(s->avctx->debug&FF_DEBUG_STARTCODE){ | 7150 if(s->avctx->debug&FF_DEBUG_STARTCODE){ |
7001 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length); | 7151 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length); |
7002 } | 7152 } |
7003 | 7153 |
7004 if (h->is_avc && (nalsize != consumed)) | 7154 if (h->is_avc && (nalsize != consumed)) |
7005 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); | 7155 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); |
7006 | 7156 |
7008 | 7158 |
7009 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id | 7159 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id |
7010 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) | 7160 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) |
7011 continue; | 7161 continue; |
7012 | 7162 |
7013 switch(h->nal_unit_type){ | 7163 again: |
7164 err = 0; | |
7165 switch(hx->nal_unit_type){ | |
7014 case NAL_IDR_SLICE: | 7166 case NAL_IDR_SLICE: |
7167 if (h->nal_unit_type != NAL_IDR_SLICE) { | |
7168 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices"); | |
7169 return -1; | |
7170 } | |
7015 idr(h); //FIXME ensure we don't loose some frames if there is reordering | 7171 idr(h); //FIXME ensure we don't loose some frames if there is reordering |
7016 case NAL_SLICE: | 7172 case NAL_SLICE: |
7017 init_get_bits(&s->gb, ptr, bit_length); | 7173 init_get_bits(&hx->s.gb, ptr, bit_length); |
7018 h->intra_gb_ptr= | 7174 hx->intra_gb_ptr= |
7019 h->inter_gb_ptr= &s->gb; | 7175 hx->inter_gb_ptr= &hx->s.gb; |
7020 s->data_partitioning = 0; | 7176 hx->s.data_partitioning = 0; |
7021 | 7177 |
7022 if(decode_slice_header(h) < 0){ | 7178 if((err = decode_slice_header(hx, h))) |
7023 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); | 7179 break; |
7024 break; | 7180 |
7025 } | 7181 s->current_picture_ptr->key_frame= (hx->nal_unit_type == NAL_IDR_SLICE); |
7026 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE); | 7182 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5 |
7027 if(h->redundant_pic_count==0 && s->hurry_up < 5 | 7183 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) |
7028 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc) | 7184 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE) |
7029 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) | 7185 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE) |
7030 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE) | |
7031 && avctx->skip_frame < AVDISCARD_ALL) | 7186 && avctx->skip_frame < AVDISCARD_ALL) |
7032 decode_slice(h); | 7187 context_count++; |
7033 break; | 7188 break; |
7034 case NAL_DPA: | 7189 case NAL_DPA: |
7035 init_get_bits(&s->gb, ptr, bit_length); | 7190 init_get_bits(&hx->s.gb, ptr, bit_length); |
7036 h->intra_gb_ptr= | 7191 hx->intra_gb_ptr= |
7037 h->inter_gb_ptr= NULL; | 7192 hx->inter_gb_ptr= NULL; |
7038 s->data_partitioning = 1; | 7193 hx->s.data_partitioning = 1; |
7039 | 7194 |
7040 if(decode_slice_header(h) < 0){ | 7195 err = decode_slice_header(hx, h); |
7041 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); | |
7042 } | |
7043 break; | 7196 break; |
7044 case NAL_DPB: | 7197 case NAL_DPB: |
7045 init_get_bits(&h->intra_gb, ptr, bit_length); | 7198 init_get_bits(&hx->intra_gb, ptr, bit_length); |
7046 h->intra_gb_ptr= &h->intra_gb; | 7199 hx->intra_gb_ptr= &hx->intra_gb; |
7047 break; | 7200 break; |
7048 case NAL_DPC: | 7201 case NAL_DPC: |
7049 init_get_bits(&h->inter_gb, ptr, bit_length); | 7202 init_get_bits(&hx->inter_gb, ptr, bit_length); |
7050 h->inter_gb_ptr= &h->inter_gb; | 7203 hx->inter_gb_ptr= &hx->inter_gb; |
7051 | 7204 |
7052 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning | 7205 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning |
7053 && s->context_initialized | 7206 && s->context_initialized |
7054 && s->hurry_up < 5 | 7207 && s->hurry_up < 5 |
7055 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc) | 7208 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) |
7056 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) | 7209 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE) |
7057 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE) | 7210 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE) |
7058 && avctx->skip_frame < AVDISCARD_ALL) | 7211 && avctx->skip_frame < AVDISCARD_ALL) |
7059 decode_slice(h); | 7212 context_count++; |
7060 break; | 7213 break; |
7061 case NAL_SEI: | 7214 case NAL_SEI: |
7062 init_get_bits(&s->gb, ptr, bit_length); | 7215 init_get_bits(&s->gb, ptr, bit_length); |
7063 decode_sei(h); | 7216 decode_sei(h); |
7064 break; | 7217 break; |
7086 case NAL_AUXILIARY_SLICE: | 7239 case NAL_AUXILIARY_SLICE: |
7087 break; | 7240 break; |
7088 default: | 7241 default: |
7089 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length); | 7242 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length); |
7090 } | 7243 } |
7091 } | 7244 |
7092 | 7245 if(context_count == h->max_contexts) { |
7246 execute_decode_slices(h, context_count); | |
7247 context_count = 0; | |
7248 } | |
7249 | |
7250 if (err < 0) | |
7251 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); | |
7252 else if(err == 1) { | |
7253 /* Slice could not be decoded in parallel mode, copy down | |
7254 * NAL unit stuff to context 0 and restart. Note that | |
7255 * rbsp_buffer is not transfered, but since we no longer | |
7256 * run in parallel mode this should not be an issue. */ | |
7257 h->nal_unit_type = hx->nal_unit_type; | |
7258 h->nal_ref_idc = hx->nal_ref_idc; | |
7259 hx = h; | |
7260 goto again; | |
7261 } | |
7262 } | |
7263 if(context_count) | |
7264 execute_decode_slices(h, context_count); | |
7093 return buf_index; | 7265 return buf_index; |
7094 } | 7266 } |
7095 | 7267 |
7096 /** | 7268 /** |
7097 * returns the number of bytes consumed for building the current frame | 7269 * returns the number of bytes consumed for building the current frame |