Mercurial > libavcodec.hg
comparison h264.c @ 2919:71ca5ed04789 libavcodec
decode custom quant matrices.
based on a patch by anonymous, with optimizations by me.
author | lorenm |
---|---|
date | Tue, 25 Oct 2005 01:17:20 +0000 |
parents | 95f469274a1d |
children | 1443b4d3f4ab |
Comparison legend: equal, deleted, inserted, replaced
2918:13dcd22f0816 | 2919:71ca5ed04789 |
---|---|
88 uint32_t time_scale; | 88 uint32_t time_scale; |
89 int fixed_frame_rate_flag; | 89 int fixed_frame_rate_flag; |
90 short offset_for_ref_frame[256]; //FIXME dyn aloc? | 90 short offset_for_ref_frame[256]; //FIXME dyn aloc? |
91 int bitstream_restriction_flag; | 91 int bitstream_restriction_flag; |
92 int num_reorder_frames; | 92 int num_reorder_frames; |
93 int scaling_matrix_present; | |
94 uint8_t scaling_matrix4[6][16]; | |
95 uint8_t scaling_matrix8[2][64]; | |
93 }SPS; | 96 }SPS; |
94 | 97 |
95 /** | 98 /** |
96 * Picture parameter set | 99 * Picture parameter set |
97 */ | 100 */ |
109 int chroma_qp_index_offset; | 112 int chroma_qp_index_offset; |
110 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag | 113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag |
111 int constrained_intra_pred; ///< constrained_intra_pred_flag | 114 int constrained_intra_pred; ///< constrained_intra_pred_flag |
112 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag | 115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag |
113 int transform_8x8_mode; ///< transform_8x8_mode_flag | 116 int transform_8x8_mode; ///< transform_8x8_mode_flag |
117 uint8_t scaling_matrix4[6][16]; | |
118 uint8_t scaling_matrix8[2][64]; | |
114 }PPS; | 119 }PPS; |
115 | 120 |
116 /** | 121 /** |
117 * Memory management control operation opcode. | 122 * Memory management control operation opcode. |
118 */ | 123 */ |
235 /** | 240 /** |
236 * current pps | 241 * current pps |
237 */ | 242 */ |
238 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? | 243 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? |
239 | 244 |
240 uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS | 245 uint32_t dequant4_buffer[6][52][16]; |
241 uint16_t (*dequant8_coeff)[64]; | 246 uint32_t dequant8_buffer[2][52][64]; |
247 uint32_t (*dequant4_coeff[6])[16]; | |
248 uint32_t (*dequant8_coeff[2])[64]; | |
249 int dequant_coeff_pps; ///< reinit tables when pps changes | |
242 | 250 |
243 int slice_num; | 251 int slice_num; |
244 uint8_t *slice_table_base; | 252 uint8_t *slice_table_base; |
245 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1 | 253 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1 |
246 int slice_type; | 254 int slice_type; |
1596 | 1604 |
1597 /** | 1605 /** |
1598 * idct tranforms the 16 dc values and dequantize them. | 1606 * idct tranforms the 16 dc values and dequantize them. |
1599 * @param qp quantization parameter | 1607 * @param qp quantization parameter |
1600 */ | 1608 */ |
1601 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){ | 1609 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
1602 const int qmul= dequant_coeff[qp][0]; | |
1603 #define stride 16 | 1610 #define stride 16 |
1604 int i; | 1611 int i; |
1605 int temp[16]; //FIXME check if this is a good idea | 1612 int temp[16]; //FIXME check if this is a good idea |
1606 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; | 1613 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; |
1607 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; | 1614 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; |
1626 const int z0= temp[4*0+i] + temp[4*2+i]; | 1633 const int z0= temp[4*0+i] + temp[4*2+i]; |
1627 const int z1= temp[4*0+i] - temp[4*2+i]; | 1634 const int z1= temp[4*0+i] - temp[4*2+i]; |
1628 const int z2= temp[4*1+i] - temp[4*3+i]; | 1635 const int z2= temp[4*1+i] - temp[4*3+i]; |
1629 const int z3= temp[4*1+i] + temp[4*3+i]; | 1636 const int z3= temp[4*1+i] + temp[4*3+i]; |
1630 | 1637 |
1631 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual | 1638 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual |
1632 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2; | 1639 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); |
1633 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2; | 1640 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); |
1634 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2; | 1641 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); |
1635 } | 1642 } |
1636 } | 1643 } |
1637 | 1644 |
1638 #if 0 | 1645 #if 0 |
1639 /** | 1646 /** |
1676 #endif | 1683 #endif |
1677 | 1684 |
1678 #undef xStride | 1685 #undef xStride |
1679 #undef stride | 1686 #undef stride |
1680 | 1687 |
1681 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){ | 1688 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ |
1682 const int qmul= dequant_coeff[qp][0]; | |
1683 const int stride= 16*2; | 1689 const int stride= 16*2; |
1684 const int xStride= 16; | 1690 const int xStride= 16; |
1685 int a,b,c,d,e; | 1691 int a,b,c,d,e; |
1686 | 1692 |
1687 a= block[stride*0 + xStride*0]; | 1693 a= block[stride*0 + xStride*0]; |
1692 e= a-b; | 1698 e= a-b; |
1693 a= a+b; | 1699 a= a+b; |
1694 b= c-d; | 1700 b= c-d; |
1695 c= c+d; | 1701 c= c+d; |
1696 | 1702 |
1697 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1; | 1703 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7; |
1698 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1; | 1704 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7; |
1699 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1; | 1705 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7; |
1700 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1; | 1706 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7; |
1701 } | 1707 } |
1702 | 1708 |
1703 #if 0 | 1709 #if 0 |
1704 static void chroma_dc_dct_c(DCTELEM *block){ | 1710 static void chroma_dc_dct_c(DCTELEM *block){ |
1705 const int stride= 16*2; | 1711 const int stride= 16*2; |
2919 h->slice_table= NULL; | 2925 h->slice_table= NULL; |
2920 | 2926 |
2921 av_freep(&h->mb2b_xy); | 2927 av_freep(&h->mb2b_xy); |
2922 av_freep(&h->mb2b8_xy); | 2928 av_freep(&h->mb2b8_xy); |
2923 | 2929 |
2924 av_freep(&h->dequant4_coeff); | |
2925 av_freep(&h->dequant8_coeff); | |
2926 | |
2927 av_freep(&h->s.obmc_scratchpad); | 2930 av_freep(&h->s.obmc_scratchpad); |
2928 } | 2931 } |
2932 | |
2933 static void init_dequant8_coeff_table(H264Context *h){ | |
2934 int i,q,x; | |
2935 h->dequant8_coeff[0] = h->dequant8_buffer[0]; | |
2936 h->dequant8_coeff[1] = h->dequant8_buffer[1]; | |
2937 | |
2938 for(i=0; i<2; i++ ){ | |
2939 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ | |
2940 h->dequant8_coeff[1] = h->dequant8_buffer[0]; | |
2941 break; | |
2942 } | |
2943 | |
2944 for(q=0; q<52; q++){ | |
2945 int shift = div6[q]; | |
2946 int idx = rem6[q]; | |
2947 for(x=0; x<64; x++) | |
2948 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][ | |
2949 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift; | |
2950 } | |
2951 } | |
2952 } | |
2953 | |
2954 static void init_dequant4_coeff_table(H264Context *h){ | |
2955 int i,j,q,x; | |
2956 for(i=0; i<6; i++ ){ | |
2957 h->dequant4_coeff[i] = h->dequant4_buffer[i]; | |
2958 for(j=0; j<i; j++){ | |
2959 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ | |
2960 h->dequant4_coeff[i] = h->dequant4_buffer[j]; | |
2961 break; | |
2962 } | |
2963 } | |
2964 if(j<i) | |
2965 continue; | |
2966 | |
2967 for(q=0; q<52; q++){ | |
2968 int shift = div6[q] + 2; | |
2969 int idx = rem6[q]; | |
2970 for(x=0; x<16; x++) | |
2971 h->dequant4_coeff[i][q][x] = ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * | |
2972 h->pps.scaling_matrix4[i][x]) << shift; | |
2973 } | |
2974 } | |
2975 } | |
2976 | |
2977 static void init_dequant_tables(H264Context *h){ | |
2978 int i,x; | |
2979 init_dequant4_coeff_table(h); | |
2980 if(h->pps.transform_8x8_mode) | |
2981 init_dequant8_coeff_table(h); | |
2982 if(h->sps.transform_bypass){ | |
2983 for(i=0; i<6; i++) | |
2984 for(x=0; x<16; x++) | |
2985 h->dequant4_coeff[i][0][x] = 1<<6; | |
2986 if(h->pps.transform_8x8_mode) | |
2987 for(i=0; i<2; i++) | |
2988 for(x=0; x<64; x++) | |
2989 h->dequant8_coeff[i][0][x] = 1<<6; | |
2990 } | |
2991 } | |
2992 | |
2929 | 2993 |
2930 /** | 2994 /** |
2931 * allocates tables. | 2995 * allocates tables. |
2932 * needs width/height | 2996 * needs width/height |
2933 */ | 2997 */ |
2934 static int alloc_tables(H264Context *h){ | 2998 static int alloc_tables(H264Context *h){ |
2935 MpegEncContext * const s = &h->s; | 2999 MpegEncContext * const s = &h->s; |
2936 const int big_mb_num= s->mb_stride * (s->mb_height+1); | 3000 const int big_mb_num= s->mb_stride * (s->mb_height+1); |
2937 int x,y,q; | 3001 int x,y; |
2938 | 3002 |
2939 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) | 3003 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) |
2940 | 3004 |
2941 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) | 3005 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) |
2942 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t)) | 3006 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t)) |
2965 h->mb2b_xy [mb_xy]= b_xy; | 3029 h->mb2b_xy [mb_xy]= b_xy; |
2966 h->mb2b8_xy[mb_xy]= b8_xy; | 3030 h->mb2b8_xy[mb_xy]= b8_xy; |
2967 } | 3031 } |
2968 } | 3032 } |
2969 | 3033 |
2970 CHECKED_ALLOCZ(h->dequant4_coeff, 52*16 * sizeof(uint16_t)); | |
2971 CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t)); | |
2972 memcpy(h->dequant4_coeff, dequant_coeff, 52*16 * sizeof(uint16_t)); | |
2973 for(q=0; q<52; q++){ | |
2974 int shift = div6[q]; | |
2975 int idx = rem6[q]; | |
2976 if(shift >= 2) // qp<12 are shifted during dequant | |
2977 shift -= 2; | |
2978 for(x=0; x<64; x++) | |
2979 h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][ | |
2980 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] << shift; | |
2981 } | |
2982 if(h->sps.transform_bypass){ | |
2983 for(x=0; x<16; x++) | |
2984 h->dequant4_coeff[0][x] = 1; | |
2985 for(x=0; x<64; x++) | |
2986 h->dequant8_coeff[0][x] = 1<<2; | |
2987 } | |
2988 | |
2989 s->obmc_scratchpad = NULL; | 3034 s->obmc_scratchpad = NULL; |
2990 | 3035 |
2991 return 0; | 3036 return 0; |
2992 fail: | 3037 fail: |
2993 free_tables(h); | 3038 free_tables(h); |
3001 s->height = s->avctx->height; | 3046 s->height = s->avctx->height; |
3002 s->codec_id= s->avctx->codec->id; | 3047 s->codec_id= s->avctx->codec->id; |
3003 | 3048 |
3004 init_pred_ptrs(h); | 3049 init_pred_ptrs(h); |
3005 | 3050 |
3051 h->dequant_coeff_pps= -1; | |
3006 s->unrestricted_mv=1; | 3052 s->unrestricted_mv=1; |
3007 s->decode=1; //FIXME | 3053 s->decode=1; //FIXME |
3008 } | 3054 } |
3009 | 3055 |
3010 static int decode_init(AVCodecContext *avctx){ | 3056 static int decode_init(AVCodecContext *avctx){ |
3347 } | 3393 } |
3348 }else{ | 3394 }else{ |
3349 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); | 3395 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); |
3350 if(s->codec_id == CODEC_ID_H264){ | 3396 if(s->codec_id == CODEC_ID_H264){ |
3351 if(!transform_bypass) | 3397 if(!transform_bypass) |
3352 h264_luma_dc_dequant_idct_c(h->mb, s->qscale); | 3398 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]); |
3353 }else | 3399 }else |
3354 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); | 3400 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); |
3355 } | 3401 } |
3356 if(h->deblocking_filter) { | 3402 if(h->deblocking_filter) { |
3357 if (h->mb_aff_frame) { | 3403 if (h->mb_aff_frame) { |
3395 } | 3441 } |
3396 | 3442 |
3397 if(!(s->flags&CODEC_FLAG_GRAY)){ | 3443 if(!(s->flags&CODEC_FLAG_GRAY)){ |
3398 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add; | 3444 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add; |
3399 if(!transform_bypass){ | 3445 if(!transform_bypass){ |
3400 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp); | 3446 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]); |
3401 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp); | 3447 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]); |
3402 } | 3448 } |
3403 if(s->codec_id == CODEC_ID_H264){ | 3449 if(s->codec_id == CODEC_ID_H264){ |
3404 for(i=16; i<16+4; i++){ | 3450 for(i=16; i<16+4; i++){ |
3405 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | 3451 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ |
3406 uint8_t * const ptr= dest_cb + block_offset[i]; | 3452 uint8_t * const ptr= dest_cb + block_offset[i]; |
4176 h->sps= h->sps_buffer[ h->pps.sps_id ]; | 4222 h->sps= h->sps_buffer[ h->pps.sps_id ]; |
4177 if(h->sps.log2_max_frame_num == 0){ | 4223 if(h->sps.log2_max_frame_num == 0){ |
4178 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n"); | 4224 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n"); |
4179 return -1; | 4225 return -1; |
4180 } | 4226 } |
4227 | |
4228 if(h->dequant_coeff_pps != pps_id){ | |
4229 h->dequant_coeff_pps = pps_id; | |
4230 init_dequant_tables(h); | |
4231 } | |
4181 | 4232 |
4182 s->mb_width= h->sps.mb_width; | 4233 s->mb_width= h->sps.mb_width; |
4183 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); | 4234 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); |
4184 | 4235 |
4185 h->b_stride= s->mb_width*4 + 1; | 4236 h->b_stride= s->mb_width*4 + 1; |
4432 * @param n block index | 4483 * @param n block index |
4433 * @param scantable scantable | 4484 * @param scantable scantable |
4434 * @param max_coeff number of coefficients in the block | 4485 * @param max_coeff number of coefficients in the block |
4435 * @return <0 if an error occured | 4486 * @return <0 if an error occured |
4436 */ | 4487 */ |
4437 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){ | 4488 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){ |
4438 MpegEncContext * const s = &h->s; | 4489 MpegEncContext * const s = &h->s; |
4439 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; | 4490 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; |
4440 int level[16]; | 4491 int level[16]; |
4441 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before; | 4492 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before; |
4442 | 4493 |
4549 j= scantable[ coeff_num ]; | 4600 j= scantable[ coeff_num ]; |
4550 | 4601 |
4551 block[j]= level[i]; | 4602 block[j]= level[i]; |
4552 } | 4603 } |
4553 }else{ | 4604 }else{ |
4554 block[j] = level[0] * qmul[j]; | 4605 block[j] = (level[0] * qmul[j] + 32)>>6; |
4555 for(i=1;i<total_coeff;i++) { | 4606 for(i=1;i<total_coeff;i++) { |
4556 if(zeros_left <= 0) | 4607 if(zeros_left <= 0) |
4557 run_before = 0; | 4608 run_before = 0; |
4558 else if(zeros_left < 7){ | 4609 else if(zeros_left < 7){ |
4559 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1); | 4610 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1); |
4562 } | 4613 } |
4563 zeros_left -= run_before; | 4614 zeros_left -= run_before; |
4564 coeff_num -= 1 + run_before; | 4615 coeff_num -= 1 + run_before; |
4565 j= scantable[ coeff_num ]; | 4616 j= scantable[ coeff_num ]; |
4566 | 4617 |
4567 block[j]= level[i] * qmul[j]; | 4618 block[j]= (level[i] * qmul[j] + 32)>>6; |
4568 // printf("%d %d ", block[j], qmul[j]); | |
4569 } | 4619 } |
4570 } | 4620 } |
4571 | 4621 |
4572 if(zeros_left<0){ | 4622 if(zeros_left<0){ |
4573 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y); | 4623 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y); |
5006 else s->qscale-= 52; | 5056 else s->qscale-= 52; |
5007 } | 5057 } |
5008 | 5058 |
5009 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); | 5059 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); |
5010 if(IS_INTRA16x16(mb_type)){ | 5060 if(IS_INTRA16x16(mb_type)){ |
5011 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0){ | 5061 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ |
5012 return -1; //FIXME continue if partitioned and other return -1 too | 5062 return -1; //FIXME continue if partitioned and other return -1 too |
5013 } | 5063 } |
5014 | 5064 |
5015 assert((cbp&15) == 0 || (cbp&15) == 15); | 5065 assert((cbp&15) == 0 || (cbp&15) == 15); |
5016 | 5066 |
5017 if(cbp&15){ | 5067 if(cbp&15){ |
5018 for(i8x8=0; i8x8<4; i8x8++){ | 5068 for(i8x8=0; i8x8<4; i8x8++){ |
5019 for(i4x4=0; i4x4<4; i4x4++){ | 5069 for(i4x4=0; i4x4<4; i4x4++){ |
5020 const int index= i4x4 + 4*i8x8; | 5070 const int index= i4x4 + 4*i8x8; |
5021 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ){ | 5071 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ |
5022 return -1; | 5072 return -1; |
5023 } | 5073 } |
5024 } | 5074 } |
5025 } | 5075 } |
5026 }else{ | 5076 }else{ |
5032 if(IS_8x8DCT(mb_type)){ | 5082 if(IS_8x8DCT(mb_type)){ |
5033 DCTELEM *buf = &h->mb[64*i8x8]; | 5083 DCTELEM *buf = &h->mb[64*i8x8]; |
5034 uint8_t *nnz; | 5084 uint8_t *nnz; |
5035 for(i4x4=0; i4x4<4; i4x4++){ | 5085 for(i4x4=0; i4x4<4; i4x4++){ |
5036 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, | 5086 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, |
5037 h->dequant8_coeff[s->qscale], 16) <0 ) | 5087 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) |
5038 return -1; | 5088 return -1; |
5039 } | |
5040 if(s->qscale < 12){ | |
5041 int i; | |
5042 for(i=0; i<64; i++) | |
5043 buf[i] = (buf[i] + 2) >> 2; | |
5044 } | 5089 } |
5045 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | 5090 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; |
5046 nnz[0] |= nnz[1] | nnz[8] | nnz[9]; | 5091 nnz[0] |= nnz[1] | nnz[8] | nnz[9]; |
5047 }else{ | 5092 }else{ |
5048 for(i4x4=0; i4x4<4; i4x4++){ | 5093 for(i4x4=0; i4x4<4; i4x4++){ |
5049 const int index= i4x4 + 4*i8x8; | 5094 const int index= i4x4 + 4*i8x8; |
5050 | 5095 |
5051 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[s->qscale], 16) <0 ){ | 5096 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ |
5052 return -1; | 5097 return -1; |
5053 } | 5098 } |
5054 } | 5099 } |
5055 } | 5100 } |
5056 }else{ | 5101 }else{ |
5060 } | 5105 } |
5061 } | 5106 } |
5062 | 5107 |
5063 if(cbp&0x30){ | 5108 if(cbp&0x30){ |
5064 for(chroma_idx=0; chroma_idx<2; chroma_idx++) | 5109 for(chroma_idx=0; chroma_idx<2; chroma_idx++) |
5065 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, h->dequant4_coeff[chroma_qp], 4) < 0){ | 5110 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){ |
5066 return -1; | 5111 return -1; |
5067 } | 5112 } |
5068 } | 5113 } |
5069 | 5114 |
5070 if(cbp&0x20){ | 5115 if(cbp&0x20){ |
5071 for(chroma_idx=0; chroma_idx<2; chroma_idx++){ | 5116 for(chroma_idx=0; chroma_idx<2; chroma_idx++){ |
5072 for(i4x4=0; i4x4<4; i4x4++){ | 5117 for(i4x4=0; i4x4<4; i4x4++){ |
5073 const int index= 16 + 4*chroma_idx + i4x4; | 5118 const int index= 16 + 4*chroma_idx + i4x4; |
5074 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_qp], 15) < 0){ | 5119 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){ |
5075 return -1; | 5120 return -1; |
5076 } | 5121 } |
5077 } | 5122 } |
5078 } | 5123 } |
5079 }else{ | 5124 }else{ |
5508 ctx += 2; | 5553 ctx += 2; |
5509 | 5554 |
5510 return ctx + 4 * cat; | 5555 return ctx + 4 * cat; |
5511 } | 5556 } |
5512 | 5557 |
5513 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) { | 5558 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) { |
5514 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; | 5559 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; |
5515 static const int significant_coeff_flag_field_offset[2] = { 105, 277 }; | 5560 static const int significant_coeff_flag_field_offset[2] = { 105, 277 }; |
5516 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 }; | 5561 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 }; |
5517 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 }; | 5562 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 }; |
5518 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 }; | 5563 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 }; |
5614 if( get_cabac( &h->cabac, ctx ) == 0 ) { | 5659 if( get_cabac( &h->cabac, ctx ) == 0 ) { |
5615 if( !qmul ) { | 5660 if( !qmul ) { |
5616 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1; | 5661 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1; |
5617 else block[j] = 1; | 5662 else block[j] = 1; |
5618 }else{ | 5663 }else{ |
5619 if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j]; | 5664 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6; |
5620 else block[j] = qmul[j]; | 5665 else block[j] = ( qmul[j] + 32) >> 6; |
5621 } | 5666 } |
5622 | 5667 |
5623 abslevel1++; | 5668 abslevel1++; |
5624 } else { | 5669 } else { |
5625 int coeff_abs = 2; | 5670 int coeff_abs = 2; |
5643 | 5688 |
5644 if( !qmul ) { | 5689 if( !qmul ) { |
5645 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs; | 5690 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs; |
5646 else block[j] = coeff_abs; | 5691 else block[j] = coeff_abs; |
5647 }else{ | 5692 }else{ |
5648 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j]; | 5693 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6; |
5649 else block[j] = coeff_abs * qmul[j]; | 5694 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6; |
5650 } | 5695 } |
5651 | 5696 |
5652 abslevelgt1++; | 5697 abslevelgt1++; |
5653 } | 5698 } |
5654 } | 5699 } |
6076 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0) | 6121 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0) |
6077 return -1; | 6122 return -1; |
6078 if( cbp&15 ) { | 6123 if( cbp&15 ) { |
6079 for( i = 0; i < 16; i++ ) { | 6124 for( i = 0; i < 16; i++ ) { |
6080 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); | 6125 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); |
6081 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ) | 6126 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ) |
6082 return -1; | 6127 return -1; |
6083 } | 6128 } |
6084 } else { | 6129 } else { |
6085 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); | 6130 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); |
6086 } | 6131 } |
6088 int i8x8, i4x4; | 6133 int i8x8, i4x4; |
6089 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { | 6134 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { |
6090 if( cbp & (1<<i8x8) ) { | 6135 if( cbp & (1<<i8x8) ) { |
6091 if( IS_8x8DCT(mb_type) ) { | 6136 if( IS_8x8DCT(mb_type) ) { |
6092 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, | 6137 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, |
6093 zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 ) | 6138 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) |
6094 return -1; | 6139 return -1; |
6095 if(s->qscale < 12){ | |
6096 int i; | |
6097 for(i=0; i<64; i++) | |
6098 h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2; | |
6099 } | |
6100 } else | 6140 } else |
6101 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { | 6141 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { |
6102 const int index = 4*i8x8 + i4x4; | 6142 const int index = 4*i8x8 + i4x4; |
6103 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); | 6143 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); |
6104 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[s->qscale], 16) < 0 ) | 6144 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 ) |
6105 return -1; | 6145 return -1; |
6106 } | 6146 } |
6107 } else { | 6147 } else { |
6108 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | 6148 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; |
6109 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; | 6149 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; |
6124 int c, i; | 6164 int c, i; |
6125 for( c = 0; c < 2; c++ ) { | 6165 for( c = 0; c < 2; c++ ) { |
6126 for( i = 0; i < 4; i++ ) { | 6166 for( i = 0; i < 4; i++ ) { |
6127 const int index = 16 + 4 * c + i; | 6167 const int index = 16 + 4 * c + i; |
6128 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); | 6168 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); |
6129 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[h->chroma_qp], 15) < 0) | 6169 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0) |
6130 return -1; | 6170 return -1; |
6131 } | 6171 } |
6132 } | 6172 } |
6133 } else { | 6173 } else { |
6134 uint8_t * const nnz= &h->non_zero_count_cache[0]; | 6174 uint8_t * const nnz= &h->non_zero_count_cache[0]; |
7004 } | 7044 } |
7005 | 7045 |
7006 return 0; | 7046 return 0; |
7007 } | 7047 } |
7008 | 7048 |
7049 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size, const uint8_t *default_list){ | |
7050 MpegEncContext * const s = &h->s; | |
7051 int i, last = 8, next = 8; | |
7052 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8; | |
7053 if(!get_bits1(&s->gb)) /* matrix not written, we use the default one */ | |
7054 memcpy(factors, default_list, size*sizeof(uint8_t)); | |
7055 else | |
7056 for(i=0;i<size;i++){ | |
7057 if(next) | |
7058 next = (last + get_se_golomb(&s->gb)) & 0xff; | |
7059 if(!i && !next){ /* matrix not written, we use the default one */ | |
7060 memcpy(factors, default_list, size*sizeof(uint8_t)); | |
7061 break; | |
7062 } | |
7063 last = factors[scan[i]] = next ? next : last; | |
7064 } | |
7065 } | |
7066 | |
7067 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps, | |
7068 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){ | |
7069 MpegEncContext * const s = &h->s; | |
7070 int fallback_sps = !is_sps && sps->scaling_matrix_present; | |
7071 const uint8_t *fallback[4] = { | |
7072 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0], | |
7073 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1], | |
7074 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0], | |
7075 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1] | |
7076 }; | |
7077 if(get_bits1(&s->gb)){ | |
7078 sps->scaling_matrix_present |= is_sps; | |
7079 decode_scaling_list(h,scaling_matrix4[0],16,fallback[0]); // Intra, Y | |
7080 decode_scaling_list(h,scaling_matrix4[1],16,scaling_matrix4[0]); // Intra, Cr | |
7081 decode_scaling_list(h,scaling_matrix4[2],16,scaling_matrix4[1]); // Intra, Cb | |
7082 decode_scaling_list(h,scaling_matrix4[3],16,fallback[1]); // Inter, Y | |
7083 decode_scaling_list(h,scaling_matrix4[4],16,scaling_matrix4[3]); // Inter, Cr | |
7084 decode_scaling_list(h,scaling_matrix4[5],16,scaling_matrix4[4]); // Inter, Cb | |
7085 if(is_sps || pps->transform_8x8_mode){ | |
7086 decode_scaling_list(h,scaling_matrix8[0],64,fallback[2]); // Intra, Y | |
7087 decode_scaling_list(h,scaling_matrix8[1],64,fallback[3]); // Inter, Y | |
7088 } | |
7089 } else if(fallback_sps) { | |
7090 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t)); | |
7091 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t)); | |
7092 } | |
7093 } | |
7094 | |
7009 static inline int decode_seq_parameter_set(H264Context *h){ | 7095 static inline int decode_seq_parameter_set(H264Context *h){ |
7010 MpegEncContext * const s = &h->s; | 7096 MpegEncContext * const s = &h->s; |
7011 int profile_idc, level_idc; | 7097 int profile_idc, level_idc; |
7012 int sps_id, i; | 7098 int sps_id, i; |
7013 SPS *sps; | 7099 SPS *sps; |
7029 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc | 7115 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc |
7030 get_bits1(&s->gb); //residual_color_transform_flag | 7116 get_bits1(&s->gb); //residual_color_transform_flag |
7031 get_ue_golomb(&s->gb); //bit_depth_luma_minus8 | 7117 get_ue_golomb(&s->gb); //bit_depth_luma_minus8 |
7032 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8 | 7118 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8 |
7033 sps->transform_bypass = get_bits1(&s->gb); | 7119 sps->transform_bypass = get_bits1(&s->gb); |
7034 if(get_bits1(&s->gb)){ //seq_scaling_matrix_present_flag | 7120 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8); |
7035 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n"); | 7121 }else |
7036 return -1; | 7122 sps->scaling_matrix_present = 0; |
7037 } | |
7038 } | |
7039 | 7123 |
7040 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4; | 7124 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4; |
7041 sps->poc_type= get_ue_golomb(&s->gb); | 7125 sps->poc_type= get_ue_golomb(&s->gb); |
7042 | 7126 |
7043 if(sps->poc_type == 0){ //FIXME #define | 7127 if(sps->poc_type == 0){ //FIXME #define |
7170 pps->init_qs= get_se_golomb(&s->gb) + 26; | 7254 pps->init_qs= get_se_golomb(&s->gb) + 26; |
7171 pps->chroma_qp_index_offset= get_se_golomb(&s->gb); | 7255 pps->chroma_qp_index_offset= get_se_golomb(&s->gb); |
7172 pps->deblocking_filter_parameters_present= get_bits1(&s->gb); | 7256 pps->deblocking_filter_parameters_present= get_bits1(&s->gb); |
7173 pps->constrained_intra_pred= get_bits1(&s->gb); | 7257 pps->constrained_intra_pred= get_bits1(&s->gb); |
7174 pps->redundant_pic_cnt_present = get_bits1(&s->gb); | 7258 pps->redundant_pic_cnt_present = get_bits1(&s->gb); |
7259 | |
7260 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t)); | |
7261 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t)); | |
7175 | 7262 |
7176 if(get_bits_count(&s->gb) < bit_length){ | 7263 if(get_bits_count(&s->gb) < bit_length){ |
7177 pps->transform_8x8_mode= get_bits1(&s->gb); | 7264 pps->transform_8x8_mode= get_bits1(&s->gb); |
7178 if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag | 7265 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8); |
7179 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n"); | |
7180 return -1; | |
7181 } | |
7182 get_se_golomb(&s->gb); //second_chroma_qp_index_offset | 7266 get_se_golomb(&s->gb); //second_chroma_qp_index_offset |
7183 } | 7267 } |
7184 | 7268 |
7185 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ | 7269 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ |
7186 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n", | 7270 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n", |