comparison h264.c @ 2919:71ca5ed04789 libavcodec

decode custom quant matrices. based on a patch by anonymous, with optimizations by me.
author lorenm
date Tue, 25 Oct 2005 01:17:20 +0000
parents 95f469274a1d
children 1443b4d3f4ab
comparison
equal deleted inserted replaced
2918:13dcd22f0816 2919:71ca5ed04789
88 uint32_t time_scale; 88 uint32_t time_scale;
89 int fixed_frame_rate_flag; 89 int fixed_frame_rate_flag;
90 short offset_for_ref_frame[256]; //FIXME dyn aloc? 90 short offset_for_ref_frame[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag; 91 int bitstream_restriction_flag;
92 int num_reorder_frames; 92 int num_reorder_frames;
93 int scaling_matrix_present;
94 uint8_t scaling_matrix4[6][16];
95 uint8_t scaling_matrix8[2][64];
93 }SPS; 96 }SPS;
94 97
95 /** 98 /**
96 * Picture parameter set 99 * Picture parameter set
97 */ 100 */
109 int chroma_qp_index_offset; 112 int chroma_qp_index_offset;
110 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag 113 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
111 int constrained_intra_pred; ///< constrained_intra_pred_flag 114 int constrained_intra_pred; ///< constrained_intra_pred_flag
112 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag 115 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
113 int transform_8x8_mode; ///< transform_8x8_mode_flag 116 int transform_8x8_mode; ///< transform_8x8_mode_flag
117 uint8_t scaling_matrix4[6][16];
118 uint8_t scaling_matrix8[2][64];
114 }PPS; 119 }PPS;
115 120
116 /** 121 /**
117 * Memory management control operation opcode. 122 * Memory management control operation opcode.
118 */ 123 */
235 /** 240 /**
236 * current pps 241 * current pps
237 */ 242 */
238 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? 243 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
239 244
240 uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS 245 uint32_t dequant4_buffer[6][52][16];
241 uint16_t (*dequant8_coeff)[64]; 246 uint32_t dequant8_buffer[2][52][64];
247 uint32_t (*dequant4_coeff[6])[16];
248 uint32_t (*dequant8_coeff[2])[64];
249 int dequant_coeff_pps; ///< reinit tables when pps changes
242 250
243 int slice_num; 251 int slice_num;
244 uint8_t *slice_table_base; 252 uint8_t *slice_table_base;
245 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1 253 uint8_t *slice_table; ///< slice_table_base + mb_stride + 1
246 int slice_type; 254 int slice_type;
1596 1604
1597 /** 1605 /**
1598 * idct tranforms the 16 dc values and dequantize them. 1606 * idct tranforms the 16 dc values and dequantize them.
1599 * @param qp quantization parameter 1607 * @param qp quantization parameter
1600 */ 1608 */
1601 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){ 1609 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1602 const int qmul= dequant_coeff[qp][0];
1603 #define stride 16 1610 #define stride 16
1604 int i; 1611 int i;
1605 int temp[16]; //FIXME check if this is a good idea 1612 int temp[16]; //FIXME check if this is a good idea
1606 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; 1613 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1607 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; 1614 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1626 const int z0= temp[4*0+i] + temp[4*2+i]; 1633 const int z0= temp[4*0+i] + temp[4*2+i];
1627 const int z1= temp[4*0+i] - temp[4*2+i]; 1634 const int z1= temp[4*0+i] - temp[4*2+i];
1628 const int z2= temp[4*1+i] - temp[4*3+i]; 1635 const int z2= temp[4*1+i] - temp[4*3+i];
1629 const int z3= temp[4*1+i] + temp[4*3+i]; 1636 const int z3= temp[4*1+i] + temp[4*3+i];
1630 1637
1631 block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual 1638 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1632 block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2; 1639 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1633 block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2; 1640 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1634 block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2; 1641 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1635 } 1642 }
1636 } 1643 }
1637 1644
1638 #if 0 1645 #if 0
1639 /** 1646 /**
1676 #endif 1683 #endif
1677 1684
1678 #undef xStride 1685 #undef xStride
1679 #undef stride 1686 #undef stride
1680 1687
1681 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){ 1688 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1682 const int qmul= dequant_coeff[qp][0];
1683 const int stride= 16*2; 1689 const int stride= 16*2;
1684 const int xStride= 16; 1690 const int xStride= 16;
1685 int a,b,c,d,e; 1691 int a,b,c,d,e;
1686 1692
1687 a= block[stride*0 + xStride*0]; 1693 a= block[stride*0 + xStride*0];
1692 e= a-b; 1698 e= a-b;
1693 a= a+b; 1699 a= a+b;
1694 b= c-d; 1700 b= c-d;
1695 c= c+d; 1701 c= c+d;
1696 1702
1697 block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1; 1703 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1698 block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1; 1704 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1699 block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1; 1705 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1700 block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1; 1706 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1701 } 1707 }
1702 1708
1703 #if 0 1709 #if 0
1704 static void chroma_dc_dct_c(DCTELEM *block){ 1710 static void chroma_dc_dct_c(DCTELEM *block){
1705 const int stride= 16*2; 1711 const int stride= 16*2;
2919 h->slice_table= NULL; 2925 h->slice_table= NULL;
2920 2926
2921 av_freep(&h->mb2b_xy); 2927 av_freep(&h->mb2b_xy);
2922 av_freep(&h->mb2b8_xy); 2928 av_freep(&h->mb2b8_xy);
2923 2929
2924 av_freep(&h->dequant4_coeff);
2925 av_freep(&h->dequant8_coeff);
2926
2927 av_freep(&h->s.obmc_scratchpad); 2930 av_freep(&h->s.obmc_scratchpad);
2928 } 2931 }
2932
2933 static void init_dequant8_coeff_table(H264Context *h){
2934 int i,q,x;
2935 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2936 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2937
2938 for(i=0; i<2; i++ ){
2939 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2940 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2941 break;
2942 }
2943
2944 for(q=0; q<52; q++){
2945 int shift = div6[q];
2946 int idx = rem6[q];
2947 for(x=0; x<64; x++)
2948 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][
2949 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift;
2950 }
2951 }
2952 }
2953
2954 static void init_dequant4_coeff_table(H264Context *h){
2955 int i,j,q,x;
2956 for(i=0; i<6; i++ ){
2957 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2958 for(j=0; j<i; j++){
2959 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2960 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2961 break;
2962 }
2963 }
2964 if(j<i)
2965 continue;
2966
2967 for(q=0; q<52; q++){
2968 int shift = div6[q] + 2;
2969 int idx = rem6[q];
2970 for(x=0; x<16; x++)
2971 h->dequant4_coeff[i][q][x] = ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2972 h->pps.scaling_matrix4[i][x]) << shift;
2973 }
2974 }
2975 }
2976
2977 static void init_dequant_tables(H264Context *h){
2978 int i,x;
2979 init_dequant4_coeff_table(h);
2980 if(h->pps.transform_8x8_mode)
2981 init_dequant8_coeff_table(h);
2982 if(h->sps.transform_bypass){
2983 for(i=0; i<6; i++)
2984 for(x=0; x<16; x++)
2985 h->dequant4_coeff[i][0][x] = 1<<6;
2986 if(h->pps.transform_8x8_mode)
2987 for(i=0; i<2; i++)
2988 for(x=0; x<64; x++)
2989 h->dequant8_coeff[i][0][x] = 1<<6;
2990 }
2991 }
2992
2929 2993
2930 /** 2994 /**
2931 * allocates tables. 2995 * allocates tables.
2932 * needs width/height 2996 * needs width/height
2933 */ 2997 */
2934 static int alloc_tables(H264Context *h){ 2998 static int alloc_tables(H264Context *h){
2935 MpegEncContext * const s = &h->s; 2999 MpegEncContext * const s = &h->s;
2936 const int big_mb_num= s->mb_stride * (s->mb_height+1); 3000 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2937 int x,y,q; 3001 int x,y;
2938 3002
2939 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) 3003 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2940 3004
2941 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) 3005 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2942 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t)) 3006 CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t))
2965 h->mb2b_xy [mb_xy]= b_xy; 3029 h->mb2b_xy [mb_xy]= b_xy;
2966 h->mb2b8_xy[mb_xy]= b8_xy; 3030 h->mb2b8_xy[mb_xy]= b8_xy;
2967 } 3031 }
2968 } 3032 }
2969 3033
2970 CHECKED_ALLOCZ(h->dequant4_coeff, 52*16 * sizeof(uint16_t));
2971 CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t));
2972 memcpy(h->dequant4_coeff, dequant_coeff, 52*16 * sizeof(uint16_t));
2973 for(q=0; q<52; q++){
2974 int shift = div6[q];
2975 int idx = rem6[q];
2976 if(shift >= 2) // qp<12 are shifted during dequant
2977 shift -= 2;
2978 for(x=0; x<64; x++)
2979 h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][
2980 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] << shift;
2981 }
2982 if(h->sps.transform_bypass){
2983 for(x=0; x<16; x++)
2984 h->dequant4_coeff[0][x] = 1;
2985 for(x=0; x<64; x++)
2986 h->dequant8_coeff[0][x] = 1<<2;
2987 }
2988
2989 s->obmc_scratchpad = NULL; 3034 s->obmc_scratchpad = NULL;
2990 3035
2991 return 0; 3036 return 0;
2992 fail: 3037 fail:
2993 free_tables(h); 3038 free_tables(h);
3001 s->height = s->avctx->height; 3046 s->height = s->avctx->height;
3002 s->codec_id= s->avctx->codec->id; 3047 s->codec_id= s->avctx->codec->id;
3003 3048
3004 init_pred_ptrs(h); 3049 init_pred_ptrs(h);
3005 3050
3051 h->dequant_coeff_pps= -1;
3006 s->unrestricted_mv=1; 3052 s->unrestricted_mv=1;
3007 s->decode=1; //FIXME 3053 s->decode=1; //FIXME
3008 } 3054 }
3009 3055
3010 static int decode_init(AVCodecContext *avctx){ 3056 static int decode_init(AVCodecContext *avctx){
3347 } 3393 }
3348 }else{ 3394 }else{
3349 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); 3395 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3350 if(s->codec_id == CODEC_ID_H264){ 3396 if(s->codec_id == CODEC_ID_H264){
3351 if(!transform_bypass) 3397 if(!transform_bypass)
3352 h264_luma_dc_dequant_idct_c(h->mb, s->qscale); 3398 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3353 }else 3399 }else
3354 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); 3400 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3355 } 3401 }
3356 if(h->deblocking_filter) { 3402 if(h->deblocking_filter) {
3357 if (h->mb_aff_frame) { 3403 if (h->mb_aff_frame) {
3395 } 3441 }
3396 3442
3397 if(!(s->flags&CODEC_FLAG_GRAY)){ 3443 if(!(s->flags&CODEC_FLAG_GRAY)){
3398 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add; 3444 idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
3399 if(!transform_bypass){ 3445 if(!transform_bypass){
3400 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp); 3446 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3401 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp); 3447 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3402 } 3448 }
3403 if(s->codec_id == CODEC_ID_H264){ 3449 if(s->codec_id == CODEC_ID_H264){
3404 for(i=16; i<16+4; i++){ 3450 for(i=16; i<16+4; i++){
3405 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ 3451 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3406 uint8_t * const ptr= dest_cb + block_offset[i]; 3452 uint8_t * const ptr= dest_cb + block_offset[i];
4176 h->sps= h->sps_buffer[ h->pps.sps_id ]; 4222 h->sps= h->sps_buffer[ h->pps.sps_id ];
4177 if(h->sps.log2_max_frame_num == 0){ 4223 if(h->sps.log2_max_frame_num == 0){
4178 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n"); 4224 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4179 return -1; 4225 return -1;
4180 } 4226 }
4227
4228 if(h->dequant_coeff_pps != pps_id){
4229 h->dequant_coeff_pps = pps_id;
4230 init_dequant_tables(h);
4231 }
4181 4232
4182 s->mb_width= h->sps.mb_width; 4233 s->mb_width= h->sps.mb_width;
4183 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); 4234 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4184 4235
4185 h->b_stride= s->mb_width*4 + 1; 4236 h->b_stride= s->mb_width*4 + 1;
4432 * @param n block index 4483 * @param n block index
4433 * @param scantable scantable 4484 * @param scantable scantable
4434 * @param max_coeff number of coefficients in the block 4485 * @param max_coeff number of coefficients in the block
4435 * @return <0 if an error occured 4486 * @return <0 if an error occured
4436 */ 4487 */
4437 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){ 4488 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4438 MpegEncContext * const s = &h->s; 4489 MpegEncContext * const s = &h->s;
4439 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; 4490 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4440 int level[16]; 4491 int level[16];
4441 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before; 4492 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4442 4493
4549 j= scantable[ coeff_num ]; 4600 j= scantable[ coeff_num ];
4550 4601
4551 block[j]= level[i]; 4602 block[j]= level[i];
4552 } 4603 }
4553 }else{ 4604 }else{
4554 block[j] = level[0] * qmul[j]; 4605 block[j] = (level[0] * qmul[j] + 32)>>6;
4555 for(i=1;i<total_coeff;i++) { 4606 for(i=1;i<total_coeff;i++) {
4556 if(zeros_left <= 0) 4607 if(zeros_left <= 0)
4557 run_before = 0; 4608 run_before = 0;
4558 else if(zeros_left < 7){ 4609 else if(zeros_left < 7){
4559 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1); 4610 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4562 } 4613 }
4563 zeros_left -= run_before; 4614 zeros_left -= run_before;
4564 coeff_num -= 1 + run_before; 4615 coeff_num -= 1 + run_before;
4565 j= scantable[ coeff_num ]; 4616 j= scantable[ coeff_num ];
4566 4617
4567 block[j]= level[i] * qmul[j]; 4618 block[j]= (level[i] * qmul[j] + 32)>>6;
4568 // printf("%d %d ", block[j], qmul[j]);
4569 } 4619 }
4570 } 4620 }
4571 4621
4572 if(zeros_left<0){ 4622 if(zeros_left<0){
4573 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y); 4623 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
5006 else s->qscale-= 52; 5056 else s->qscale-= 52;
5007 } 5057 }
5008 5058
5009 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); 5059 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5010 if(IS_INTRA16x16(mb_type)){ 5060 if(IS_INTRA16x16(mb_type)){
5011 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0){ 5061 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5012 return -1; //FIXME continue if partitioned and other return -1 too 5062 return -1; //FIXME continue if partitioned and other return -1 too
5013 } 5063 }
5014 5064
5015 assert((cbp&15) == 0 || (cbp&15) == 15); 5065 assert((cbp&15) == 0 || (cbp&15) == 15);
5016 5066
5017 if(cbp&15){ 5067 if(cbp&15){
5018 for(i8x8=0; i8x8<4; i8x8++){ 5068 for(i8x8=0; i8x8<4; i8x8++){
5019 for(i4x4=0; i4x4<4; i4x4++){ 5069 for(i4x4=0; i4x4<4; i4x4++){
5020 const int index= i4x4 + 4*i8x8; 5070 const int index= i4x4 + 4*i8x8;
5021 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ){ 5071 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5022 return -1; 5072 return -1;
5023 } 5073 }
5024 } 5074 }
5025 } 5075 }
5026 }else{ 5076 }else{
5032 if(IS_8x8DCT(mb_type)){ 5082 if(IS_8x8DCT(mb_type)){
5033 DCTELEM *buf = &h->mb[64*i8x8]; 5083 DCTELEM *buf = &h->mb[64*i8x8];
5034 uint8_t *nnz; 5084 uint8_t *nnz;
5035 for(i4x4=0; i4x4<4; i4x4++){ 5085 for(i4x4=0; i4x4<4; i4x4++){
5036 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, 5086 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
5037 h->dequant8_coeff[s->qscale], 16) <0 ) 5087 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5038 return -1; 5088 return -1;
5039 }
5040 if(s->qscale < 12){
5041 int i;
5042 for(i=0; i<64; i++)
5043 buf[i] = (buf[i] + 2) >> 2;
5044 } 5089 }
5045 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; 5090 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5046 nnz[0] |= nnz[1] | nnz[8] | nnz[9]; 5091 nnz[0] |= nnz[1] | nnz[8] | nnz[9];
5047 }else{ 5092 }else{
5048 for(i4x4=0; i4x4<4; i4x4++){ 5093 for(i4x4=0; i4x4<4; i4x4++){
5049 const int index= i4x4 + 4*i8x8; 5094 const int index= i4x4 + 4*i8x8;
5050 5095
5051 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[s->qscale], 16) <0 ){ 5096 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5052 return -1; 5097 return -1;
5053 } 5098 }
5054 } 5099 }
5055 } 5100 }
5056 }else{ 5101 }else{
5060 } 5105 }
5061 } 5106 }
5062 5107
5063 if(cbp&0x30){ 5108 if(cbp&0x30){
5064 for(chroma_idx=0; chroma_idx<2; chroma_idx++) 5109 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5065 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, h->dequant4_coeff[chroma_qp], 4) < 0){ 5110 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5066 return -1; 5111 return -1;
5067 } 5112 }
5068 } 5113 }
5069 5114
5070 if(cbp&0x20){ 5115 if(cbp&0x20){
5071 for(chroma_idx=0; chroma_idx<2; chroma_idx++){ 5116 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5072 for(i4x4=0; i4x4<4; i4x4++){ 5117 for(i4x4=0; i4x4<4; i4x4++){
5073 const int index= 16 + 4*chroma_idx + i4x4; 5118 const int index= 16 + 4*chroma_idx + i4x4;
5074 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_qp], 15) < 0){ 5119 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5075 return -1; 5120 return -1;
5076 } 5121 }
5077 } 5122 }
5078 } 5123 }
5079 }else{ 5124 }else{
5508 ctx += 2; 5553 ctx += 2;
5509 5554
5510 return ctx + 4 * cat; 5555 return ctx + 4 * cat;
5511 } 5556 }
5512 5557
5513 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) { 5558 static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5514 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; 5559 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
5515 static const int significant_coeff_flag_field_offset[2] = { 105, 277 }; 5560 static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5516 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 }; 5561 static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5517 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 }; 5562 static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5518 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 }; 5563 static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5614 if( get_cabac( &h->cabac, ctx ) == 0 ) { 5659 if( get_cabac( &h->cabac, ctx ) == 0 ) {
5615 if( !qmul ) { 5660 if( !qmul ) {
5616 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1; 5661 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
5617 else block[j] = 1; 5662 else block[j] = 1;
5618 }else{ 5663 }else{
5619 if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j]; 5664 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
5620 else block[j] = qmul[j]; 5665 else block[j] = ( qmul[j] + 32) >> 6;
5621 } 5666 }
5622 5667
5623 abslevel1++; 5668 abslevel1++;
5624 } else { 5669 } else {
5625 int coeff_abs = 2; 5670 int coeff_abs = 2;
5643 5688
5644 if( !qmul ) { 5689 if( !qmul ) {
5645 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs; 5690 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
5646 else block[j] = coeff_abs; 5691 else block[j] = coeff_abs;
5647 }else{ 5692 }else{
5648 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j]; 5693 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5649 else block[j] = coeff_abs * qmul[j]; 5694 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5650 } 5695 }
5651 5696
5652 abslevelgt1++; 5697 abslevelgt1++;
5653 } 5698 }
5654 } 5699 }
6076 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0) 6121 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6077 return -1; 6122 return -1;
6078 if( cbp&15 ) { 6123 if( cbp&15 ) {
6079 for( i = 0; i < 16; i++ ) { 6124 for( i = 0; i < 16; i++ ) {
6080 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); 6125 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6081 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ) 6126 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6082 return -1; 6127 return -1;
6083 } 6128 }
6084 } else { 6129 } else {
6085 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); 6130 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6086 } 6131 }
6088 int i8x8, i4x4; 6133 int i8x8, i4x4;
6089 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { 6134 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6090 if( cbp & (1<<i8x8) ) { 6135 if( cbp & (1<<i8x8) ) {
6091 if( IS_8x8DCT(mb_type) ) { 6136 if( IS_8x8DCT(mb_type) ) {
6092 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, 6137 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6093 zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 ) 6138 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6094 return -1; 6139 return -1;
6095 if(s->qscale < 12){
6096 int i;
6097 for(i=0; i<64; i++)
6098 h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2;
6099 }
6100 } else 6140 } else
6101 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { 6141 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6102 const int index = 4*i8x8 + i4x4; 6142 const int index = 4*i8x8 + i4x4;
6103 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); 6143 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6104 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[s->qscale], 16) < 0 ) 6144 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6105 return -1; 6145 return -1;
6106 } 6146 }
6107 } else { 6147 } else {
6108 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; 6148 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6109 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; 6149 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6124 int c, i; 6164 int c, i;
6125 for( c = 0; c < 2; c++ ) { 6165 for( c = 0; c < 2; c++ ) {
6126 for( i = 0; i < 4; i++ ) { 6166 for( i = 0; i < 4; i++ ) {
6127 const int index = 16 + 4 * c + i; 6167 const int index = 16 + 4 * c + i;
6128 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); 6168 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6129 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[h->chroma_qp], 15) < 0) 6169 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6130 return -1; 6170 return -1;
6131 } 6171 }
6132 } 6172 }
6133 } else { 6173 } else {
6134 uint8_t * const nnz= &h->non_zero_count_cache[0]; 6174 uint8_t * const nnz= &h->non_zero_count_cache[0];
7004 } 7044 }
7005 7045
7006 return 0; 7046 return 0;
7007 } 7047 }
7008 7048
7049 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size, const uint8_t *default_list){
7050 MpegEncContext * const s = &h->s;
7051 int i, last = 8, next = 8;
7052 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7053 if(!get_bits1(&s->gb)) /* matrix not written, we use the default one */
7054 memcpy(factors, default_list, size*sizeof(uint8_t));
7055 else
7056 for(i=0;i<size;i++){
7057 if(next)
7058 next = (last + get_se_golomb(&s->gb)) & 0xff;
7059 if(!i && !next){ /* matrix not written, we use the default one */
7060 memcpy(factors, default_list, size*sizeof(uint8_t));
7061 break;
7062 }
7063 last = factors[scan[i]] = next ? next : last;
7064 }
7065 }
7066
7067 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7068 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7069 MpegEncContext * const s = &h->s;
7070 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7071 const uint8_t *fallback[4] = {
7072 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7073 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7074 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7075 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7076 };
7077 if(get_bits1(&s->gb)){
7078 sps->scaling_matrix_present |= is_sps;
7079 decode_scaling_list(h,scaling_matrix4[0],16,fallback[0]); // Intra, Y
7080 decode_scaling_list(h,scaling_matrix4[1],16,scaling_matrix4[0]); // Intra, Cr
7081 decode_scaling_list(h,scaling_matrix4[2],16,scaling_matrix4[1]); // Intra, Cb
7082 decode_scaling_list(h,scaling_matrix4[3],16,fallback[1]); // Inter, Y
7083 decode_scaling_list(h,scaling_matrix4[4],16,scaling_matrix4[3]); // Inter, Cr
7084 decode_scaling_list(h,scaling_matrix4[5],16,scaling_matrix4[4]); // Inter, Cb
7085 if(is_sps || pps->transform_8x8_mode){
7086 decode_scaling_list(h,scaling_matrix8[0],64,fallback[2]); // Intra, Y
7087 decode_scaling_list(h,scaling_matrix8[1],64,fallback[3]); // Inter, Y
7088 }
7089 } else if(fallback_sps) {
7090 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7091 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7092 }
7093 }
7094
7009 static inline int decode_seq_parameter_set(H264Context *h){ 7095 static inline int decode_seq_parameter_set(H264Context *h){
7010 MpegEncContext * const s = &h->s; 7096 MpegEncContext * const s = &h->s;
7011 int profile_idc, level_idc; 7097 int profile_idc, level_idc;
7012 int sps_id, i; 7098 int sps_id, i;
7013 SPS *sps; 7099 SPS *sps;
7029 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc 7115 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7030 get_bits1(&s->gb); //residual_color_transform_flag 7116 get_bits1(&s->gb); //residual_color_transform_flag
7031 get_ue_golomb(&s->gb); //bit_depth_luma_minus8 7117 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7032 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8 7118 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7033 sps->transform_bypass = get_bits1(&s->gb); 7119 sps->transform_bypass = get_bits1(&s->gb);
7034 if(get_bits1(&s->gb)){ //seq_scaling_matrix_present_flag 7120 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7035 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n"); 7121 }else
7036 return -1; 7122 sps->scaling_matrix_present = 0;
7037 }
7038 }
7039 7123
7040 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4; 7124 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7041 sps->poc_type= get_ue_golomb(&s->gb); 7125 sps->poc_type= get_ue_golomb(&s->gb);
7042 7126
7043 if(sps->poc_type == 0){ //FIXME #define 7127 if(sps->poc_type == 0){ //FIXME #define
7170 pps->init_qs= get_se_golomb(&s->gb) + 26; 7254 pps->init_qs= get_se_golomb(&s->gb) + 26;
7171 pps->chroma_qp_index_offset= get_se_golomb(&s->gb); 7255 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7172 pps->deblocking_filter_parameters_present= get_bits1(&s->gb); 7256 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7173 pps->constrained_intra_pred= get_bits1(&s->gb); 7257 pps->constrained_intra_pred= get_bits1(&s->gb);
7174 pps->redundant_pic_cnt_present = get_bits1(&s->gb); 7258 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7259
7260 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7261 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7175 7262
7176 if(get_bits_count(&s->gb) < bit_length){ 7263 if(get_bits_count(&s->gb) < bit_length){
7177 pps->transform_8x8_mode= get_bits1(&s->gb); 7264 pps->transform_8x8_mode= get_bits1(&s->gb);
7178 if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag 7265 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7179 av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
7180 return -1;
7181 }
7182 get_se_golomb(&s->gb); //second_chroma_qp_index_offset 7266 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7183 } 7267 }
7184 7268
7185 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 7269 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7186 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n", 7270 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",