Mercurial > libavcodec.hg
comparison h264.c @ 3174:b65cbae9d940 libavcodec
h264_idct8_add_mmx
author | lorenm |
---|---|
date | Tue, 07 Mar 2006 22:45:56 +0000 |
parents | e3e94632c6e9 |
children | 8d1b2cc2a75b |
comparison legend:
- equal
- deleted
- inserted
- replaced
3173:9a2cc7b0fbdb | 3174:b65cbae9d940 |
---|---|
356 uint8_t *direct_table; | 356 uint8_t *direct_table; |
357 uint8_t direct_cache[5*8]; | 357 uint8_t direct_cache[5*8]; |
358 | 358 |
359 uint8_t zigzag_scan[16]; | 359 uint8_t zigzag_scan[16]; |
360 uint8_t field_scan[16]; | 360 uint8_t field_scan[16]; |
361 uint8_t zigzag_scan8x8[64]; | |
362 uint8_t zigzag_scan8x8_cavlc[64]; | |
361 const uint8_t *zigzag_scan_q0; | 363 const uint8_t *zigzag_scan_q0; |
362 const uint8_t *field_scan_q0; | 364 const uint8_t *field_scan_q0; |
365 const uint8_t *zigzag_scan8x8_q0; | |
366 const uint8_t *zigzag_scan8x8_cavlc_q0; | |
363 | 367 |
364 int x264_build; | 368 int x264_build; |
365 }H264Context; | 369 }H264Context; |
366 | 370 |
367 static VLC coeff_token_vlc[4]; | 371 static VLC coeff_token_vlc[4]; |
2951 av_freep(&h->s.obmc_scratchpad); | 2955 av_freep(&h->s.obmc_scratchpad); |
2952 } | 2956 } |
2953 | 2957 |
2954 static void init_dequant8_coeff_table(H264Context *h){ | 2958 static void init_dequant8_coeff_table(H264Context *h){ |
2955 int i,q,x; | 2959 int i,q,x; |
2960 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly | |
2956 h->dequant8_coeff[0] = h->dequant8_buffer[0]; | 2961 h->dequant8_coeff[0] = h->dequant8_buffer[0]; |
2957 h->dequant8_coeff[1] = h->dequant8_buffer[1]; | 2962 h->dequant8_coeff[1] = h->dequant8_buffer[1]; |
2958 | 2963 |
2959 for(i=0; i<2; i++ ){ | 2964 for(i=0; i<2; i++ ){ |
2960 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ | 2965 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ |
2964 | 2969 |
2965 for(q=0; q<52; q++){ | 2970 for(q=0; q<52; q++){ |
2966 int shift = div6[q]; | 2971 int shift = div6[q]; |
2967 int idx = rem6[q]; | 2972 int idx = rem6[q]; |
2968 for(x=0; x<64; x++) | 2973 for(x=0; x<64; x++) |
2969 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][ | 2974 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = |
2970 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift; | 2975 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * |
2976 h->pps.scaling_matrix8[i][x]) << shift; | |
2971 } | 2977 } |
2972 } | 2978 } |
2973 } | 2979 } |
2974 | 2980 |
2975 static void init_dequant4_coeff_table(H264Context *h){ | 2981 static void init_dequant4_coeff_table(H264Context *h){ |
4315 int i; | 4321 int i; |
4316 for(i=0; i<16; i++){ | 4322 for(i=0; i<16; i++){ |
4317 #define T(x) (x>>2) | ((x<<2) & 0xF) | 4323 #define T(x) (x>>2) | ((x<<2) & 0xF) |
4318 h->zigzag_scan[i] = T(zigzag_scan[i]); | 4324 h->zigzag_scan[i] = T(zigzag_scan[i]); |
4319 h-> field_scan[i] = T( field_scan[i]); | 4325 h-> field_scan[i] = T( field_scan[i]); |
4326 #undef T | |
4327 } | |
4328 } | |
4329 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){ | |
4330 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t)); | |
4331 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); | |
4332 }else{ | |
4333 int i; | |
4334 for(i=0; i<64; i++){ | |
4335 #define T(x) (x>>3) | ((x&7)<<3) | |
4336 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]); | |
4337 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); | |
4338 #undef T | |
4320 } | 4339 } |
4321 } | 4340 } |
4322 if(h->sps.transform_bypass){ //FIXME same ugly | 4341 if(h->sps.transform_bypass){ //FIXME same ugly |
4323 h->zigzag_scan_q0 = zigzag_scan; | 4342 h->zigzag_scan_q0 = zigzag_scan; |
4324 h->field_scan_q0 = field_scan; | 4343 h->field_scan_q0 = field_scan; |
4344 h->zigzag_scan8x8_q0 = zigzag_scan8x8; | |
4345 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; | |
4325 }else{ | 4346 }else{ |
4326 h->zigzag_scan_q0 = h->zigzag_scan; | 4347 h->zigzag_scan_q0 = h->zigzag_scan; |
4327 h->field_scan_q0 = h->field_scan; | 4348 h->field_scan_q0 = h->field_scan; |
4349 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; | |
4350 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; | |
4328 } | 4351 } |
4329 | 4352 |
4330 alloc_tables(h); | 4353 alloc_tables(h); |
4331 | 4354 |
4332 s->avctx->width = s->width; | 4355 s->avctx->width = s->width; |
5099 | 5122 |
5100 if(cbp || IS_INTRA16x16(mb_type)){ | 5123 if(cbp || IS_INTRA16x16(mb_type)){ |
5101 int i8x8, i4x4, chroma_idx; | 5124 int i8x8, i4x4, chroma_idx; |
5102 int chroma_qp, dquant; | 5125 int chroma_qp, dquant; |
5103 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; | 5126 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; |
5104 const uint8_t *scan, *dc_scan; | 5127 const uint8_t *scan, *scan8x8, *dc_scan; |
5105 | 5128 |
5106 // fill_non_zero_count_cache(h); | 5129 // fill_non_zero_count_cache(h); |
5107 | 5130 |
5108 if(IS_INTERLACED(mb_type)){ | 5131 if(IS_INTERLACED(mb_type)){ |
5109 scan= s->qscale ? h->field_scan : h->field_scan_q0; | 5132 scan= s->qscale ? h->field_scan : h->field_scan_q0; |
5110 dc_scan= luma_dc_field_scan; | 5133 dc_scan= luma_dc_field_scan; |
5111 }else{ | 5134 }else{ |
5112 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; | 5135 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; |
5113 dc_scan= luma_dc_zigzag_scan; | 5136 dc_scan= luma_dc_zigzag_scan; |
5114 } | 5137 } |
5138 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; | |
5115 | 5139 |
5116 dquant= get_se_golomb(&s->gb); | 5140 dquant= get_se_golomb(&s->gb); |
5117 | 5141 |
5118 if( dquant > 25 || dquant < -26 ){ | 5142 if( dquant > 25 || dquant < -26 ){ |
5119 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y); | 5143 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y); |
5151 if(cbp & (1<<i8x8)){ | 5175 if(cbp & (1<<i8x8)){ |
5152 if(IS_8x8DCT(mb_type)){ | 5176 if(IS_8x8DCT(mb_type)){ |
5153 DCTELEM *buf = &h->mb[64*i8x8]; | 5177 DCTELEM *buf = &h->mb[64*i8x8]; |
5154 uint8_t *nnz; | 5178 uint8_t *nnz; |
5155 for(i4x4=0; i4x4<4; i4x4++){ | 5179 for(i4x4=0; i4x4<4; i4x4++){ |
5156 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, | 5180 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, |
5157 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) | 5181 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) |
5158 return -1; | 5182 return -1; |
5159 } | 5183 } |
5160 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | 5184 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; |
5161 nnz[0] += nnz[1] + nnz[8] + nnz[9]; | 5185 nnz[0] += nnz[1] + nnz[8] + nnz[9]; |
6142 mb_type |= MB_TYPE_8x8DCT; | 6166 mb_type |= MB_TYPE_8x8DCT; |
6143 } | 6167 } |
6144 s->current_picture.mb_type[mb_xy]= mb_type; | 6168 s->current_picture.mb_type[mb_xy]= mb_type; |
6145 | 6169 |
6146 if( cbp || IS_INTRA16x16( mb_type ) ) { | 6170 if( cbp || IS_INTRA16x16( mb_type ) ) { |
6147 const uint8_t *scan, *dc_scan; | 6171 const uint8_t *scan, *scan8x8, *dc_scan; |
6148 int dqp; | 6172 int dqp; |
6149 | 6173 |
6150 if(IS_INTERLACED(mb_type)){ | 6174 if(IS_INTERLACED(mb_type)){ |
6151 scan= s->qscale ? h->field_scan : h->field_scan_q0; | 6175 scan= s->qscale ? h->field_scan : h->field_scan_q0; |
6152 dc_scan= luma_dc_field_scan; | 6176 dc_scan= luma_dc_field_scan; |
6153 }else{ | 6177 }else{ |
6154 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; | 6178 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; |
6155 dc_scan= luma_dc_zigzag_scan; | 6179 dc_scan= luma_dc_zigzag_scan; |
6156 } | 6180 } |
6181 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; | |
6157 | 6182 |
6158 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h ); | 6183 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h ); |
6159 if( dqp == INT_MIN ){ | 6184 if( dqp == INT_MIN ){ |
6160 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y); | 6185 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y); |
6161 return -1; | 6186 return -1; |
6185 int i8x8, i4x4; | 6210 int i8x8, i4x4; |
6186 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { | 6211 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { |
6187 if( cbp & (1<<i8x8) ) { | 6212 if( cbp & (1<<i8x8) ) { |
6188 if( IS_8x8DCT(mb_type) ) { | 6213 if( IS_8x8DCT(mb_type) ) { |
6189 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, | 6214 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, |
6190 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) | 6215 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) |
6191 return -1; | 6216 return -1; |
6192 } else | 6217 } else |
6193 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { | 6218 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { |
6194 const int index = 4*i8x8 + i4x4; | 6219 const int index = 4*i8x8 + i4x4; |
6195 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); | 6220 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); |