comparison h264.c @ 3174:b65cbae9d940 libavcodec

h264_idct8_add_mmx
author lorenm
date Tue, 07 Mar 2006 22:45:56 +0000
parents e3e94632c6e9
children 8d1b2cc2a75b
comparison
equal deleted inserted replaced
3173:9a2cc7b0fbdb 3174:b65cbae9d940
356 uint8_t *direct_table; 356 uint8_t *direct_table;
357 uint8_t direct_cache[5*8]; 357 uint8_t direct_cache[5*8];
358 358
359 uint8_t zigzag_scan[16]; 359 uint8_t zigzag_scan[16];
360 uint8_t field_scan[16]; 360 uint8_t field_scan[16];
361 uint8_t zigzag_scan8x8[64];
362 uint8_t zigzag_scan8x8_cavlc[64];
361 const uint8_t *zigzag_scan_q0; 363 const uint8_t *zigzag_scan_q0;
362 const uint8_t *field_scan_q0; 364 const uint8_t *field_scan_q0;
365 const uint8_t *zigzag_scan8x8_q0;
366 const uint8_t *zigzag_scan8x8_cavlc_q0;
363 367
364 int x264_build; 368 int x264_build;
365 }H264Context; 369 }H264Context;
366 370
367 static VLC coeff_token_vlc[4]; 371 static VLC coeff_token_vlc[4];
2951 av_freep(&h->s.obmc_scratchpad); 2955 av_freep(&h->s.obmc_scratchpad);
2952 } 2956 }
2953 2957
2954 static void init_dequant8_coeff_table(H264Context *h){ 2958 static void init_dequant8_coeff_table(H264Context *h){
2955 int i,q,x; 2959 int i,q,x;
2960 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2956 h->dequant8_coeff[0] = h->dequant8_buffer[0]; 2961 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2957 h->dequant8_coeff[1] = h->dequant8_buffer[1]; 2962 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2958 2963
2959 for(i=0; i<2; i++ ){ 2964 for(i=0; i<2; i++ ){
2960 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ 2965 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2964 2969
2965 for(q=0; q<52; q++){ 2970 for(q=0; q<52; q++){
2966 int shift = div6[q]; 2971 int shift = div6[q];
2967 int idx = rem6[q]; 2972 int idx = rem6[q];
2968 for(x=0; x<64; x++) 2973 for(x=0; x<64; x++)
2969 h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][ 2974 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2970 dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift; 2975 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2976 h->pps.scaling_matrix8[i][x]) << shift;
2971 } 2977 }
2972 } 2978 }
2973 } 2979 }
2974 2980
2975 static void init_dequant4_coeff_table(H264Context *h){ 2981 static void init_dequant4_coeff_table(H264Context *h){
4315 int i; 4321 int i;
4316 for(i=0; i<16; i++){ 4322 for(i=0; i<16; i++){
4317 #define T(x) (x>>2) | ((x<<2) & 0xF) 4323 #define T(x) (x>>2) | ((x<<2) & 0xF)
4318 h->zigzag_scan[i] = T(zigzag_scan[i]); 4324 h->zigzag_scan[i] = T(zigzag_scan[i]);
4319 h-> field_scan[i] = T( field_scan[i]); 4325 h-> field_scan[i] = T( field_scan[i]);
4326 #undef T
4327 }
4328 }
4329 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4330 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4331 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4332 }else{
4333 int i;
4334 for(i=0; i<64; i++){
4335 #define T(x) (x>>3) | ((x&7)<<3)
4336 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4337 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4338 #undef T
4320 } 4339 }
4321 } 4340 }
4322 if(h->sps.transform_bypass){ //FIXME same ugly 4341 if(h->sps.transform_bypass){ //FIXME same ugly
4323 h->zigzag_scan_q0 = zigzag_scan; 4342 h->zigzag_scan_q0 = zigzag_scan;
4324 h->field_scan_q0 = field_scan; 4343 h->field_scan_q0 = field_scan;
4344 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4345 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4325 }else{ 4346 }else{
4326 h->zigzag_scan_q0 = h->zigzag_scan; 4347 h->zigzag_scan_q0 = h->zigzag_scan;
4327 h->field_scan_q0 = h->field_scan; 4348 h->field_scan_q0 = h->field_scan;
4349 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4350 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4328 } 4351 }
4329 4352
4330 alloc_tables(h); 4353 alloc_tables(h);
4331 4354
4332 s->avctx->width = s->width; 4355 s->avctx->width = s->width;
5099 5122
5100 if(cbp || IS_INTRA16x16(mb_type)){ 5123 if(cbp || IS_INTRA16x16(mb_type)){
5101 int i8x8, i4x4, chroma_idx; 5124 int i8x8, i4x4, chroma_idx;
5102 int chroma_qp, dquant; 5125 int chroma_qp, dquant;
5103 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; 5126 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5104 const uint8_t *scan, *dc_scan; 5127 const uint8_t *scan, *scan8x8, *dc_scan;
5105 5128
5106 // fill_non_zero_count_cache(h); 5129 // fill_non_zero_count_cache(h);
5107 5130
5108 if(IS_INTERLACED(mb_type)){ 5131 if(IS_INTERLACED(mb_type)){
5109 scan= s->qscale ? h->field_scan : h->field_scan_q0; 5132 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5110 dc_scan= luma_dc_field_scan; 5133 dc_scan= luma_dc_field_scan;
5111 }else{ 5134 }else{
5112 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; 5135 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5113 dc_scan= luma_dc_zigzag_scan; 5136 dc_scan= luma_dc_zigzag_scan;
5114 } 5137 }
5138 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5115 5139
5116 dquant= get_se_golomb(&s->gb); 5140 dquant= get_se_golomb(&s->gb);
5117 5141
5118 if( dquant > 25 || dquant < -26 ){ 5142 if( dquant > 25 || dquant < -26 ){
5119 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y); 5143 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5151 if(cbp & (1<<i8x8)){ 5175 if(cbp & (1<<i8x8)){
5152 if(IS_8x8DCT(mb_type)){ 5176 if(IS_8x8DCT(mb_type)){
5153 DCTELEM *buf = &h->mb[64*i8x8]; 5177 DCTELEM *buf = &h->mb[64*i8x8];
5154 uint8_t *nnz; 5178 uint8_t *nnz;
5155 for(i4x4=0; i4x4<4; i4x4++){ 5179 for(i4x4=0; i4x4<4; i4x4++){
5156 if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, 5180 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5157 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) 5181 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5158 return -1; 5182 return -1;
5159 } 5183 }
5160 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; 5184 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5161 nnz[0] += nnz[1] + nnz[8] + nnz[9]; 5185 nnz[0] += nnz[1] + nnz[8] + nnz[9];
6142 mb_type |= MB_TYPE_8x8DCT; 6166 mb_type |= MB_TYPE_8x8DCT;
6143 } 6167 }
6144 s->current_picture.mb_type[mb_xy]= mb_type; 6168 s->current_picture.mb_type[mb_xy]= mb_type;
6145 6169
6146 if( cbp || IS_INTRA16x16( mb_type ) ) { 6170 if( cbp || IS_INTRA16x16( mb_type ) ) {
6147 const uint8_t *scan, *dc_scan; 6171 const uint8_t *scan, *scan8x8, *dc_scan;
6148 int dqp; 6172 int dqp;
6149 6173
6150 if(IS_INTERLACED(mb_type)){ 6174 if(IS_INTERLACED(mb_type)){
6151 scan= s->qscale ? h->field_scan : h->field_scan_q0; 6175 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6152 dc_scan= luma_dc_field_scan; 6176 dc_scan= luma_dc_field_scan;
6153 }else{ 6177 }else{
6154 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; 6178 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6155 dc_scan= luma_dc_zigzag_scan; 6179 dc_scan= luma_dc_zigzag_scan;
6156 } 6180 }
6181 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6157 6182
6158 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h ); 6183 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6159 if( dqp == INT_MIN ){ 6184 if( dqp == INT_MIN ){
6160 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y); 6185 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6161 return -1; 6186 return -1;
6185 int i8x8, i4x4; 6210 int i8x8, i4x4;
6186 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { 6211 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6187 if( cbp & (1<<i8x8) ) { 6212 if( cbp & (1<<i8x8) ) {
6188 if( IS_8x8DCT(mb_type) ) { 6213 if( IS_8x8DCT(mb_type) ) {
6189 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, 6214 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6190 zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) 6215 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6191 return -1; 6216 return -1;
6192 } else 6217 } else
6193 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { 6218 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6194 const int index = 4*i8x8 + i4x4; 6219 const int index = 4*i8x8 + i4x4;
6195 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); 6220 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );