Mercurial > libavcodec.hg
comparison dv.c @ 8709:8754ae06ddcf libavcodec
unrolling encoding loops
author | romansh |
---|---|
date | Sat, 31 Jan 2009 01:46:25 +0000 |
parents | 2ecdf1ad277f |
children | eaae57fe72b2 |
comparison legend: equal | deleted | inserted | replaced
8708:2ecdf1ad277f | 8709:8754ae06ddcf |
---|---|
996 | 996 |
997 static int dv_encode_video_segment(AVCodecContext *avctx, DVwork_chunk *work_chunk) | 997 static int dv_encode_video_segment(AVCodecContext *avctx, DVwork_chunk *work_chunk) |
998 { | 998 { |
999 DVVideoContext *s = avctx->priv_data; | 999 DVVideoContext *s = avctx->priv_data; |
1000 int mb_index, i, j; | 1000 int mb_index, i, j; |
1001 int mb_x, mb_y, c_offset, linesize; | 1001 int mb_x, mb_y, c_offset, linesize, y_stride; |
1002 uint8_t* y_ptr; | 1002 uint8_t* y_ptr; |
1003 uint8_t* data; | |
1004 uint8_t* dif; | 1003 uint8_t* dif; |
1005 uint8_t scratch[64]; | 1004 uint8_t scratch[64]; |
1006 EncBlockInfo enc_blks[5*DV_MAX_BPM]; | 1005 EncBlockInfo enc_blks[5*DV_MAX_BPM]; |
1007 PutBitContext pbs[5*DV_MAX_BPM]; | 1006 PutBitContext pbs[5*DV_MAX_BPM]; |
1008 PutBitContext* pb; | 1007 PutBitContext* pb; |
1013 | 1012 |
1014 dif = &s->buf[work_chunk->buf_offset*80]; | 1013 dif = &s->buf[work_chunk->buf_offset*80]; |
1015 enc_blk = &enc_blks[0]; | 1014 enc_blk = &enc_blks[0]; |
1016 for (mb_index = 0; mb_index < 5; mb_index++) { | 1015 for (mb_index = 0; mb_index < 5; mb_index++) { |
1017 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y); | 1016 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y); |
1017 | |
1018 /* initializing luminance blocks */ | |
1019 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) || | |
1020 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) || | |
1021 (s->sys->height >= 720 && mb_y != 134)) { | |
1022 y_stride = s->picture.linesize[0] << 3; | |
1023 } else { | |
1024 y_stride = 16; | |
1025 } | |
1018 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3); | 1026 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3); |
1027 linesize = s->picture.linesize[0]; | |
1028 | |
1029 if (s->sys->video_stype == 4) { /* SD 422 */ | |
1030 vs_bit_size += | |
1031 dv_init_enc_block(enc_blk+0, y_ptr , linesize, s, 0) + | |
1032 dv_init_enc_block(enc_blk+1, NULL , linesize, s, 0) + | |
1033 dv_init_enc_block(enc_blk+2, y_ptr + 8 , linesize, s, 0) + | |
1034 dv_init_enc_block(enc_blk+3, NULL , linesize, s, 0); | |
1035 } else { | |
1036 vs_bit_size += | |
1037 dv_init_enc_block(enc_blk+0, y_ptr , linesize, s, 0) + | |
1038 dv_init_enc_block(enc_blk+1, y_ptr + 8 , linesize, s, 0) + | |
1039 dv_init_enc_block(enc_blk+2, y_ptr + y_stride, linesize, s, 0) + | |
1040 dv_init_enc_block(enc_blk+3, y_ptr + 8 + y_stride, linesize, s, 0); | |
1041 } | |
1042 enc_blk += 4; | |
1043 | |
1044 /* initializing chrominance blocks */ | |
1019 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] + | 1045 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] + |
1020 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3); | 1046 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3); |
1021 for (j = 0; j < 6; j++) { | 1047 for (j = 2; j; j--) { |
1022 if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */ | 1048 uint8_t *c_ptr = s->picture.data[j] + c_offset; |
1023 if (j == 0 || j == 2) { | 1049 linesize = s->picture.linesize[j]; |
1024 /* Y0 Y1 */ | 1050 y_stride = (mb_y == 134) ? 8 : (s->picture.linesize[j] << 3); |
1025 data = y_ptr + ((j >> 1) * 8); | |
1026 linesize = s->picture.linesize[0]; | |
1027 } else if (j > 3) { | |
1028 /* Cr Cb */ | |
1029 data = s->picture.data[6 - j] + c_offset; | |
1030 linesize = s->picture.linesize[6 - j]; | |
1031 } else { | |
1032 /* j=1 and j=3 are "dummy" blocks, used for AC data only */ | |
1033 data = NULL; | |
1034 linesize = 0; | |
1035 } | |
1036 } else { /* 4:1:1 or 4:2:0 */ | |
1037 if (j < 4) { /* Four Y blocks */ | |
1038 /* NOTE: at end of line, the macroblock is handled as 420 */ | |
1039 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { | |
1040 data = y_ptr + (j * 8); | |
1041 } else { | |
1042 data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); | |
1043 } | |
1044 linesize = s->picture.linesize[0]; | |
1045 } else { /* Cr and Cb blocks */ | |
1046 /* don't ask Fabrice why they inverted Cb and Cr ! */ | |
1047 data = s->picture.data [6 - j] + c_offset; | |
1048 linesize = s->picture.linesize[6 - j]; | |
1049 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { | 1051 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { |
1050 uint8_t* d; | 1052 uint8_t* d; |
1051 uint8_t* b = scratch; | 1053 uint8_t* b = scratch; |
1052 for (i = 0; i < 8; i++) { | 1054 for (i = 0; i < 8; i++) { |
1053 d = data + 8 * linesize; | 1055 d = c_ptr + (linesize << 3); |
1054 b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3]; | 1056 b[0] = c_ptr[0]; b[1] = c_ptr[1]; b[2] = c_ptr[2]; b[3] = c_ptr[3]; |
1055 b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3]; | 1057 b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3]; |
1056 data += linesize; | 1058 c_ptr += linesize; |
1057 b += 8; | 1059 b += 8; |
1058 } | 1060 } |
1059 data = scratch; | 1061 c_ptr = scratch; |
1060 linesize = 8; | 1062 linesize = 8; |
1061 } | 1063 } |
1062 } | 1064 |
1063 } | 1065 vs_bit_size += dv_init_enc_block( enc_blk++, c_ptr , linesize, s, 1); |
1064 | 1066 if (s->sys->bpm == 8) { |
1065 vs_bit_size += dv_init_enc_block(enc_blk, data, linesize, s, j>>2); | 1067 vs_bit_size += dv_init_enc_block(enc_blk++, c_ptr + y_stride, linesize, s, 1); |
1066 | 1068 } |
1067 ++enc_blk; | |
1068 } | 1069 } |
1069 } | 1070 } |
1070 | 1071 |
1071 if (vs_total_ac_bits < vs_bit_size) | 1072 if (vs_total_ac_bits < vs_bit_size) |
1072 dv_guess_qnos(&enc_blks[0], qnosp); | 1073 dv_guess_qnos(&enc_blks[0], qnosp); |