comparison dv.c @ 8118:890df98a7848 libavcodec

implementing more efficient (and direct) allocation of work for DV codec workers
author romansh
date Sat, 08 Nov 2008 00:18:00 +0000
parents 728e0e4fcb95
children a9734fe0811e
comparison
equal deleted inserted replaced
8117:a0f9045e0a82 8118:890df98a7848
60 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); 60 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
61 void (*fdct[2])(DCTELEM *block); 61 void (*fdct[2])(DCTELEM *block);
62 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); 62 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
63 } DVVideoContext; 63 } DVVideoContext;
64 64
65 /**
66 * MultiThreading - dv_anchor applies to entire DV codec, not just the avcontext
67 * one element is needed for each video segment in a DV frame
68 * at most there are 4 DIF channels * 12 DIF sequences * 27 video segments (1080i50)
69 */
70 #define DV_ANCHOR_SIZE (4*12*27)
71
72 static void* dv_anchor[DV_ANCHOR_SIZE];
73
74 #define TEX_VLC_BITS 9 65 #define TEX_VLC_BITS 9
75 66
76 #if ENABLE_SMALL 67 #if ENABLE_SMALL
77 #define DV_VLC_MAP_RUN_SIZE 15 68 #define DV_VLC_MAP_RUN_SIZE 15
78 #define DV_VLC_MAP_LEV_SIZE 23 69 #define DV_VLC_MAP_LEV_SIZE 23
86 /* VLC encoding lookup table */ 77 /* VLC encoding lookup table */
87 static struct dv_vlc_pair { 78 static struct dv_vlc_pair {
88 uint32_t vlc; 79 uint32_t vlc;
89 uint8_t size; 80 uint8_t size;
90 } dv_vlc_map[DV_VLC_MAP_RUN_SIZE][DV_VLC_MAP_LEV_SIZE]; 81 } dv_vlc_map[DV_VLC_MAP_RUN_SIZE][DV_VLC_MAP_LEV_SIZE];
82
/**
 * Return the number of work chunks (video segments) that are dispatched
 * to the codec workers for the DV profile d.
 *
 * The base count is n_difchan * difseg_size * 27, i.e. 27 video segments
 * per DIF sequence per DIF channel.  For the 1080i50 and 720p50 profiles
 * some sequences are never scheduled (see the filter in
 * dv_init_dynamic_tables()), so their segments are subtracted here.
 */
83 static inline int dv_work_pool_size(const DVprofile *d)
84 {
85 int size = d->n_difchan*d->difseg_size*27;
86 if (DV_PROFILE_IS_1080i50(d))
/* sequence 11 is skipped on every channel except channel 0;
 * presumably 3 channels * 1 sequence -- confirm against the profile table */
87 size -= 3*27;
88 if (DV_PROFILE_IS_720p50(d))
/* sequences above 9 are skipped;
 * presumably 2 channels * 2 sequences -- confirm against the profile table */
89 size -= 4*27;
90 return size;
91 }
92
/**
 * Fill d->work_chunks with one packed descriptor per video segment that
 * has to be processed for profile d.
 *
 * Each descriptor is an integer stored in a void* with this layout:
 *   bits 18 and up : p, index of an 80-byte block inside the frame buffer
 *                    (consumers use &s->buf[(work_chunk>>18)*80])
 *   bits 16..17    : c, DIF channel
 *   bits  8..15    : s, DIF sequence within the channel
 *   bits  0..7     : j, video segment within the sequence (0..26)
 *
 * The table is built only once: if its last entry is already non-zero the
 * function returns immediately.
 *
 * @return 0 in every case
 */
93 static int dv_init_dynamic_tables(const DVprofile *d)
94 {
95 int j,i,c,s,p;
96
/* last slot already filled => table was built by an earlier call */
97 if (d->work_chunks[dv_work_pool_size(d)-1])
98 return 0;
99
/* p counts blocks from the start of the frame, i is the output index */
100 p = i = 0;
101 for (c=0; c<d->n_difchan; c++) {
102 for (s=0; s<d->difseg_size; s++) {
/* 6 blocks precede the video data of each sequence
 * (presumably header/subcode/VAUX blocks -- confirm) */
103 p += 6;
104 for (j=0; j<27; j++) {
/* one extra block is stepped over before every third segment
 * (presumably an audio block -- confirm) */
105 p += !(j%3);
/* unused sequences get no entry, but p below still advances past them */
106 if (!(DV_PROFILE_IS_1080i50(d) && c != 0 && s == 11) &&
107 !(DV_PROFILE_IS_720p50(d) && s > 9)) {
108 d->work_chunks[i++] = (void*)(size_t)((p<<18)|(c << 16)|(s << 8)|j);
109 }
/* step over the 5 blocks belonging to this video segment */
110 p += 5;
111 }
112 }
113 }
114 return 0;
115 }
91 116
92 static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) 117 static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
93 { 118 {
94 int i, q, a; 119 int i, q, a;
95 120
135 uint8_t new_dv_vlc_len[NB_DV_VLC*2]; 160 uint8_t new_dv_vlc_len[NB_DV_VLC*2];
136 uint8_t new_dv_vlc_run[NB_DV_VLC*2]; 161 uint8_t new_dv_vlc_run[NB_DV_VLC*2];
137 int16_t new_dv_vlc_level[NB_DV_VLC*2]; 162 int16_t new_dv_vlc_level[NB_DV_VLC*2];
138 163
139 done = 1; 164 done = 1;
140
141 /* dv_anchor lets each thread know its ID */
142 for (i = 0; i < DV_ANCHOR_SIZE; i++)
143 dv_anchor[i] = (void*)(size_t)i;
144 165
145 /* it's faster to include sign bit in a generic VLC parsing scheme */ 166 /* it's faster to include sign bit in a generic VLC parsing scheme */
146 for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) { 167 for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) {
147 new_dv_vlc_bits[j] = dv_vlc_bits[i]; 168 new_dv_vlc_bits[j] = dv_vlc_bits[i];
148 new_dv_vlc_len[j] = dv_vlc_len[i]; 169 new_dv_vlc_len[j] = dv_vlc_len[i];
358 if (bits_left > 0) { 379 if (bits_left > 0) {
359 put_bits(pb, bits_left, get_bits(gb, bits_left)); 380 put_bits(pb, bits_left, get_bits(gb, bits_left));
360 } 381 }
361 } 382 }
362 383
/**
 * Compute the position of macroblock m of a work chunk.
 *
 * @param s          codec context; s->sys supplies difseg_size, video_place
 *                   and height, s->buf is the current frame buffer
 * @param work_chunk packed descriptor: channel in bits 16..17, sequence in
 *                   bits 8..15, segment slot in bits 0..7
 * @param m          macroblock index inside the segment (callers pass 0..4)
 * @param mb_x       receives the low 8 bits of the video_place entry
 * @param mb_y       receives the remaining high bits of the video_place entry
 */
384 static inline void dv_calculate_mb_xy(DVVideoContext *s, int work_chunk, int m, int *mb_x, int *mb_y)
385 {
386 int i, chan, seg, slot;
387
388 chan = (work_chunk>>16)&0x03;
389 seg = (work_chunk>>8)&0xff;
390 slot = (work_chunk)&0xff;
391
/* video_place holds 5 entries per segment and 27 segments per sequence */
392 i = (chan*s->sys->difseg_size+seg)*27*5 + slot*5 + m;
393 *mb_x = s->sys->video_place[i] & 0xff;
394 *mb_y = s->sys->video_place[i] >> 8;
395
396 /* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */
/* applies only when bits 0x0C of s->buf[1] are clear:
 * mb_y > 17 is reduced by 18, any other mb_y is increased by 72 */
397 if (s->sys->height == 720 && !(s->buf[1]&0x0C)) {
398 *mb_y -= (*mb_y>17)?18:-72; /* shifting the Y coordinate down by 72/2 macro blocks */
399 }
400 }
401
363 /* mb_x and mb_y are in units of 8 pixels */ 402 /* mb_x and mb_y are in units of 8 pixels */
364 static inline void dv_decode_video_segment(DVVideoContext *s, 403 static inline void dv_decode_video_segment(DVVideoContext *s, int work_chunk)
365 const uint8_t *buf_ptr1,
366 const uint16_t *mb_pos_ptr)
367 { 404 {
368 int quant, dc, dct_mode, class1, j; 405 int quant, dc, dct_mode, class1, j;
369 int mb_index, mb_x, mb_y, v, last_index; 406 int mb_index, mb_x, mb_y, last_index;
370 int y_stride, linesize; 407 int y_stride, linesize;
371 DCTELEM *block, *block1; 408 DCTELEM *block, *block1;
372 int c_offset; 409 int c_offset;
373 uint8_t *y_ptr; 410 uint8_t *y_ptr;
374 const uint8_t *buf_ptr; 411 const uint8_t *buf_ptr;
385 assert((((int)vs_bit_buffer) & 7) == 0); 422 assert((((int)vs_bit_buffer) & 7) == 0);
386 423
387 memset(sblock, 0, sizeof(sblock)); 424 memset(sblock, 0, sizeof(sblock));
388 425
389 /* pass 1 : read DC and AC coefficients in blocks */ 426 /* pass 1 : read DC and AC coefficients in blocks */
390 buf_ptr = buf_ptr1; 427 buf_ptr = &s->buf[(work_chunk>>18)*80];
391 block1 = &sblock[0][0]; 428 block1 = &sblock[0][0];
392 mb1 = mb_data; 429 mb1 = mb_data;
393 init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80); 430 init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80);
394 for (mb_index = 0; mb_index < 5; mb_index++, mb1 += s->sys->bpm, block1 += s->sys->bpm * 64) { 431 for (mb_index = 0; mb_index < 5; mb_index++, mb1 += s->sys->bpm, block1 += s->sys->bpm * 64) {
395 /* skip header */ 432 /* skip header */
488 525
489 /* compute idct and place blocks */ 526 /* compute idct and place blocks */
490 block = &sblock[0][0]; 527 block = &sblock[0][0];
491 mb = mb_data; 528 mb = mb_data;
492 for (mb_index = 0; mb_index < 5; mb_index++) { 529 for (mb_index = 0; mb_index < 5; mb_index++) {
493 v = *mb_pos_ptr++; 530 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
494 mb_x = v & 0xff;
495 mb_y = v >> 8;
496 /* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */
497 if (s->sys->height == 720 && !(s->buf[1] & 0x0C)) {
498 mb_y -= (mb_y > 17) ? 18 : -72; /* shifting the Y coordinate down by 72/2 macroblocks */
499 }
500 531
501 /* idct_put'ting luminance */ 532 /* idct_put'ting luminance */
502 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) || 533 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) ||
503 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) || 534 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) ||
504 (s->sys->height >= 720 && mb_y != 134)) { 535 (s->sys->height >= 720 && mb_y != 134)) {
829 } 860 }
830 } 861 }
831 } 862 }
832 } 863 }
833 864
834 static inline void dv_encode_video_segment(DVVideoContext *s, 865 static inline void dv_encode_video_segment(DVVideoContext *s, int work_chunk)
835 uint8_t *dif, 866 {
836 const uint16_t *mb_pos_ptr) 867 int mb_index, i, j;
837 {
838 int mb_index, i, j, v;
839 int mb_x, mb_y, c_offset, linesize; 868 int mb_x, mb_y, c_offset, linesize;
840 uint8_t* y_ptr; 869 uint8_t* y_ptr;
841 uint8_t* data; 870 uint8_t* data;
842 uint8_t* ptr; 871 uint8_t* ptr;
872 uint8_t* dif;
843 int do_edge_wrap; 873 int do_edge_wrap;
844 DECLARE_ALIGNED_16(DCTELEM, block[64]); 874 DECLARE_ALIGNED_16(DCTELEM, block[64]);
845 EncBlockInfo enc_blks[5*6]; 875 EncBlockInfo enc_blks[5*6];
846 PutBitContext pbs[5*6]; 876 PutBitContext pbs[5*6];
847 PutBitContext* pb; 877 PutBitContext* pb;
849 int vs_bit_size = 0; 879 int vs_bit_size = 0;
850 int qnos[5]; 880 int qnos[5];
851 881
852 assert((((int)block) & 15) == 0); 882 assert((((int)block) & 15) == 0);
853 883
884 dif = &s->buf[(work_chunk>>18)*80];
854 enc_blk = &enc_blks[0]; 885 enc_blk = &enc_blks[0];
855 pb = &pbs[0]; 886 pb = &pbs[0];
856 for (mb_index = 0; mb_index < 5; mb_index++) { 887 for (mb_index = 0; mb_index < 5; mb_index++) {
857 v = *mb_pos_ptr++; 888 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
858 mb_x = v & 0xff;
859 mb_y = v >> 8;
860 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3); 889 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3);
861 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] + 890 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] +
862 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3); 891 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3);
863 do_edge_wrap = 0; 892 do_edge_wrap = 0;
864 qnos[mb_index] = 15; /* No quantization */ 893 qnos[mb_index] = 15; /* No quantization */
982 flush_put_bits(&pbs[j]); 1011 flush_put_bits(&pbs[j]);
983 } 1012 }
984 1013
985 static int dv_decode_mt(AVCodecContext *avctx, void* sl) 1014 static int dv_decode_mt(AVCodecContext *avctx, void* sl)
986 { 1015 {
987 DVVideoContext *s = avctx->priv_data; 1016 dv_decode_video_segment((DVVideoContext *)avctx->priv_data, (size_t)sl);
988 int slice = (size_t)sl;
989
990 /* which DIF channel is this? */
991 int chan = slice / (s->sys->difseg_size * 27);
992
993 /* slice within the DIF channel */
994 int chan_slice = slice % (s->sys->difseg_size * 27);
995
996 /* byte offset of this channel's data */
997 int chan_offset = chan * s->sys->difseg_size * 150 * 80;
998
999 /* DIF sequence */
1000 int seq = chan_slice / 27;
1001
1002 /* in 1080i50 and 720p50 some seq are unused */
1003 if ((DV_PROFILE_IS_1080i50(s->sys) && chan != 0 && seq == 11) ||
1004 (DV_PROFILE_IS_720p50(s->sys) && seq > 9))
1005 return 0;
1006
1007 dv_decode_video_segment(s, &s->buf[(seq * 6 + (chan_slice / 3)
1008 + chan_slice * 5 + 7)
1009 * 80 + chan_offset],
1010 &s->sys->video_place[slice * 5]);
1011 return 0; 1017 return 0;
1012 } 1018 }
1013 1019
1014 #ifdef CONFIG_DVVIDEO_ENCODER 1020 #ifdef CONFIG_DVVIDEO_ENCODER
1015 static int dv_encode_mt(AVCodecContext *avctx, void* sl) 1021 static int dv_encode_mt(AVCodecContext *avctx, void* sl)
1016 { 1022 {
1017 DVVideoContext *s = avctx->priv_data; 1023 dv_encode_video_segment((DVVideoContext *)avctx->priv_data, (size_t)sl);
1018 int slice = (size_t)sl;
1019
1020 /* which DIF channel is this? */
1021 int chan = slice / (s->sys->difseg_size * 27);
1022
1023 /* slice within the DIF channel */
1024 int chan_slice = slice % (s->sys->difseg_size * 27);
1025
1026 /* byte offset of this channel's data */
1027 int chan_offset = chan * s->sys->difseg_size * 150 * 80;
1028
1029 dv_encode_video_segment(s, &s->buf[((chan_slice / 27) * 6 + (chan_slice / 3)
1030 + chan_slice * 5 + 7)
1031 * 80 + chan_offset],
1032 &s->sys->video_place[slice * 5]);
1033 return 0; 1024 return 0;
1034 } 1025 }
1035 #endif 1026 #endif
1036 1027
1037 #ifdef CONFIG_DVVIDEO_DECODER 1028 #ifdef CONFIG_DVVIDEO_DECODER
1042 const uint8_t *buf, int buf_size) 1033 const uint8_t *buf, int buf_size)
1043 { 1034 {
1044 DVVideoContext *s = avctx->priv_data; 1035 DVVideoContext *s = avctx->priv_data;
1045 1036
1046 s->sys = dv_frame_profile(buf); 1037 s->sys = dv_frame_profile(buf);
1047 if (!s->sys || buf_size < s->sys->frame_size) 1038 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys))
1048 return -1; /* NOTE: we only accept several full frames */ 1039 return -1; /* NOTE: we only accept several full frames */
1049 1040
1050 if (s->picture.data[0]) 1041 if (s->picture.data[0])
1051 avctx->release_buffer(avctx, &s->picture); 1042 avctx->release_buffer(avctx, &s->picture);
1052 1043
1062 } 1053 }
1063 s->picture.interlaced_frame = 1; 1054 s->picture.interlaced_frame = 1;
1064 s->picture.top_field_first = 0; 1055 s->picture.top_field_first = 0;
1065 1056
1066 s->buf = buf; 1057 s->buf = buf;
1067 avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, 1058 avctx->execute(avctx, dv_decode_mt, s->sys->work_chunks, NULL,
1068 s->sys->n_difchan * s->sys->difseg_size * 27); 1059 dv_work_pool_size(s->sys));
1069 1060
1070 emms_c(); 1061 emms_c();
1071 1062
1072 /* return image */ 1063 /* return image */
1073 *data_size = sizeof(AVFrame); 1064 *data_size = sizeof(AVFrame);
1206 void *data) 1197 void *data)
1207 { 1198 {
1208 DVVideoContext *s = c->priv_data; 1199 DVVideoContext *s = c->priv_data;
1209 1200
1210 s->sys = dv_codec_profile(c); 1201 s->sys = dv_codec_profile(c);
1211 if (!s->sys) 1202 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys))
1212 return -1;
1213 if (buf_size < s->sys->frame_size)
1214 return -1; 1203 return -1;
1215 1204
1216 c->pix_fmt = s->sys->pix_fmt; 1205 c->pix_fmt = s->sys->pix_fmt;
1217 s->picture = *((AVFrame *)data); 1206 s->picture = *((AVFrame *)data);
1218 s->picture.key_frame = 1; 1207 s->picture.key_frame = 1;
1219 s->picture.pict_type = FF_I_TYPE; 1208 s->picture.pict_type = FF_I_TYPE;
1220 1209
1221 s->buf = buf; 1210 s->buf = buf;
1222 c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, 1211 c->execute(c, dv_encode_mt, s->sys->work_chunks, NULL,
1223 s->sys->n_difchan * s->sys->difseg_size * 27); 1212 dv_work_pool_size(s->sys));
1224 1213
1225 emms_c(); 1214 emms_c();
1226 1215
1227 dv_format_frame(s, buf); 1216 dv_format_frame(s, buf);
1228 1217