Mercurial > libavcodec.hg
comparison dv.c @ 8118:890df98a7848 libavcodec
implementing more efficient (and direct) allocation of work for DV codec workers
field | value |
---|---|
author | romansh |
date | Sat, 08 Nov 2008 00:18:00 +0000 |
parents | 728e0e4fcb95 |
children | a9734fe0811e |
comparison legend: equal, deleted, inserted, replaced
8117:a0f9045e0a82 | 8118:890df98a7848 |
---|---|
60 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); | 60 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); |
61 void (*fdct[2])(DCTELEM *block); | 61 void (*fdct[2])(DCTELEM *block); |
62 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); | 62 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); |
63 } DVVideoContext; | 63 } DVVideoContext; |
64 | 64 |
65 /** | |
66 * MultiThreading - dv_anchor applies to entire DV codec, not just the avcontext | |
67 * one element is needed for each video segment in a DV frame | |
68 * at most there are 4 DIF channels * 12 DIF sequences * 27 video segments (1080i50) | |
69 */ | |
70 #define DV_ANCHOR_SIZE (4*12*27) | |
71 | |
72 static void* dv_anchor[DV_ANCHOR_SIZE]; | |
73 | |
74 #define TEX_VLC_BITS 9 | 65 #define TEX_VLC_BITS 9 |
75 | 66 |
76 #if ENABLE_SMALL | 67 #if ENABLE_SMALL |
77 #define DV_VLC_MAP_RUN_SIZE 15 | 68 #define DV_VLC_MAP_RUN_SIZE 15 |
78 #define DV_VLC_MAP_LEV_SIZE 23 | 69 #define DV_VLC_MAP_LEV_SIZE 23 |
86 /* VLC encoding lookup table */ | 77 /* VLC encoding lookup table */ |
87 static struct dv_vlc_pair { | 78 static struct dv_vlc_pair { |
88 uint32_t vlc; | 79 uint32_t vlc; |
89 uint8_t size; | 80 uint8_t size; |
90 } dv_vlc_map[DV_VLC_MAP_RUN_SIZE][DV_VLC_MAP_LEV_SIZE]; | 81 } dv_vlc_map[DV_VLC_MAP_RUN_SIZE][DV_VLC_MAP_LEV_SIZE]; |
82 | |
83 static inline int dv_work_pool_size(const DVprofile *d) | |
84 { | |
85 int size = d->n_difchan*d->difseg_size*27; | |
86 if (DV_PROFILE_IS_1080i50(d)) | |
87 size -= 3*27; | |
88 if (DV_PROFILE_IS_720p50(d)) | |
89 size -= 4*27; | |
90 return size; | |
91 } | |
92 | |
93 static int dv_init_dynamic_tables(const DVprofile *d) | |
94 { | |
95 int j,i,c,s,p; | |
96 | |
97 if (d->work_chunks[dv_work_pool_size(d)-1]) | |
98 return 0; | |
99 | |
100 p = i = 0; | |
101 for (c=0; c<d->n_difchan; c++) { | |
102 for (s=0; s<d->difseg_size; s++) { | |
103 p += 6; | |
104 for (j=0; j<27; j++) { | |
105 p += !(j%3); | |
106 if (!(DV_PROFILE_IS_1080i50(d) && c != 0 && s == 11) && | |
107 !(DV_PROFILE_IS_720p50(d) && s > 9)) { | |
108 d->work_chunks[i++] = (void*)(size_t)((p<<18)|(c << 16)|(s << 8)|j); | |
109 } | |
110 p += 5; | |
111 } | |
112 } | |
113 } | |
114 return 0; | |
115 } | |
91 | 116 |
92 static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) | 117 static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) |
93 { | 118 { |
94 int i, q, a; | 119 int i, q, a; |
95 | 120 |
135 uint8_t new_dv_vlc_len[NB_DV_VLC*2]; | 160 uint8_t new_dv_vlc_len[NB_DV_VLC*2]; |
136 uint8_t new_dv_vlc_run[NB_DV_VLC*2]; | 161 uint8_t new_dv_vlc_run[NB_DV_VLC*2]; |
137 int16_t new_dv_vlc_level[NB_DV_VLC*2]; | 162 int16_t new_dv_vlc_level[NB_DV_VLC*2]; |
138 | 163 |
139 done = 1; | 164 done = 1; |
140 | |
141 /* dv_anchor lets each thread know its ID */ | |
142 for (i = 0; i < DV_ANCHOR_SIZE; i++) | |
143 dv_anchor[i] = (void*)(size_t)i; | |
144 | 165 |
145 /* it's faster to include sign bit in a generic VLC parsing scheme */ | 166 /* it's faster to include sign bit in a generic VLC parsing scheme */ |
146 for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) { | 167 for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) { |
147 new_dv_vlc_bits[j] = dv_vlc_bits[i]; | 168 new_dv_vlc_bits[j] = dv_vlc_bits[i]; |
148 new_dv_vlc_len[j] = dv_vlc_len[i]; | 169 new_dv_vlc_len[j] = dv_vlc_len[i]; |
358 if (bits_left > 0) { | 379 if (bits_left > 0) { |
359 put_bits(pb, bits_left, get_bits(gb, bits_left)); | 380 put_bits(pb, bits_left, get_bits(gb, bits_left)); |
360 } | 381 } |
361 } | 382 } |
362 | 383 |
384 static inline void dv_calculate_mb_xy(DVVideoContext *s, int work_chunk, int m, int *mb_x, int *mb_y) | |
385 { | |
386 int i, chan, seg, slot; | |
387 | |
388 chan = (work_chunk>>16)&0x03; | |
389 seg = (work_chunk>>8)&0xff; | |
390 slot = (work_chunk)&0xff; | |
391 | |
392 i = (chan*s->sys->difseg_size+seg)*27*5 + slot*5 + m; | |
393 *mb_x = s->sys->video_place[i] & 0xff; | |
394 *mb_y = s->sys->video_place[i] >> 8; | |
395 | |
396 /* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */ | |
397 if (s->sys->height == 720 && !(s->buf[1]&0x0C)) { | |
398 *mb_y -= (*mb_y>17)?18:-72; /* shifting the Y coordinate down by 72/2 macro blocks */ | |
399 } | |
400 } | |
401 | |
363 /* mb_x and mb_y are in units of 8 pixels */ | 402 /* mb_x and mb_y are in units of 8 pixels */ |
364 static inline void dv_decode_video_segment(DVVideoContext *s, | 403 static inline void dv_decode_video_segment(DVVideoContext *s, int work_chunk) |
365 const uint8_t *buf_ptr1, | |
366 const uint16_t *mb_pos_ptr) | |
367 { | 404 { |
368 int quant, dc, dct_mode, class1, j; | 405 int quant, dc, dct_mode, class1, j; |
369 int mb_index, mb_x, mb_y, v, last_index; | 406 int mb_index, mb_x, mb_y, last_index; |
370 int y_stride, linesize; | 407 int y_stride, linesize; |
371 DCTELEM *block, *block1; | 408 DCTELEM *block, *block1; |
372 int c_offset; | 409 int c_offset; |
373 uint8_t *y_ptr; | 410 uint8_t *y_ptr; |
374 const uint8_t *buf_ptr; | 411 const uint8_t *buf_ptr; |
385 assert((((int)vs_bit_buffer) & 7) == 0); | 422 assert((((int)vs_bit_buffer) & 7) == 0); |
386 | 423 |
387 memset(sblock, 0, sizeof(sblock)); | 424 memset(sblock, 0, sizeof(sblock)); |
388 | 425 |
389 /* pass 1 : read DC and AC coefficients in blocks */ | 426 /* pass 1 : read DC and AC coefficients in blocks */ |
390 buf_ptr = buf_ptr1; | 427 buf_ptr = &s->buf[(work_chunk>>18)*80]; |
391 block1 = &sblock[0][0]; | 428 block1 = &sblock[0][0]; |
392 mb1 = mb_data; | 429 mb1 = mb_data; |
393 init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80); | 430 init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80); |
394 for (mb_index = 0; mb_index < 5; mb_index++, mb1 += s->sys->bpm, block1 += s->sys->bpm * 64) { | 431 for (mb_index = 0; mb_index < 5; mb_index++, mb1 += s->sys->bpm, block1 += s->sys->bpm * 64) { |
395 /* skip header */ | 432 /* skip header */ |
488 | 525 |
489 /* compute idct and place blocks */ | 526 /* compute idct and place blocks */ |
490 block = &sblock[0][0]; | 527 block = &sblock[0][0]; |
491 mb = mb_data; | 528 mb = mb_data; |
492 for (mb_index = 0; mb_index < 5; mb_index++) { | 529 for (mb_index = 0; mb_index < 5; mb_index++) { |
493 v = *mb_pos_ptr++; | 530 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y); |
494 mb_x = v & 0xff; | |
495 mb_y = v >> 8; | |
496 /* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */ | |
497 if (s->sys->height == 720 && !(s->buf[1] & 0x0C)) { | |
498 mb_y -= (mb_y > 17) ? 18 : -72; /* shifting the Y coordinate down by 72/2 macroblocks */ | |
499 } | |
500 | 531 |
501 /* idct_put'ting luminance */ | 532 /* idct_put'ting luminance */ |
502 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) || | 533 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) || |
503 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) || | 534 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) || |
504 (s->sys->height >= 720 && mb_y != 134)) { | 535 (s->sys->height >= 720 && mb_y != 134)) { |
829 } | 860 } |
830 } | 861 } |
831 } | 862 } |
832 } | 863 } |
833 | 864 |
834 static inline void dv_encode_video_segment(DVVideoContext *s, | 865 static inline void dv_encode_video_segment(DVVideoContext *s, int work_chunk) |
835 uint8_t *dif, | 866 { |
836 const uint16_t *mb_pos_ptr) | 867 int mb_index, i, j; |
837 { | |
838 int mb_index, i, j, v; | |
839 int mb_x, mb_y, c_offset, linesize; | 868 int mb_x, mb_y, c_offset, linesize; |
840 uint8_t* y_ptr; | 869 uint8_t* y_ptr; |
841 uint8_t* data; | 870 uint8_t* data; |
842 uint8_t* ptr; | 871 uint8_t* ptr; |
872 uint8_t* dif; | |
843 int do_edge_wrap; | 873 int do_edge_wrap; |
844 DECLARE_ALIGNED_16(DCTELEM, block[64]); | 874 DECLARE_ALIGNED_16(DCTELEM, block[64]); |
845 EncBlockInfo enc_blks[5*6]; | 875 EncBlockInfo enc_blks[5*6]; |
846 PutBitContext pbs[5*6]; | 876 PutBitContext pbs[5*6]; |
847 PutBitContext* pb; | 877 PutBitContext* pb; |
849 int vs_bit_size = 0; | 879 int vs_bit_size = 0; |
850 int qnos[5]; | 880 int qnos[5]; |
851 | 881 |
852 assert((((int)block) & 15) == 0); | 882 assert((((int)block) & 15) == 0); |
853 | 883 |
884 dif = &s->buf[(work_chunk>>18)*80]; | |
854 enc_blk = &enc_blks[0]; | 885 enc_blk = &enc_blks[0]; |
855 pb = &pbs[0]; | 886 pb = &pbs[0]; |
856 for (mb_index = 0; mb_index < 5; mb_index++) { | 887 for (mb_index = 0; mb_index < 5; mb_index++) { |
857 v = *mb_pos_ptr++; | 888 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y); |
858 mb_x = v & 0xff; | |
859 mb_y = v >> 8; | |
860 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3); | 889 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3); |
861 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] + | 890 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] + |
862 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3); | 891 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3); |
863 do_edge_wrap = 0; | 892 do_edge_wrap = 0; |
864 qnos[mb_index] = 15; /* No quantization */ | 893 qnos[mb_index] = 15; /* No quantization */ |
982 flush_put_bits(&pbs[j]); | 1011 flush_put_bits(&pbs[j]); |
983 } | 1012 } |
984 | 1013 |
985 static int dv_decode_mt(AVCodecContext *avctx, void* sl) | 1014 static int dv_decode_mt(AVCodecContext *avctx, void* sl) |
986 { | 1015 { |
987 DVVideoContext *s = avctx->priv_data; | 1016 dv_decode_video_segment((DVVideoContext *)avctx->priv_data, (size_t)sl); |
988 int slice = (size_t)sl; | |
989 | |
990 /* which DIF channel is this? */ | |
991 int chan = slice / (s->sys->difseg_size * 27); | |
992 | |
993 /* slice within the DIF channel */ | |
994 int chan_slice = slice % (s->sys->difseg_size * 27); | |
995 | |
996 /* byte offset of this channel's data */ | |
997 int chan_offset = chan * s->sys->difseg_size * 150 * 80; | |
998 | |
999 /* DIF sequence */ | |
1000 int seq = chan_slice / 27; | |
1001 | |
1002 /* in 1080i50 and 720p50 some seq are unused */ | |
1003 if ((DV_PROFILE_IS_1080i50(s->sys) && chan != 0 && seq == 11) || | |
1004 (DV_PROFILE_IS_720p50(s->sys) && seq > 9)) | |
1005 return 0; | |
1006 | |
1007 dv_decode_video_segment(s, &s->buf[(seq * 6 + (chan_slice / 3) | |
1008 + chan_slice * 5 + 7) | |
1009 * 80 + chan_offset], | |
1010 &s->sys->video_place[slice * 5]); | |
1011 return 0; | 1017 return 0; |
1012 } | 1018 } |
1013 | 1019 |
1014 #ifdef CONFIG_DVVIDEO_ENCODER | 1020 #ifdef CONFIG_DVVIDEO_ENCODER |
1015 static int dv_encode_mt(AVCodecContext *avctx, void* sl) | 1021 static int dv_encode_mt(AVCodecContext *avctx, void* sl) |
1016 { | 1022 { |
1017 DVVideoContext *s = avctx->priv_data; | 1023 dv_encode_video_segment((DVVideoContext *)avctx->priv_data, (size_t)sl); |
1018 int slice = (size_t)sl; | |
1019 | |
1020 /* which DIF channel is this? */ | |
1021 int chan = slice / (s->sys->difseg_size * 27); | |
1022 | |
1023 /* slice within the DIF channel */ | |
1024 int chan_slice = slice % (s->sys->difseg_size * 27); | |
1025 | |
1026 /* byte offset of this channel's data */ | |
1027 int chan_offset = chan * s->sys->difseg_size * 150 * 80; | |
1028 | |
1029 dv_encode_video_segment(s, &s->buf[((chan_slice / 27) * 6 + (chan_slice / 3) | |
1030 + chan_slice * 5 + 7) | |
1031 * 80 + chan_offset], | |
1032 &s->sys->video_place[slice * 5]); | |
1033 return 0; | 1024 return 0; |
1034 } | 1025 } |
1035 #endif | 1026 #endif |
1036 | 1027 |
1037 #ifdef CONFIG_DVVIDEO_DECODER | 1028 #ifdef CONFIG_DVVIDEO_DECODER |
1042 const uint8_t *buf, int buf_size) | 1033 const uint8_t *buf, int buf_size) |
1043 { | 1034 { |
1044 DVVideoContext *s = avctx->priv_data; | 1035 DVVideoContext *s = avctx->priv_data; |
1045 | 1036 |
1046 s->sys = dv_frame_profile(buf); | 1037 s->sys = dv_frame_profile(buf); |
1047 if (!s->sys || buf_size < s->sys->frame_size) | 1038 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys)) |
1048 return -1; /* NOTE: we only accept several full frames */ | 1039 return -1; /* NOTE: we only accept several full frames */ |
1049 | 1040 |
1050 if (s->picture.data[0]) | 1041 if (s->picture.data[0]) |
1051 avctx->release_buffer(avctx, &s->picture); | 1042 avctx->release_buffer(avctx, &s->picture); |
1052 | 1043 |
1062 } | 1053 } |
1063 s->picture.interlaced_frame = 1; | 1054 s->picture.interlaced_frame = 1; |
1064 s->picture.top_field_first = 0; | 1055 s->picture.top_field_first = 0; |
1065 | 1056 |
1066 s->buf = buf; | 1057 s->buf = buf; |
1067 avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, | 1058 avctx->execute(avctx, dv_decode_mt, s->sys->work_chunks, NULL, |
1068 s->sys->n_difchan * s->sys->difseg_size * 27); | 1059 dv_work_pool_size(s->sys)); |
1069 | 1060 |
1070 emms_c(); | 1061 emms_c(); |
1071 | 1062 |
1072 /* return image */ | 1063 /* return image */ |
1073 *data_size = sizeof(AVFrame); | 1064 *data_size = sizeof(AVFrame); |
1206 void *data) | 1197 void *data) |
1207 { | 1198 { |
1208 DVVideoContext *s = c->priv_data; | 1199 DVVideoContext *s = c->priv_data; |
1209 | 1200 |
1210 s->sys = dv_codec_profile(c); | 1201 s->sys = dv_codec_profile(c); |
1211 if (!s->sys) | 1202 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys)) |
1212 return -1; | |
1213 if (buf_size < s->sys->frame_size) | |
1214 return -1; | 1203 return -1; |
1215 | 1204 |
1216 c->pix_fmt = s->sys->pix_fmt; | 1205 c->pix_fmt = s->sys->pix_fmt; |
1217 s->picture = *((AVFrame *)data); | 1206 s->picture = *((AVFrame *)data); |
1218 s->picture.key_frame = 1; | 1207 s->picture.key_frame = 1; |
1219 s->picture.pict_type = FF_I_TYPE; | 1208 s->picture.pict_type = FF_I_TYPE; |
1220 | 1209 |
1221 s->buf = buf; | 1210 s->buf = buf; |
1222 c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, | 1211 c->execute(c, dv_encode_mt, s->sys->work_chunks, NULL, |
1223 s->sys->n_difchan * s->sys->difseg_size * 27); | 1212 dv_work_pool_size(s->sys)); |
1224 | 1213 |
1225 emms_c(); | 1214 emms_c(); |
1226 | 1215 |
1227 dv_format_frame(s, buf); | 1216 dv_format_frame(s, buf); |
1228 | 1217 |