dnxhdenc.c @ changeset 8566:48a4d9f4c6f8 (Mercurial repository libavcodec.hg)

Changeset description: "RV30 decoder passes possible frame sizes in extradata and
selects an appropriate frame size from them in slice, make my decoder do that as
well. This fixes issue 779."

author:   kostya
date:     Sun, 11 Jan 2009 08:03:45 +0000
parents:  bf6a78c6697b
children: 7a463923ecd1

/*
 * VC3/DNxHD encoder
 * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
 *
 * VC-3 encoder funded by the British Broadcasting Corporation
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

//#define DEBUG
#define RC_VARIANCE 1 // use variance or ssd for fast rc

#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "dnxhdenc.h"

int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);

#define LAMBDA_FRAC_BITS 10

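/* Read four rows of pixels into the top half of an 8x8 block and mirror them
 * into the bottom half; used (as get_pixels_8x4_sym) for the bottom macroblock
 * row of 1080-line interlaced content, where only four source rows remain. */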
static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int i;
    for (i = 0; i < 4; i++) {
        block[0] = pixels[0]; block[1] = pixels[1];
        block[2] = pixels[2]; block[3] = pixels[3];
        block[4] = pixels[4]; block[5] = pixels[5];
        block[6] = pixels[6]; block[7] = pixels[7];
        pixels += line_size;
        block += 8;
    }
    memcpy(block,    block- 8, sizeof(*block)*8);
    memcpy(block+ 8, block-16, sizeof(*block)*8);
    memcpy(block+16, block-24, sizeof(*block)*8);
    memcpy(block+24, block-32, sizeof(*block)*8);
}

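/* Build run/level VLC tables from the CID table. vlc_codes/vlc_bits are
 * offset by max_level*2 so they can be indexed directly with the signed
 * (level<<1)|run_flag value used while encoding. */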
static int dnxhd_init_vlc(DNXHDEncContext *ctx)
{
    int i, j, level, run;
    int max_level = 1<<(ctx->cid_table->bit_depth+2);

    CHECKED_ALLOCZ(ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes));
    CHECKED_ALLOCZ(ctx->vlc_bits,  max_level*4*sizeof(*ctx->vlc_bits));
    CHECKED_ALLOCZ(ctx->run_codes, 63*2);
    CHECKED_ALLOCZ(ctx->run_bits,  63);

    ctx->vlc_codes += max_level*2;
    ctx->vlc_bits  += max_level*2;
    for (level = -max_level; level < max_level; level++) {
        for (run = 0; run < 2; run++) {
            int index = (level<<1)|run;
            int sign, offset = 0, alevel = level;

            MASK_ABS(sign, alevel);
            if (alevel > 64) {
                offset = (alevel-1)>>6;
                alevel -= offset<<6;
            }
            for (j = 0; j < 257; j++) {
                if (ctx->cid_table->ac_level[j] == alevel &&
                    (!offset || (ctx->cid_table->ac_index_flag[j] && offset)) &&
                    (!run    || (ctx->cid_table->ac_run_flag  [j] && run))) {
                    assert(!ctx->vlc_codes[index]);
                    if (alevel) {
                        ctx->vlc_codes[index] = (ctx->cid_table->ac_codes[j]<<1)|(sign&1);
                        ctx->vlc_bits [index] = ctx->cid_table->ac_bits[j]+1;
                    } else {
                        ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
                        ctx->vlc_bits [index] = ctx->cid_table->ac_bits [j];
                    }
                    break;
                }
            }
            assert(!alevel || j < 257);
            if (offset) {
                ctx->vlc_codes[index] = (ctx->vlc_codes[index]<<ctx->cid_table->index_bits)|offset;
                ctx->vlc_bits [index]+= ctx->cid_table->index_bits;
            }
        }
    }
    for (i = 0; i < 62; i++) {
        int run = ctx->cid_table->run[i];
        assert(run < 63);
        ctx->run_codes[run] = ctx->cid_table->run_codes[i];
        ctx->run_bits [run] = ctx->cid_table->run_bits[i];
    }
    return 0;
 fail:
    return -1;
}

static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
{
    // init first elem to 1 to avoid div by 0 in convert_matrix
    uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*
    int qscale, i;

    CHECKED_ALLOCZ(ctx->qmatrix_l,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int));
    CHECKED_ALLOCZ(ctx->qmatrix_c,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int));
    CHECKED_ALLOCZ(ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
    CHECKED_ALLOCZ(ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));

    for (i = 1; i < 64; i++) {
        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
        weight_matrix[j] = ctx->cid_table->luma_weight[i];
    }
    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
    for (i = 1; i < 64; i++) {
        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
        weight_matrix[j] = ctx->cid_table->chroma_weight[i];
    }
    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
    for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
        for (i = 0; i < 64; i++) {
            ctx->qmatrix_l  [qscale]   [i] <<= 2; ctx->qmatrix_c  [qscale]   [i] <<= 2;
            ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
            ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
        }
    }
    return 0;
 fail:
    return -1;
}

static int dnxhd_init_rc(DNXHDEncContext *ctx)
{
    CHECKED_ALLOCZ(ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry));
    if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
        CHECKED_ALLOCZ(ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry));

    ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4) * 8;
    ctx->qscale = 1;
    ctx->lambda = 2<<LAMBDA_FRAC_BITS; // qscale 2
    return 0;
 fail:
    return -1;
}

static int dnxhd_encode_init(AVCodecContext *avctx)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int i, index;

    ctx->cid = ff_dnxhd_find_cid(avctx);
    if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
        av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
        return -1;
    }
    av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);

    index = ff_dnxhd_get_cid_table(ctx->cid);
    ctx->cid_table = &ff_dnxhd_cid_table[index];

    ctx->m.avctx    = avctx;
    ctx->m.mb_intra = 1;
    ctx->m.h263_aic = 1;

    ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;

    dsputil_init(&ctx->m.dsp, avctx);
    ff_dct_common_init(&ctx->m);
#ifdef HAVE_MMX
    ff_dnxhd_init_mmx(ctx);
#endif
    if (!ctx->m.dct_quantize)
        ctx->m.dct_quantize = dct_quantize_c;

    ctx->m.mb_height = (avctx->height + 15) / 16;
    ctx->m.mb_width  = (avctx->width  + 15) / 16;

    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
        ctx->interlaced = 1;
        ctx->m.mb_height /= 2;
    }

    ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;

    if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
        ctx->m.intra_quant_bias = avctx->intra_quant_bias;
    if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0) // XXX tune lbias/cbias
        return -1;

    if (dnxhd_init_vlc(ctx) < 0)
        return -1;
    if (dnxhd_init_rc(ctx) < 0)
        return -1;

    CHECKED_ALLOCZ(ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t));
    CHECKED_ALLOCZ(ctx->mb_bits,    ctx->m.mb_num   *sizeof(uint16_t));
    CHECKED_ALLOCZ(ctx->mb_qscale,  ctx->m.mb_num   *sizeof(uint8_t));

    ctx->frame.key_frame = 1;
    ctx->frame.pict_type = FF_I_TYPE;
    ctx->m.avctx->coded_frame = &ctx->frame;

    if (avctx->thread_count > MAX_THREADS || (avctx->thread_count > ctx->m.mb_height)) {
        av_log(avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    ctx->thread[0] = ctx;
    for (i = 1; i < avctx->thread_count; i++) {
        ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
        memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
    }

    for (i = 0; i < avctx->thread_count; i++) {
        ctx->thread[i]->m.start_mb_y = (ctx->m.mb_height*(i  ) + avctx->thread_count/2) / avctx->thread_count;
        ctx->thread[i]->m.end_mb_y   = (ctx->m.mb_height*(i+1) + avctx->thread_count/2) / avctx->thread_count;
    }

    return 0;
 fail: //for CHECKED_ALLOCZ
    return -1;
}

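/* Write the fixed-layout coding unit header. The macroblock slice index
 * (ctx->msip, filled in dnxhd_encode_picture) starts at offset 0x170 and the
 * slice bitstream itself at offset 640. */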
static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 };

    memcpy(buf, header_prefix, 5);
    buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
    buf[6] = 0x80; // crc flag off
    buf[7] = 0xa0; // reserved
    AV_WB16(buf + 0x18, avctx->height); // ALPF
    AV_WB16(buf + 0x1a, avctx->width);  // SPL
    AV_WB16(buf + 0x1d, avctx->height); // NAL

    buf[0x21] = 0x38; // FIXME 8 bit per comp
    buf[0x22] = 0x88 + (ctx->frame.interlaced_frame<<2);
    AV_WB32(buf + 0x28, ctx->cid); // CID
    buf[0x2c] = ctx->interlaced ? 0 : 0x80;

    buf[0x5f] = 0x01; // UDL

    buf[0x167] = 0x02; // reserved
    AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
    buf[0x16d] = ctx->m.mb_height; // Ns
    buf[0x16f] = 0x10; // reserved

    ctx->msip = buf + 0x170;
    return 0;
}

static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
{
    int nbits;
    if (diff < 0) {
        nbits = av_log2_16bit(-2*diff);
        diff--;
    } else {
        nbits = av_log2_16bit(2*diff);
    }
    put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
             (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
}

static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
{
    int last_non_zero = 0;
    int slevel, i, j;

    dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
    ctx->m.last_dc[n] = block[0];

    for (i = 1; i <= last_index; i++) {
        j = ctx->m.intra_scantable.permutated[i];
        slevel = block[j];
        if (slevel) {
            int run_level = i - last_non_zero - 1;
            int rlevel = (slevel<<1)|!!run_level;
            put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
            if (run_level)
                put_bits(&ctx->m.pb, ctx->run_bits[run_level], ctx->run_codes[run_level]);
            last_non_zero = i;
        }
    }
    put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB
}

static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
{
    const uint8_t *weight_matrix;
    int level;
    int i;

    weight_matrix = (n&2) ? ctx->cid_table->chroma_weight : ctx->cid_table->luma_weight;

    for (i = 1; i <= last_index; i++) {
        int j = ctx->m.intra_scantable.permutated[i];
        level = block[j];
        if (level) {
            if (level < 0) {
                level = (1-2*level) * qscale * weight_matrix[i];
                if (weight_matrix[i] != 32)
                    level += 32;
                level >>= 6;
                level = -level;
            } else {
                level = (2*level+1) * qscale * weight_matrix[i];
                if (weight_matrix[i] != 32)
                    level += 32;
                level >>= 6;
            }
            block[j] = level;
        }
    }
}

static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
{
    int score = 0;
    int i;
    for (i = 0; i < 64; i++)
        score += (block[i]-qblock[i])*(block[i]-qblock[i]);
    return score;
}

static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
{
    int last_non_zero = 0;
    int bits = 0;
    int i, j, level;
    for (i = 1; i <= last_index; i++) {
        j = ctx->m.intra_scantable.permutated[i];
        level = block[j];
        if (level) {
            int run_level = i - last_non_zero - 1;
            bits += ctx->vlc_bits[(level<<1)|!!run_level]+ctx->run_bits[run_level];
            last_non_zero = i;
        }
    }
    return bits;
}

static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
{
    const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize)   + (mb_x << 4);
    const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
    const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
    DSPContext *dsp = &ctx->m.dsp;

    dsp->get_pixels(ctx->blocks[0], ptr_y    , ctx->m.linesize);
    dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
    dsp->get_pixels(ctx->blocks[2], ptr_u    , ctx->m.uvlinesize);
    dsp->get_pixels(ctx->blocks[3], ptr_v    , ctx->m.uvlinesize);

    if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
        if (ctx->interlaced) {
            ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset    , ctx->m.linesize);
            ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
            ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset   , ctx->m.uvlinesize);
            ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset   , ctx->m.uvlinesize);
        } else {
            dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]);
            dsp->clear_block(ctx->blocks[6]); dsp->clear_block(ctx->blocks[7]);
        }
    } else {
        dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset    , ctx->m.linesize);
        dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
        dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset   , ctx->m.uvlinesize);
        dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset   , ctx->m.uvlinesize);
    }
}

static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
{
    if (i&2) {
        ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
        ctx->m.q_intra_matrix   = ctx->qmatrix_c;
        return 1 + (i&1);
    } else {
        ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
        ctx->m.q_intra_matrix   = ctx->qmatrix_l;
        return 0;
    }
}

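/* Threaded worker: for the current qscale, accumulate each macroblock's
 * DC+AC bit cost into ctx->mb_rc, plus its reconstruction SSD when
 * rate-distortion macroblock decision (or SSD-based fast RC) is in use. */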
static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg)
{
    DNXHDEncContext *ctx = *(void**)arg;
    int mb_y, mb_x;
    int qscale = ctx->thread[0]->qscale;

    for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
        ctx->m.last_dc[0] =
        ctx->m.last_dc[1] =
        ctx->m.last_dc[2] = 1024;

        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
            unsigned mb = mb_y * ctx->m.mb_width + mb_x;
            int ssd     = 0;
            int ac_bits = 0;
            int dc_bits = 0;
            int i;

            dnxhd_get_blocks(ctx, mb_x, mb_y);

            for (i = 0; i < 8; i++) {
                DECLARE_ALIGNED_16(DCTELEM, block[64]);
                DCTELEM *src_block = ctx->blocks[i];
                int overflow, nbits, diff, last_index;
                int n = dnxhd_switch_matrix(ctx, i);

                memcpy(block, src_block, sizeof(block));
                last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
                ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);

                diff = block[0] - ctx->m.last_dc[n];
                if (diff < 0) nbits = av_log2_16bit(-2*diff);
                else          nbits = av_log2_16bit( 2*diff);
                dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;

                ctx->m.last_dc[n] = block[0];

                if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
                    dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
                    ctx->m.dsp.idct(block);
                    ssd += dnxhd_ssd_block(block, src_block);
                }
            }
            ctx->mb_rc[qscale][mb].ssd = ssd;
            ctx->mb_rc[qscale][mb].bits = ac_bits+dc_bits+12+8*ctx->vlc_bits[0];
        }
    }
    return 0;
}

static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg)
{
    DNXHDEncContext *ctx = *(void**)arg;
    int mb_y, mb_x;

    for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
        ctx->m.last_dc[0] =
        ctx->m.last_dc[1] =
        ctx->m.last_dc[2] = 1024;
        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
            unsigned mb = mb_y * ctx->m.mb_width + mb_x;
            int qscale = ctx->mb_qscale[mb];
            int i;

            put_bits(&ctx->m.pb, 12, qscale<<1);

            dnxhd_get_blocks(ctx, mb_x, mb_y);

            for (i = 0; i < 8; i++) {
                DCTELEM *block = ctx->blocks[i];
                int last_index, overflow;
                int n = dnxhd_switch_matrix(ctx, i);
                last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
                //START_TIMER;
                dnxhd_encode_block(ctx, block, last_index, n);
                //STOP_TIMER("encode_block");
            }
        }
        if (put_bits_count(&ctx->m.pb)&31)
            put_bits(&ctx->m.pb, 32-(put_bits_count(&ctx->m.pb)&31), 0);
    }
    flush_put_bits(&ctx->m.pb);
    return 0;
}

static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx, uint8_t *buf)
{
    int mb_y, mb_x;
    int i, offset = 0;
    for (i = 0; i < ctx->m.avctx->thread_count; i++) {
        int thread_size = 0;
        for (mb_y = ctx->thread[i]->m.start_mb_y; mb_y < ctx->thread[i]->m.end_mb_y; mb_y++) {
            ctx->slice_size[mb_y] = 0;
            for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
                unsigned mb = mb_y * ctx->m.mb_width + mb_x;
                ctx->slice_size[mb_y] += ctx->mb_bits[mb];
            }
            ctx->slice_size[mb_y]   = (ctx->slice_size[mb_y]+31)&~31;
            ctx->slice_size[mb_y] >>= 3;
            thread_size += ctx->slice_size[mb_y];
        }
        init_put_bits(&ctx->thread[i]->m.pb, buf + 640 + offset, thread_size);
        offset += thread_size;
    }
}

static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg)
{
    DNXHDEncContext *ctx = *(void**)arg;
    int mb_y, mb_x;
    for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
            unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
            uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
            int sum  = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
            int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
            ctx->mb_cmp[mb].value = varc;
            ctx->mb_cmp[mb].mb = mb;
        }
    }
    return 0;
}

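/* Rate control for FF_MB_DECISION_RD: search for a lambda such that picking,
 * per macroblock, the qscale minimizing bits*lambda + ssd keeps the frame
 * within ctx->frame_bits. */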
static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
{
    int lambda, up_step, down_step;
    int last_lower = INT_MAX, last_higher = 0;
    int x, y, q;

    for (q = 1; q < avctx->qmax; q++) {
        ctx->qscale = q;
        avctx->execute(avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*));
    }
    up_step = down_step = 2<<LAMBDA_FRAC_BITS;
    lambda = ctx->lambda;

    for (;;) {
        int bits = 0;
        int end = 0;
        if (lambda == last_higher) {
            lambda++;
            end = 1; // need to set final qscales/bits
        }
        for (y = 0; y < ctx->m.mb_height; y++) {
            for (x = 0; x < ctx->m.mb_width; x++) {
                unsigned min = UINT_MAX;
                int qscale = 1;
                int mb = y*ctx->m.mb_width+x;
                for (q = 1; q < avctx->qmax; q++) {
                    unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
                    if (score < min) {
                        min = score;
                        qscale = q;
                    }
                }
                bits += ctx->mb_rc[qscale][mb].bits;
                ctx->mb_qscale[mb] = qscale;
                ctx->mb_bits[mb] = ctx->mb_rc[qscale][mb].bits;
            }
            bits = (bits+31)&~31; // padding
            if (bits > ctx->frame_bits)
                break;
        }
        //dprintf(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n",
        //        lambda, last_higher, last_lower, bits, ctx->frame_bits);
        if (end) {
            if (bits > ctx->frame_bits)
                return -1;
            break;
        }
        if (bits < ctx->frame_bits) {
            last_lower = FFMIN(lambda, last_lower);
            if (last_higher != 0)
                lambda = (lambda+last_higher)>>1;
            else
                lambda -= down_step;
            down_step *= 5; // XXX tune ?
            up_step = 1<<LAMBDA_FRAC_BITS;
            lambda = FFMAX(1, lambda);
            if (lambda == last_lower)
                break;
        } else {
            last_higher = FFMAX(lambda, last_higher);
            if (last_lower != INT_MAX)
                lambda = (lambda+last_lower)>>1;
            else
                lambda += up_step;
            up_step *= 5;
            down_step = 1<<LAMBDA_FRAC_BITS;
        }
    }
    //dprintf(ctx->m.avctx, "out lambda %d\n", lambda);
    ctx->lambda = lambda;
    return 0;
}

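/* Bisect (with widening steps) for the frame-wide qscale at which the total
 * bit count crosses ctx->frame_bits; used by the fast rate control below. */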
static int dnxhd_find_qscale(DNXHDEncContext *ctx)
{
    int bits = 0;
    int up_step = 1;
    int down_step = 1;
    int last_higher = 0;
    int last_lower = INT_MAX;
    int qscale;
    int x, y;

    qscale = ctx->qscale;
    for (;;) {
        bits = 0;
        ctx->qscale = qscale;
        // XXX avoid recalculating bits
        ctx->m.avctx->execute(ctx->m.avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, ctx->m.avctx->thread_count, sizeof(void*));
        for (y = 0; y < ctx->m.mb_height; y++) {
            for (x = 0; x < ctx->m.mb_width; x++)
                bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits;
            bits = (bits+31)&~31; // padding
            if (bits > ctx->frame_bits)
                break;
        }
        //dprintf(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
        //        ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower);
        if (bits < ctx->frame_bits) {
            if (qscale == 1)
                return 1;
            if (last_higher == qscale - 1) {
                qscale = last_higher;
                break;
            }
            last_lower = FFMIN(qscale, last_lower);
            if (last_higher != 0)
                qscale = (qscale+last_higher)>>1;
            else
                qscale -= down_step++;
            if (qscale < 1)
                qscale = 1;
            up_step = 1;
        } else {
            if (last_lower == qscale + 1)
                break;
            last_higher = FFMAX(qscale, last_higher);
            if (last_lower != INT_MAX)
                qscale = (qscale+last_lower)>>1;
            else
                qscale += up_step++;
            down_step = 1;
            if (qscale >= ctx->m.avctx->qmax)
                return -1;
        }
    }
    //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
    ctx->qscale = qscale;
    return 0;
}

static int dnxhd_rc_cmp(const void *a, const void *b)
{
    return ((const RCCMPEntry *)b)->value - ((const RCCMPEntry *)a)->value;
}

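/* Fast rate control: start from the qscale found above, then raise qscale by
 * one on the macroblocks ranked by dnxhd_rc_cmp (spatial variance, or SSD
 * gain per bit when RC_VARIANCE is 0) until the worst-case padded size fits
 * in ctx->frame_bits. */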
static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
{
    int max_bits = 0;
    int ret, x, y;
    if ((ret = dnxhd_find_qscale(ctx)) < 0)
        return -1;
    for (y = 0; y < ctx->m.mb_height; y++) {
        for (x = 0; x < ctx->m.mb_width; x++) {
            int mb = y*ctx->m.mb_width+x;
            int delta_bits;
            ctx->mb_qscale[mb] = ctx->qscale;
            ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale][mb].bits;
            max_bits += ctx->mb_rc[ctx->qscale][mb].bits;
            if (!RC_VARIANCE) {
                delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits;
                ctx->mb_cmp[mb].mb = mb;
                ctx->mb_cmp[mb].value = delta_bits ?
                    ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits
                    : INT_MIN; //avoid increasing qscale
            }
        }
        max_bits += 31; //worst padding
    }
    if (!ret) {
        if (RC_VARIANCE)
            avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*));
        qsort(ctx->mb_cmp, ctx->m.mb_num, sizeof(RCCMPEntry), dnxhd_rc_cmp); // element size must match the RCCMPEntry array being sorted
        for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
            int mb = ctx->mb_cmp[x].mb;
            max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
            ctx->mb_qscale[mb] = ctx->qscale+1;
            ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits;
        }
    }
    return 0;
}

static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
{
    int i;

    for (i = 0; i < 3; i++) {
        ctx->frame.data[i]     = frame->data[i];
        ctx->frame.linesize[i] = frame->linesize[i];
    }

    for (i = 0; i < ctx->m.avctx->thread_count; i++) {
        ctx->thread[i]->m.linesize    = ctx->frame.linesize[0]<<ctx->interlaced;
        ctx->thread[i]->m.uvlinesize  = ctx->frame.linesize[1]<<ctx->interlaced;
        ctx->thread[i]->dct_y_offset  = ctx->m.linesize  *8;
        ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
    }

    ctx->frame.interlaced_frame = frame->interlaced_frame;
    ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
}

static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, const void *data)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int first_field = 1;
    int offset, i, ret;

    if (buf_size < ctx->cid_table->frame_size) {
        av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
        return -1;
    }

    dnxhd_load_picture(ctx, data);

 encode_coding_unit:
    for (i = 0; i < 3; i++) {
        ctx->src[i] = ctx->frame.data[i];
        if (ctx->interlaced && ctx->cur_field)
            ctx->src[i] += ctx->frame.linesize[i];
    }

    dnxhd_write_header(avctx, buf);

    if (avctx->mb_decision == FF_MB_DECISION_RD)
        ret = dnxhd_encode_rdo(avctx, ctx);
    else
        ret = dnxhd_encode_fast(avctx, ctx);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "picture could not fit ratecontrol constraints\n");
        return -1;
    }

    dnxhd_setup_threads_slices(ctx, buf);

    offset = 0;
    for (i = 0; i < ctx->m.mb_height; i++) {
        AV_WB32(ctx->msip + i * 4, offset);
        offset += ctx->slice_size[i];
        assert(!(ctx->slice_size[i] & 3));
    }

    avctx->execute(avctx, dnxhd_encode_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*));

    AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE); // EOF

    if (ctx->interlaced && first_field) {
        first_field     = 0;
        ctx->cur_field ^= 1;
        buf      += ctx->cid_table->coding_unit_size;
        buf_size -= ctx->cid_table->coding_unit_size;
        goto encode_coding_unit;
    }

    ctx->frame.quality = ctx->qscale*FF_QP2LAMBDA;

    return ctx->cid_table->frame_size;
}

static int dnxhd_encode_end(AVCodecContext *avctx)
{
    DNXHDEncContext *ctx = avctx->priv_data;
    int max_level = 1<<(ctx->cid_table->bit_depth+2);
    int i;

    av_free(ctx->vlc_codes-max_level*2);
    av_free(ctx->vlc_bits -max_level*2);
    av_freep(&ctx->run_codes);
    av_freep(&ctx->run_bits);

    av_freep(&ctx->mb_bits);
    av_freep(&ctx->mb_qscale);
    av_freep(&ctx->mb_rc);
    av_freep(&ctx->mb_cmp);
    av_freep(&ctx->slice_size);

    av_freep(&ctx->qmatrix_c);
    av_freep(&ctx->qmatrix_l);
    av_freep(&ctx->qmatrix_c16);
    av_freep(&ctx->qmatrix_l16);

    for (i = 1; i < avctx->thread_count; i++)
        av_freep(&ctx->thread[i]);

    return 0;
}

AVCodec dnxhd_encoder = {
    "dnxhd",
    CODEC_TYPE_VIDEO,
    CODEC_ID_DNXHD,
    sizeof(DNXHDEncContext),
    dnxhd_encode_init,
    dnxhd_encode_picture,
    dnxhd_encode_end,
    .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
    .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
};