annotate: vp8.c @ 12043:f9a0bd0888a4 (libavcodec)
changeset:  mpegaudio: call ff_mpegaudiodec_init_mmx() only from float decoder
            The mmx code is floating-point only, and this function does not know
            from which decoder it is called. Without this change, the integer
            decoder only "works" because the size of the context struct is smaller
            in this case, and the mmx init function writes the function pointer
            outside the allocated context.
author:     mru
date:       Thu, 01 Jul 2010 23:21:17 +0000
parents:    3c51d7ac41c9
children:   372f7fed2806
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
24 #include "avcodec.h" | |
25 #include "vp56.h" | |
26 #include "vp8data.h" | |
27 #include "vp8dsp.h" | |
28 #include "h264pred.h" | |
29 #include "rectangle.h" | |
30 | |
31 typedef struct { | |
32 uint8_t segment; | |
33 uint8_t skip; | |
34 // todo: make it possible to check for at least (i4x4 or split_mv) | |
35 // in one op. are others needed? | |
36 uint8_t mode; | |
37 uint8_t ref_frame; | |
38 uint8_t partitioning; | |
39 VP56mv mv; | |
40 VP56mv bmv[16]; | |
41 } VP8Macroblock; | |
42 | |
43 typedef struct { | |
44 AVCodecContext *avctx; | |
45 DSPContext dsp; | |
46 VP8DSPContext vp8dsp; | |
47 H264PredContext hpc; | |
11974 | 48 vp8_mc_func put_pixels_tab[3][3][3]; |
11921 | 49 AVFrame frames[4]; |
50 AVFrame *framep[4]; | |
51 uint8_t *edge_emu_buffer; | |
52 VP56RangeCoder c; ///< header context, includes mb modes and motion vectors | |
53 int profile; | |
54 | |
55 int mb_width; /* number of horizontal MB */ | |
56 int mb_height; /* number of vertical MB */ | |
57 int linesize; | |
58 int uvlinesize; | |
59 | |
60 int keyframe; | |
61 int invisible; | |
62 int update_last; ///< update VP56_FRAME_PREVIOUS with the current one | |
63 int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so | |
64 int update_altref; | |
65 | |
66 /** | |
67 * If this flag is not set, all the probability updates | |
68 * are discarded after this frame is decoded. | |
69 */ | |
70 int update_probabilities; | |
71 | |
72 /** | |
73 * All coefficients are contained in separate arith coding contexts. | |
74 * There can be 1, 2, 4, or 8 of these after the header context. | |
75 */ | |
76 int num_coeff_partitions; | |
77 VP56RangeCoder coeff_partition[8]; | |
78 | |
79 VP8Macroblock *macroblocks; | |
80 VP8Macroblock *macroblocks_base; | |
81 int mb_stride; | |
82 | |
83 uint8_t *intra4x4_pred_mode; | |
84 uint8_t *intra4x4_pred_mode_base; | |
85 int b4_stride; | |
86 | |
87 /** | |
88 * For coeff decode, we need to know whether the above block had non-zero | |
89 * coefficients. This means for each macroblock, we need data for 4 luma | |
90 * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 | |
91 * per macroblock. We keep the last row in top_nnz. | |
92 */ | |
93 uint8_t (*top_nnz)[9]; | |
94 DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; | |
95 | |
96 /** | |
97 * This is the index plus one of the last non-zero coeff | |
98 * for each of the blocks in the current macroblock. | |
99 * So, 0 -> no coeffs | |
100 * 1 -> dc-only (special transform) | |
101 * 2+-> full transform | |
102 */ | |
103 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; | |
104 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; | |
105 | |
106 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock | |
107 | |
108 int mbskip_enabled; | |
109 int sign_bias[4]; ///< one state [0, 1] per ref frame type | |
110 | |
111 /** | |
112 * Base parameters for segmentation, i.e. per-macroblock parameters. | |
113 * These must be kept unchanged even if segmentation is not used for | |
114 * a frame, since the values persist between interframes. | |
115 */ | |
116 struct { | |
117 int enabled; | |
118 int absolute_vals; | |
119 int update_map; | |
120 int8_t base_quant[4]; | |
121 int8_t filter_level[4]; ///< base loop filter level | |
122 } segmentation; | |
123 | |
124 /** | |
125 * Macroblocks can have one of 4 different quants in a frame when | |
126 * segmentation is enabled. | |
127 * If segmentation is disabled, only the first segment's values are used. | |
128 */ | |
129 struct { | |
130 // [0] - DC qmul [1] - AC qmul | |
131 int16_t luma_qmul[2]; | |
132 int16_t luma_dc_qmul[2]; ///< luma dc-only block quant | |
133 int16_t chroma_qmul[2]; | |
134 } qmat[4]; | |
135 | |
136 struct { | |
137 int simple; | |
138 int level; | |
139 int sharpness; | |
140 } filter; | |
141 | |
142 struct { | |
143 int enabled; ///< whether each mb can have a different strength based on mode/ref | |
144 | |
145 /** | |
146 * filter strength adjustment for the following macroblock modes: | |
147 * [0] - i4x4 | |
148 * [1] - zero mv | |
149 * [2] - inter modes except for zero or split mv | |
150 * [3] - split mv | |
151 * i16x16 modes never have any adjustment | |
152 */ | |
153 int8_t mode[4]; | |
154 | |
155 /** | |
156 * filter strength adjustment for macroblocks that reference: | |
157 * [0] - intra / VP56_FRAME_CURRENT | |
158 * [1] - VP56_FRAME_PREVIOUS | |
159 * [2] - VP56_FRAME_GOLDEN | |
160 * [3] - altref / VP56_FRAME_GOLDEN2 | |
161 */ | |
162 int8_t ref[4]; | |
163 } lf_delta; | |
164 | |
165 /** | |
166 * These are all of the updatable probabilities for binary decisions. | |
167 * They are only implicitly reset on keyframes, making it quite likely
168 * for an interframe to desync if a prior frame's header was corrupt | |
169 * or missing outright! | |
170 */ | |
171 struct { | |
172 uint8_t segmentid[3]; | |
173 uint8_t mbskip; | |
174 uint8_t intra; | |
175 uint8_t last; | |
176 uint8_t golden; | |
177 uint8_t pred16x16[4]; | |
178 uint8_t pred8x8c[3]; | |
179 uint8_t token[4][8][3][NUM_DCT_TOKENS-1]; | |
180 uint8_t mvc[2][19]; | |
181 } prob[2]; | |
182 } VP8Context; | |
183 | |
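// Reads a 24-bit little-endian value; used below for the frame tag, the start
// code and the coefficient-partition sizes, which the bitstream stores as
// 3-byte LE fields.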
184 #define RL24(p) (AV_RL16(p) + ((p)[2] << 16)) | |
185 | |
186 static void vp8_decode_flush(AVCodecContext *avctx) | |
187 { | |
188 VP8Context *s = avctx->priv_data; | |
189 int i; | |
190 | |
191 for (i = 0; i < 4; i++) | |
192 if (s->frames[i].data[0]) | |
193 avctx->release_buffer(avctx, &s->frames[i]); | |
194 memset(s->framep, 0, sizeof(s->framep)); | |
195 | |
196 av_freep(&s->macroblocks_base); | |
197 av_freep(&s->intra4x4_pred_mode_base); | |
198 av_freep(&s->top_nnz); | |
199 av_freep(&s->edge_emu_buffer); | |
200 | |
201 s->macroblocks = NULL; | |
202 s->intra4x4_pred_mode = NULL; | |
203 } | |
204 | |
205 static int update_dimensions(VP8Context *s, int width, int height) | |
206 { | |
207 int i; | |
208 | |
209 if (avcodec_check_dimensions(s->avctx, width, height)) | |
210 return AVERROR_INVALIDDATA; | |
211 | |
212 vp8_decode_flush(s->avctx); | |
213 | |
214 avcodec_set_dimensions(s->avctx, width, height); | |
215 | |
216 s->mb_width = (s->avctx->coded_width +15) / 16; | |
217 s->mb_height = (s->avctx->coded_height+15) / 16; | |
218 | |
219 // we allocate a border around the top/left of intra4x4 modes | |
220 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle | |
221 s->mb_stride = s->mb_width+1; | |
222 s->b4_stride = 4*s->mb_stride; | |
223 | |
224 s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); | |
225 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); | |
226 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); | |
227 | |
228 s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; | |
229 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; | |
230 | |
231 memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride); | |
232 for (i = 0; i < 4*s->mb_height; i++) | |
233 s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED; | |
234 | |
235 return 0; | |
236 } | |
237 | |
238 static void parse_segment_info(VP8Context *s) | |
239 { | |
240 VP56RangeCoder *c = &s->c; | |
241 int i; | |
242 | |
243 s->segmentation.update_map = vp8_rac_get(c); | |
244 | |
245 if (vp8_rac_get(c)) { // update segment feature data | |
246 s->segmentation.absolute_vals = vp8_rac_get(c); | |
247 | |
248 for (i = 0; i < 4; i++) | |
249 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); | |
250 | |
251 for (i = 0; i < 4; i++) | |
252 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); | |
253 } | |
254 if (s->segmentation.update_map) | |
255 for (i = 0; i < 3; i++) | |
256 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; | |
257 } | |
258 | |
259 static void update_lf_deltas(VP8Context *s) | |
260 { | |
261 VP56RangeCoder *c = &s->c; | |
262 int i; | |
263 | |
264 for (i = 0; i < 4; i++) | |
265 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); | |
266 | |
267 for (i = 0; i < 4; i++) | |
268 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); | |
269 } | |
270 | |
271 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) | |
272 { | |
273 const uint8_t *sizes = buf; | |
274 int i; | |
275 | |
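// The header codes log2(number of coefficient partitions) in 2 bits; the sizes
// of all but the last partition are stored as 24-bit LE values in front of the
// partition data, and the last partition takes whatever bytes remain.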
276 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); | |
277 | |
278 buf += 3*(s->num_coeff_partitions-1); | |
279 buf_size -= 3*(s->num_coeff_partitions-1); | |
280 if (buf_size < 0) | |
281 return -1; | |
282 | |
283 for (i = 0; i < s->num_coeff_partitions-1; i++) { | |
284 int size = RL24(sizes + 3*i); | |
285 if (buf_size - size < 0) | |
286 return -1; | |
287 | |
288 vp56_init_range_decoder(&s->coeff_partition[i], buf, size); | |
289 buf += size; | |
290 buf_size -= size; | |
291 } | |
292 vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); | |
293 | |
294 return 0; | |
295 } | |
296 | |
297 static void get_quants(VP8Context *s) | |
298 { | |
299 VP56RangeCoder *c = &s->c; | |
300 int i, base_qi; | |
301 | |
302 int yac_qi = vp8_rac_get_uint(c, 7); | |
303 int ydc_delta = vp8_rac_get_sint(c, 4); | |
304 int y2dc_delta = vp8_rac_get_sint(c, 4); | |
305 int y2ac_delta = vp8_rac_get_sint(c, 4); | |
306 int uvdc_delta = vp8_rac_get_sint(c, 4); | |
307 int uvac_delta = vp8_rac_get_sint(c, 4); | |
308 | |
309 for (i = 0; i < 4; i++) { | |
310 if (s->segmentation.enabled) { | |
311 base_qi = s->segmentation.base_quant[i]; | |
312 if (!s->segmentation.absolute_vals) | |
313 base_qi += yac_qi; | |
314 } else | |
315 base_qi = yac_qi; | |
316 | |
317 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; | |
318 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; | |
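// The Y2 (luma DC) block uses scaled versions of the looked-up values: its DC
// quantizer is doubled and its AC quantizer scaled by 155/100, with the floor
// and cap below apparently matching the reference decoder.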
319 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; | |
320 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; | |
321 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; | |
322 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; | |
323 | |
324 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); | |
325 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); | |
326 } | |
327 } | |
328 | |
329 /** | |
330 * Determine which buffers golden and altref should be updated with after this frame. | |
331 * The spec isn't clear here, so I'm going by my understanding of what libvpx does | |
332 * | |
333 * Intra frames update all 3 references | |
334 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set | |
335 * If the update (golden|altref) flag is set, it's updated with the current frame | |
336 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. | |
337 * If the flag is not set, the number read means: | |
338 * 0: no update | |
339 * 1: VP56_FRAME_PREVIOUS | |
340 * 2: update golden with altref, or update altref with golden | |
341 */ | |
342 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) | |
343 { | |
344 VP56RangeCoder *c = &s->c; | |
345 | |
346 if (update) | |
347 return VP56_FRAME_CURRENT; | |
348 | |
349 switch (vp8_rac_get_uint(c, 2)) { | |
350 case 1: | |
351 return VP56_FRAME_PREVIOUS; | |
352 case 2: | |
353 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; | |
354 } | |
355 return VP56_FRAME_NONE; | |
356 } | |
357 | |
358 static void update_refs(VP8Context *s) | |
359 { | |
360 VP56RangeCoder *c = &s->c; | |
361 | |
362 int update_golden = vp8_rac_get(c); | |
363 int update_altref = vp8_rac_get(c); | |
364 | |
365 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); | |
366 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); | |
367 } | |
368 | |
369 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) | |
370 { | |
371 VP56RangeCoder *c = &s->c; | |
372 int header_size, hscale, vscale, i, j, k, l, ret; | |
373 int width = s->avctx->width; | |
374 int height = s->avctx->height; | |
375 | |
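// Frame tag (3 bytes, little-endian): bit 0 is clear for keyframes, bits 1-3
// carry the profile, bit 4 the show_frame flag, and bits 5-23 the size of the
// header ("first") partition.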
376 s->keyframe = !(buf[0] & 1); | |
377 s->profile = (buf[0]>>1) & 7; | |
378 s->invisible = !(buf[0] & 0x10); | |
379 header_size = RL24(buf) >> 5; | |
380 buf += 3; | |
381 buf_size -= 3; | |
382 | |
11974 | 383 if (s->profile > 3) |
384 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); | |
385 | |
386 if (!s->profile) | |
387 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); | |
388 else // profile 1-3 use bilinear, 4+ aren't defined so whatever | |
389 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); | |
11921 | 390 |
391 if (header_size > buf_size - 7*s->keyframe) { | |
392 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); | |
393 return AVERROR_INVALIDDATA; | |
394 } | |
395 | |
396 if (s->keyframe) { | |
397 if (RL24(buf) != 0x2a019d) { | |
398 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", RL24(buf)); | |
399 return AVERROR_INVALIDDATA; | |
400 } | |
401 width = AV_RL16(buf+3) & 0x3fff; | |
402 height = AV_RL16(buf+5) & 0x3fff; | |
403 hscale = buf[4] >> 6; | |
404 vscale = buf[6] >> 6; | |
405 buf += 7; | |
406 buf_size -= 7; | |
407 | |
408 if (hscale || vscale)
409 av_log_missing_feature(s->avctx, "Upscaling", 1);
410
11921 | 411 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; |
412 memcpy(s->prob->token , vp8_token_default_probs , sizeof(s->prob->token)); | |
413 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); | |
414 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); | |
415 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); | |
416 memset(&s->segmentation, 0, sizeof(s->segmentation)); | |
417 } | |
418 | |
419 if (!s->macroblocks_base || /* first frame */ | |
420 width != s->avctx->width || height != s->avctx->height) { | |
421 if ((ret = update_dimensions(s, width, height) < 0)) | |
422 return ret; | |
423 } | |
424 | |
425 vp56_init_range_decoder(c, buf, header_size); | |
426 buf += header_size; | |
427 buf_size -= header_size; | |
428 | |
429 if (s->keyframe) { | |
430 if (vp8_rac_get(c)) | |
431 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); | |
432 vp8_rac_get(c); // whether we can skip clamping in dsp functions | |
433 } | |
434 | |
435 if ((s->segmentation.enabled = vp8_rac_get(c))) | |
436 parse_segment_info(s); | |
437 else | |
438 s->segmentation.update_map = 0; // FIXME: move this to some init function? | |
439 | |
440 s->filter.simple = vp8_rac_get(c); | |
441 s->filter.level = vp8_rac_get_uint(c, 6); | |
442 s->filter.sharpness = vp8_rac_get_uint(c, 3); | |
443 | |
444 if ((s->lf_delta.enabled = vp8_rac_get(c))) | |
445 if (vp8_rac_get(c)) | |
446 update_lf_deltas(s); | |
447 | |
448 if (setup_partitions(s, buf, buf_size)) { | |
449 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); | |
450 return AVERROR_INVALIDDATA; | |
451 } | |
452 | |
453 get_quants(s); | |
454 | |
455 if (!s->keyframe) { | |
456 update_refs(s); | |
457 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); | |
458 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); | |
459 } | |
460 | |
461 // if we aren't saving this frame's probabilities for future frames, | |
462 // make a copy of the current probabilities | |
463 if (!(s->update_probabilities = vp8_rac_get(c))) | |
464 s->prob[1] = s->prob[0]; | |
465 | |
466 s->update_last = s->keyframe || vp8_rac_get(c); | |
467 | |
468 for (i = 0; i < 4; i++) | |
469 for (j = 0; j < 8; j++) | |
470 for (k = 0; k < 3; k++) | |
471 for (l = 0; l < NUM_DCT_TOKENS-1; l++) | |
472 if (vp56_rac_get_prob(c, vp8_token_update_probs[i][j][k][l])) | |
473 s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8); | |
474 | |
475 if ((s->mbskip_enabled = vp8_rac_get(c))) | |
476 s->prob->mbskip = vp8_rac_get_uint(c, 8); | |
477 | |
478 if (!s->keyframe) { | |
479 s->prob->intra = vp8_rac_get_uint(c, 8); | |
480 s->prob->last = vp8_rac_get_uint(c, 8); | |
481 s->prob->golden = vp8_rac_get_uint(c, 8); | |
482 | |
483 if (vp8_rac_get(c)) | |
484 for (i = 0; i < 4; i++) | |
485 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); | |
486 if (vp8_rac_get(c)) | |
487 for (i = 0; i < 3; i++) | |
488 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); | |
489 | |
490 // 17.2 MV probability update | |
491 for (i = 0; i < 2; i++) | |
492 for (j = 0; j < 19; j++) | |
493 if (vp56_rac_get_prob(c, vp8_mv_update_prob[i][j])) | |
494 s->prob->mvc[i][j] = vp8_rac_get_nn(c); | |
495 } | |
496 | |
497 return 0; | |
498 } | |
499 | |
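// Motion vectors appear to be stored in quarter-pel units here, so (mb_x << 6)
// is the macroblock position in quarter-pel and MARGIN allows a vector to point
// at most 16 pixels (one macroblock) outside the visible frame.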
500 static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, | |
501 int mb_x, int mb_y) | |
502 { | |
503 #define MARGIN (16 << 2) | |
504 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), | |
505 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); | |
506 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), | |
507 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); | |
508 } | |
509 | |
510 static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, | |
511 VP56mv near[2], VP56mv *best, int cnt[4]) | |
512 { | |
513 VP8Macroblock *mb_edge[3] = { mb - s->mb_stride /* top */, | |
514 mb - 1 /* left */, | |
515 mb - s->mb_stride - 1 /* top-left */ }; | |
516 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; | |
517 VP56mv near_mv[4] = {{ 0 }}; | |
518 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; | |
519 int idx = CNT_ZERO, n; | |
520 int best_idx = CNT_ZERO; | |
521 | |
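// Each distinct neighbouring MV is tallied in cnt[]; the top and left
// neighbours are weighted 2 and the top-left neighbour 1 (the 1 + (n != 2)
// below), presumably mirroring the candidate ranking of the reference decoder.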
522 /* Process MB on top, left and top-left */ | |
523 for (n = 0; n < 3; n++) { | |
524 VP8Macroblock *edge = mb_edge[n]; | |
525 if (edge->ref_frame != VP56_FRAME_CURRENT) { | |
526 if (edge->mv.x | edge->mv.y) { | |
527 VP56mv tmp = edge->mv; | |
528 if (s->sign_bias[mb->ref_frame] != s->sign_bias[edge->ref_frame]) { | |
529 tmp.x *= -1; | |
530 tmp.y *= -1; | |
531 } | |
532 if ((tmp.x ^ near_mv[idx].x) | (tmp.y ^ near_mv[idx].y)) | |
533 near_mv[++idx] = tmp; | |
534 cnt[idx] += 1 + (n != 2); | |
535 } else | |
536 cnt[CNT_ZERO] += 1 + (n != 2); | |
537 } | |
538 } | |
539 | |
540 /* If we have three distinct MVs, merge first and last if they're the same */
541 if (cnt[CNT_SPLITMV] && | |
542 !((near_mv[1+EDGE_TOP].x ^ near_mv[1+EDGE_TOPLEFT].x) | | |
543 (near_mv[1+EDGE_TOP].y ^ near_mv[1+EDGE_TOPLEFT].y))) | |
544 cnt[CNT_NEAREST] += 1; | |
545 | |
546 cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + | |
547 (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + | |
548 (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); | |
549 | |
550 /* Swap near and nearest if necessary */ | |
551 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { | |
552 FFSWAP(int, cnt[CNT_NEAREST], cnt[CNT_NEAR]); | |
553 FFSWAP(VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); | |
554 } | |
555 | |
556 /* Choose the best mv out of 0,0 and the nearest mv */ | |
557 if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO]) | |
558 best_idx = CNT_NEAREST; | |
559 | |
560 clamp_mv(s, best, &near_mv[best_idx], mb_x, mb_y); | |
561 near[0] = near_mv[CNT_NEAREST]; | |
562 near[1] = near_mv[CNT_NEAR]; | |
563 } | |
564 | |
565 /** | |
566 * Motion vector coding, 17.1. | |
567 */ | |
568 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) | |
569 { | |
570 int x = 0; | |
571 | |
572 if (vp56_rac_get_prob(c, p[0])) { | |
573 int i; | |
574 | |
575 for (i = 0; i < 3; i++) | |
576 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
577 for (i = 9; i > 3; i--) | |
578 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
579 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) | |
580 x += 8; | |
581 } else | |
582 x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]); | |
583 | |
584 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; | |
585 } | |
586 | |
587 static const uint8_t *get_submv_prob(const VP56mv *left, const VP56mv *top) | |
588 { | |
589 int l_is_zero = !(left->x | left->y); | |
590 int t_is_zero = !(top->x | top->y); | |
591 int equal = !((left->x ^ top->x) | (left->y ^ top->y)); | |
592 | |
593 if (equal) | |
594 return l_is_zero ? vp8_submv_prob[4] : vp8_submv_prob[3]; | |
595 if (t_is_zero) | |
596 return vp8_submv_prob[2]; | |
597 return l_is_zero ? vp8_submv_prob[1] : vp8_submv_prob[0]; | |
598 } | |
599 | |
600 /** | |
601 * Split motion vector prediction, 16.4. | |
602 * @returns the number of motion vectors parsed (2, 4 or 16) |
11921 | 603 */ |
604 static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, |
11921 | 605 VP8Macroblock *mb, VP56mv *base_mv) |
606 { | |
607 int part_idx = mb->partitioning = | |
608 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); | |
609 int n, num = vp8_mbsplit_count[part_idx]; | |
610 const uint8_t *mbsplits = vp8_mbsplits[part_idx],
611 *firstidx = vp8_mbfirstidx[part_idx];
11921 | 612 |
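// For each coded sub-MV, the left/above predictors come from the neighbouring
// 4x4 block: from within the same macroblock where possible, otherwise from
// the right column (k + 3) of the MB to the left or the bottom row (k + 12)
// of the MB above.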
613 for (n = 0; n < num; n++) { | |
614 int k = firstidx[n];
615 const VP56mv *left, *above;
616 const uint8_t *submv_prob;
617
618 if (!(k & 3)) {
619 VP8Macroblock *left_mb = &mb[-1];
620 left = &left_mb->bmv[vp8_mbsplits[left_mb->partitioning][k + 3]];
621 } else
622 left = &mb->bmv[mbsplits[k - 1]];
623 if (k <= 3) {
624 VP8Macroblock *above_mb = &mb[-s->mb_stride];
625 above = &above_mb->bmv[vp8_mbsplits[above_mb->partitioning][k + 12]];
626 } else
627 above = &mb->bmv[mbsplits[k - 4]];
628
629 submv_prob = get_submv_prob(left, above);
11921 | 630 |
631 switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) { | |
632 case VP8_SUBMVMODE_NEW4X4: | |
633 mb->bmv[n].y = base_mv->y + read_mv_component(c, s->prob->mvc[0]);
634 mb->bmv[n].x = base_mv->x + read_mv_component(c, s->prob->mvc[1]);
11921 | 635 break; |
636 case VP8_SUBMVMODE_ZERO4X4: | |
637 mb->bmv[n].x = 0;
638 mb->bmv[n].y = 0;
11921 | 639 break; |
640 case VP8_SUBMVMODE_LEFT4X4: | |
641 mb->bmv[n] = *left; |
11921 | 642 break; |
643 case VP8_SUBMVMODE_TOP4X4: | |
644 mb->bmv[n] = *above; |
11921 | 645 break; |
646 } | |
647 }
648
649 return num;
11921 | 650 } |
651 | |
652 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, | |
653 int stride, int keyframe) | |
654 { | |
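// On keyframes the 4x4 mode probabilities are conditioned on the modes of the
// blocks above and to the left; interframes use a single fixed table.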
655 int x, y, t, l; | |
656 const uint8_t *ctx = vp8_pred4x4_prob_inter; | |
657 | |
658 for (y = 0; y < 4; y++) { | |
659 for (x = 0; x < 4; x++) { | |
660 if (keyframe) { | |
661 t = intra4x4[x - stride]; | |
662 l = intra4x4[x - 1]; | |
663 ctx = vp8_pred4x4_prob_intra[t][l]; | |
664 } | |
665 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); | |
666 } | |
667 intra4x4 += stride; | |
668 } | |
669 } | |
670 | |
671 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, | |
672 uint8_t *intra4x4) | |
673 { | |
674 VP56RangeCoder *c = &s->c; | |
675 int n; | |
676 | |
677 if (s->segmentation.update_map) | |
678 mb->segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); | |
679 | |
680 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; | |
681 | |
682 if (s->keyframe) { | |
683 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); | |
684 | |
685 if (mb->mode == MODE_I4x4) { | |
686 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1); | |
687 } else | |
688 fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); | |
689 | |
690 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); | |
691 mb->ref_frame = VP56_FRAME_CURRENT; | |
692 } else if (vp56_rac_get_prob(c, s->prob->intra)) { | |
693 VP56mv near[2], best; | |
694 int cnt[4] = { 0 }; | |
695 uint8_t p[4]; | |
696 | |
697 // inter MB, 16.2 | |
698 if (vp56_rac_get_prob(c, s->prob->last)) | |
699 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? | |
700 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; | |
701 else | |
702 mb->ref_frame = VP56_FRAME_PREVIOUS; | |
703 | |
704 // motion vectors, 16.3 | |
705 find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); | |
706 for (n = 0; n < 4; n++) | |
707 p[n] = vp8_mode_contexts[cnt[n]][n]; | |
708 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); | |
709 switch (mb->mode) { | |
710 case VP8_MVMODE_SPLIT: | |
711 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, &best) - 1]; |
11921 | 712 break; |
713 case VP8_MVMODE_ZERO: | |
714 mb->mv.x = 0; | |
715 mb->mv.y = 0; | |
716 break; | |
717 case VP8_MVMODE_NEAREST: | |
718 clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); | |
719 break; | |
720 case VP8_MVMODE_NEAR: | |
721 clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); | |
722 break; | |
723 case VP8_MVMODE_NEW: | |
724 mb->mv.y = best.y + read_mv_component(c, s->prob->mvc[0]); | |
725 mb->mv.x = best.x + read_mv_component(c, s->prob->mvc[1]); | |
726 break; | |
727 } | |
728 if (mb->mode != VP8_MVMODE_SPLIT) { | |
729 mb->partitioning = VP8_SPLITMVMODE_NONE;
730 mb->bmv[0] = mb->mv;
11921 | 731 } |
732 } else { | |
733 // intra MB, 16.1 | |
734 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); | |
735 | |
736 if (mb->mode == MODE_I4x4) { | |
737 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 0); | |
738 } else | |
739 fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); | |
740 | |
741 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); | |
742 mb->ref_frame = VP56_FRAME_CURRENT; | |
743 } | |
744 } | |
745 | |
746 /** | |
747 * @param i initial coeff index, 0 unless a separate DC block is coded | |
748 * @param zero_nhood the initial prediction context for number of surrounding | |
749 * all-zero blocks (only left/top, so 0-2) | |
750 * @param qmul[0] dc dequant factor | |
751 * @param qmul[1] ac dequant factor | |
752 * @return 0 if no coeffs were decoded | |
753 * otherwise, the index of the last coeff decoded plus one | |
754 */ | |
755 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], | |
756 uint8_t probs[8][3][NUM_DCT_TOKENS-1], | |
757 int i, int zero_nhood, int16_t qmul[2]) | |
758 { | |
759 int token, nonzero = 0; | |
760 int offset = 0; | |
761 | |
762 for (; i < 16; i++) { | |
763 token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); | |
764 | |
765 if (token == DCT_EOB) | |
766 break; | |
767 else if (token >= DCT_CAT1) { | |
768 int cat = token-DCT_CAT1; | |
769 token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); | |
770 token += vp8_dct_cat_offset[cat]; | |
771 } | |
772 | |
773 // after the first token, the non-zero prediction context becomes | |
774 // based on the last decoded coeff | |
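// The stream apparently never codes an EOB right after a zero coefficient
// (the zero would have been covered by that EOB instead), so the token tree
// is entered one node in (offset 1), skipping the EOB branch.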
775 if (!token) { | |
776 zero_nhood = 0; | |
777 offset = 1; | |
778 continue; | |
779 } else if (token == 1) | |
780 zero_nhood = 1; | |
781 else | |
782 zero_nhood = 2; | |
783 | |
784 // todo: full [16] qmat? load into register? | |
785 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i]; | |
786 nonzero = i+1; | |
787 offset = 0; | |
788 } | |
789 return nonzero; | |
790 } | |
791 | |
792 static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, | |
793 uint8_t t_nnz[9], uint8_t l_nnz[9]) | |
794 { | |
795 LOCAL_ALIGNED_16(DCTELEM, dc,[16]); | |
796 int i, x, y, luma_start = 0, luma_ctx = 3; | |
797 int nnz_pred, nnz, nnz_total = 0; | |
798 int segment = s->segmentation.enabled ? mb->segment : 0; | |
799 | |
800 s->dsp.clear_blocks((DCTELEM *)s->block); | |
801 | |
802 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
803 AV_ZERO128(dc); | |
804 AV_ZERO128(dc+8); | |
805 nnz_pred = t_nnz[8] + l_nnz[8]; | |
806 | |
807 // decode DC values and do hadamard | |
808 nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred, | |
809 s->qmat[segment].luma_dc_qmul); | |
810 l_nnz[8] = t_nnz[8] = !!nnz; | |
811 nnz_total += nnz; | |
812 s->vp8dsp.vp8_luma_dc_wht(s->block, dc); | |
813 luma_start = 1; | |
814 luma_ctx = 0; | |
815 } | |
816 | |
817 // luma blocks | |
818 for (y = 0; y < 4; y++) | |
819 for (x = 0; x < 4; x++) { | |
820 nnz_pred = l_nnz[y] + t_nnz[x]; | |
821 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, | |
822 nnz_pred, s->qmat[segment].luma_qmul); | |
823 // nnz+luma_start may be one more than the actual last index, but we don't care | |
824 s->non_zero_count_cache[y][x] = nnz + luma_start; | |
825 t_nnz[x] = l_nnz[y] = !!nnz; | |
826 nnz_total += nnz; | |
827 } | |
828 | |
829 // chroma blocks | |
830 // TODO: what to do about dimensions? 2nd dim for luma is x, | |
831 // but for chroma it's (y<<1)|x | |
832 for (i = 4; i < 6; i++) | |
833 for (y = 0; y < 2; y++) | |
834 for (x = 0; x < 2; x++) { | |
835 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; | |
836 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, | |
837 nnz_pred, s->qmat[segment].chroma_qmul); | |
838 s->non_zero_count_cache[i][(y<<1)+x] = nnz; | |
839 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; | |
840 nnz_total += nnz; | |
841 } | |
842 | |
843 // if there were no coded coeffs despite the macroblock not being marked skip, | |
844 // we MUST not do the inner loop filter and should not do IDCT | |
845 // Since skip isn't used for bitstream prediction, just manually set it. | |
846 if (!nnz_total) | |
847 mb->skip = 1; | |
848 } | |
849 | |
850 static int check_intra_pred_mode(int mode, int mb_x, int mb_y) | |
851 { | |
852 if (mode == DC_PRED8x8) { | |
853 if (!(mb_x|mb_y)) | |
854 mode = DC_128_PRED8x8; | |
855 else if (!mb_y) | |
856 mode = LEFT_DC_PRED8x8; | |
857 else if (!mb_x) | |
858 mode = TOP_DC_PRED8x8; | |
859 } | |
860 return mode; | |
861 } | |
862 | |
863 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | |
864 uint8_t *bmode, int mb_x, int mb_y) | |
865 { | |
866 int x, y, mode, nnz, tr; | |
867 | |
868 if (mb->mode < MODE_I4x4) { | |
869 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); | |
870 s->hpc.pred16x16[mode](dst[0], s->linesize); | |
871 } else { | |
872 uint8_t *ptr = dst[0]; | |
873 | |
874 // all blocks on the right edge of the macroblock use the bottom edge of
875 // the top macroblock for their topright edge
876 uint8_t *tr_right = ptr - s->linesize + 16; | |
877 | |
878 // if we're on the right edge of the frame, said edge is extended | |
879 // from the top macroblock | |
880 if (mb_x == s->mb_width-1) { | |
881 tr = tr_right[-1]*0x01010101; | |
882 tr_right = (uint8_t *)&tr; | |
883 } | |
884 | |
885 for (y = 0; y < 4; y++) { | |
886 uint8_t *topright = ptr + 4 - s->linesize; | |
887 for (x = 0; x < 4; x++) { | |
888 if (x == 3) | |
889 topright = tr_right; | |
890 | |
891 s->hpc.pred4x4[bmode[x]](ptr+4*x, topright, s->linesize); | |
892 | |
893 nnz = s->non_zero_count_cache[y][x]; | |
894 if (nnz) { | |
895 if (nnz == 1) | |
896 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); | |
897 else | |
898 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); | |
899 } | |
900 topright += 4; | |
901 } | |
902 | |
903 ptr += 4*s->linesize; | |
904 bmode += s->b4_stride; | |
905 } | |
906 } | |
907 | |
908 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); | |
909 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); | |
910 s->hpc.pred8x8[mode](dst[2], s->uvlinesize); | |
911 } | |
912 | |
913 /** | |
914 * Generic MC function. | |
915 * | |
916 * @param s VP8 decoding context | |
917 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes | |
918 * @param dst target buffer for block data at block position | |
919 * @param src reference picture buffer at origin (0, 0) | |
920 * @param mv motion vector (relative to block position) to get pixel data from | |
921 * @param x_off horizontal position of block from origin (0, 0) | |
922 * @param y_off vertical position of block from origin (0, 0) | |
923 * @param block_w width of block (16, 8 or 4) | |
924 * @param block_h height of block (always same as block_w) | |
925 * @param width width of src/dst plane data | |
926 * @param height height of src/dst plane data | |
927 * @param linesize size of a single line of plane data, including padding | |
928 */ | |
929 static inline void vp8_mc(VP8Context *s, int luma, | |
930 uint8_t *dst, uint8_t *src, const VP56mv *mv, | |
931 int x_off, int y_off, int block_w, int block_h, | |
932 int width, int height, int linesize, | |
11950 | 933 vp8_mc_func mc_func[3][3]) |
11921 | 934 { |
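// idx[] appears to map the eighth-pel MV phase to the filter class in
// put_pixels_tab: 0 = full-pel copy, odd phases = the 4-tap filters,
// non-zero even phases = the 6-tap filters.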
935 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; | |
936 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; | |
937 int my = (mv->y << luma)&7, my_idx = idx[my]; | |
938 | |
939 x_off += mv->x >> (3 - luma); | |
940 y_off += mv->y >> (3 - luma); | |
941 | |
942 // edge emulation | |
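// (the 6-tap subpel filter reads up to 2 pixels before and 3 pixels after the
// block in each direction, hence the (block_w + 5) x (block_h + 5) area
// fetched with a -2 offset)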
943 src += y_off * linesize + x_off; | |
944 if (x_off < 2 || x_off >= width - block_w - 3 || | |
945 y_off < 2 || y_off >= height - block_h - 3) { | |
946 ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize, | |
947 block_w + 5, block_h + 5, | |
948 x_off - 2, y_off - 2, width, height); | |
949 src = s->edge_emu_buffer + 2 + linesize * 2; | |
950 } | |
951 | |
11950 | 952 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); |
11921 | 953 } |
954 | |
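// MC for one partition of an inter macroblock: the given luma rectangle plus
// the corresponding half-size chroma rectangles, reusing the luma MV for
// chroma; for profile 3 the chroma MV is truncated to full-pel (the &= ~7 below).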
955 static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
956 AVFrame *ref_frame, int x_off, int y_off,
957 int bx_off, int by_off,
958 int block_w, int block_h,
959 int width, int height, VP56mv *mv)
960 {
961 VP56mv uvmv = *mv;
962
963 /* Y */
964 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off,
965 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
966 block_w, block_h, width, height, s->linesize,
967 s->put_pixels_tab[block_w == 8]);
968
969 /* U/V */
970 if (s->profile == 3) {
971 uvmv.x &= ~7;
972 uvmv.y &= ~7;
973 }
974 x_off >>= 1; y_off >>= 1;
975 bx_off >>= 1; by_off >>= 1;
976 width >>= 1; height >>= 1;
977 block_w >>= 1; block_h >>= 1;
978 vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off,
979 ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off,
980 block_w, block_h, width, height, s->uvlinesize,
981 s->put_pixels_tab[1 + (block_w == 4)]);
982 vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off,
983 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
984 block_w, block_h, width, height, s->uvlinesize,
985 s->put_pixels_tab[1 + (block_w == 4)]);
986 }
987
11921 | 988 /** |
989 * Apply motion vectors to prediction buffer, chapter 18. | |
990 */ | |
991 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | |
992 int mb_x, int mb_y) | |
993 { | |
994 int x_off = mb_x << 4, y_off = mb_y << 4; | |
995 int width = 16*s->mb_width, height = 16*s->mb_height; | |
996 | |
997 if (mb->mode < VP8_MVMODE_SPLIT) { | |
998 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
999 0, 0, 16, 16, width, height, &mb->mv);
1000 } else switch (mb->partitioning) {
1001 case VP8_SPLITMVMODE_4x4: {
11921 | 1002 int x, y; |
1003 VP56mv uvmv; |
11921 | 1004 |
1005 /* Y */ | |
1006 for (y = 0; y < 4; y++) { | |
1007 for (x = 0; x < 4; x++) { | |
1008 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, | |
1009 s->framep[mb->ref_frame]->data[0], &mb->bmv[4*y + x], | |
1010 4*x + x_off, 4*y + y_off, 4, 4, | |
1011 width, height, s->linesize, | |
11974 | 1012 s->put_pixels_tab[2]); |
11921 | 1013 } |
1014 } | |
1015 | |
1016 /* U/V */ | |
1017 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; | |
1018 for (y = 0; y < 2; y++) { | |
1019 for (x = 0; x < 2; x++) { | |
1020 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + | |
1021 mb->bmv[ 2*y * 4 + 2*x+1].x + | |
1022 mb->bmv[(2*y+1) * 4 + 2*x ].x + | |
1023 mb->bmv[(2*y+1) * 4 + 2*x+1].x; | |
1024 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + | |
1025 mb->bmv[ 2*y * 4 + 2*x+1].y + | |
1026 mb->bmv[(2*y+1) * 4 + 2*x ].y + | |
1027 mb->bmv[(2*y+1) * 4 + 2*x+1].y; | |
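// Average the four luma sub-MVs covering this chroma block; adding the sign
// bit (uvmv >> (INT_BIT-1)) to the +2 bias rounds halves away from zero for
// negative sums too, avoiding a branch or division.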
1028 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1029 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
11921 | 1030 if (s->profile == 3) { |
1031 uvmv.x &= ~7; | |
1032 uvmv.y &= ~7; | |
1033 } | |
1034 vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, | |
1035 s->framep[mb->ref_frame]->data[1], &uvmv, | |
1036 4*x + x_off, 4*y + y_off, 4, 4, | |
1037 width, height, s->uvlinesize, | |
11974 | 1038 s->put_pixels_tab[2]); |
11921 | 1039 vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, |
1040 s->framep[mb->ref_frame]->data[2], &uvmv, | |
1041 4*x + x_off, 4*y + y_off, 4, 4, | |
1042 width, height, s->uvlinesize, | |
11974 | 1043 s->put_pixels_tab[2]); |
11921 | 1044 } |
1045 } | |
1046 break;
1047 }
1048 case VP8_SPLITMVMODE_16x8:
1049 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1050 0, 0, 16, 8, width, height, &mb->bmv[0]);
1051 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1052 0, 8, 16, 8, width, height, &mb->bmv[1]);
1053 break;
1054 case VP8_SPLITMVMODE_8x16:
1055 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1056 0, 0, 8, 16, width, height, &mb->bmv[0]);
1057 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1058 8, 0, 8, 16, width, height, &mb->bmv[1]);
1059 break;
1060 case VP8_SPLITMVMODE_8x8:
1061 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1062 0, 0, 8, 8, width, height, &mb->bmv[0]);
1063 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1064 8, 0, 8, 8, width, height, &mb->bmv[1]);
1065 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1066 0, 8, 8, 8, width, height, &mb->bmv[2]);
1067 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1068 8, 8, 8, 8, width, height, &mb->bmv[3]);
1069 break;
11921 | 1070 } |
1071 } | |
1072 | |
1073 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, | |
1074 VP8Macroblock *mb) | |
1075 { | |
1076 int x, y, nnz; | |
1077 | |
1078 if (mb->mode != MODE_I4x4) | |
1079 for (y = 0; y < 4; y++) { | |
1080 for (x = 0; x < 4; x++) { | |
1081 nnz = s->non_zero_count_cache[y][x]; | |
1082 if (nnz) { | |
1083 if (nnz == 1) | |
1084 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1085 else | |
1086 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1087 } | |
1088 } | |
1089 y_dst += 4*s->linesize; | |
1090 } | |
1091 | |
1092 for (y = 0; y < 2; y++) { | |
1093 for (x = 0; x < 2; x++) { | |
1094 nnz = s->non_zero_count_cache[4][(y<<1)+x]; | |
1095 if (nnz) { | |
1096 if (nnz == 1) | |
1097 s->vp8dsp.vp8_idct_dc_add(u_dst+4*x, s->block[4][(y<<1)+x], s->uvlinesize); | |
1098 else | |
1099 s->vp8dsp.vp8_idct_add(u_dst+4*x, s->block[4][(y<<1)+x], s->uvlinesize); | |
1100 } | |
1101 | |
1102 nnz = s->non_zero_count_cache[5][(y<<1)+x]; | |
1103 if (nnz) { | |
1104 if (nnz == 1) | |
1105 s->vp8dsp.vp8_idct_dc_add(v_dst+4*x, s->block[5][(y<<1)+x], s->uvlinesize); | |
1106 else | |
1107 s->vp8dsp.vp8_idct_add(v_dst+4*x, s->block[5][(y<<1)+x], s->uvlinesize); | |
1108 } | |
1109 } | |
1110 u_dst += 4*s->uvlinesize; | |
1111 v_dst += 4*s->uvlinesize; | |
1112 } | |
1113 } | |
1114 | |
1115 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, int *inner, int *hev_thresh) | |
1116 { | |
1117 int interior_limit, filter_level; | |
1118 | |
1119 if (s->segmentation.enabled) { | |
1120 filter_level = s->segmentation.filter_level[mb->segment]; | |
1121 if (!s->segmentation.absolute_vals) | |
1122 filter_level += s->filter.level; | |
1123 } else | |
1124 filter_level = s->filter.level; | |
1125 | |
1126 if (s->lf_delta.enabled) { | |
1127 filter_level += s->lf_delta.ref[mb->ref_frame]; | |
1128 | |
1129 if (mb->ref_frame == VP56_FRAME_CURRENT) { | |
1130 if (mb->mode == MODE_I4x4) | |
1131 filter_level += s->lf_delta.mode[0]; | |
1132 } else { | |
1133 if (mb->mode == VP8_MVMODE_ZERO) | |
1134 filter_level += s->lf_delta.mode[1]; | |
1135 else if (mb->mode == VP8_MVMODE_SPLIT) | |
1136 filter_level += s->lf_delta.mode[3]; | |
1137 else | |
1138 filter_level += s->lf_delta.mode[2]; | |
1139 } | |
1140 } | |
1141 filter_level = av_clip(filter_level, 0, 63); | |
1142 | |
1143 interior_limit = filter_level; | |
1144 if (s->filter.sharpness) { | |
1145 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; | |
1146 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); | |
1147 } | |
1148 interior_limit = FFMAX(interior_limit, 1); | |
1149 | |
1150 *level = filter_level; | |
1151 *inner = interior_limit; | |
1152 | |
1153 if (hev_thresh) { | |
1154 *hev_thresh = filter_level >= 15; | |
1155 | |
1156 if (s->keyframe) { | |
1157 if (filter_level >= 40) | |
1158 *hev_thresh = 2; | |
1159 } else { | |
1160 if (filter_level >= 40) | |
1161 *hev_thresh = 3; | |
1162 else if (filter_level >= 20) | |
1163 *hev_thresh = 2; | |
1164 } | |
1165 } | |
1166 } | |
1167 | |
1168 // TODO: look at backup_mb_border / xchg_mb_border in h264.c | |
static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y)
{
    int filter_level, inner_limit, hev_thresh;

    filter_level_for_mb(s, mb, &filter_level, &inner_limit, &hev_thresh);
    if (!filter_level)
        return;

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16(dst[0], s->linesize,   filter_level+2, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8 (dst[1], s->uvlinesize, filter_level+2, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8 (dst[2], s->uvlinesize, filter_level+2, inner_limit, hev_thresh);
    }

    if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
        s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 4, s->linesize,   filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 8, s->linesize,   filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+12, s->linesize,   filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8_inner (dst[1]+ 4, s->uvlinesize, filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8_inner (dst[2]+ 4, s->uvlinesize, filter_level, inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16(dst[0], s->linesize,   filter_level+2, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8 (dst[1], s->uvlinesize, filter_level+2, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8 (dst[2], s->uvlinesize, filter_level+2, inner_limit, hev_thresh);
    }

    if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
        s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 4*s->linesize,   s->linesize,   filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 8*s->linesize,   s->linesize,   filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+12*s->linesize,   s->linesize,   filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8_inner (dst[1]+ 4*s->uvlinesize, s->uvlinesize, filter_level, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8_inner (dst[2]+ 4*s->uvlinesize, s->uvlinesize, filter_level, inner_limit, hev_thresh);
    }
}

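/* Simple loop filter: luma only and no HEV check; the edge limits fold the
 * filter level and inner limit into a single threshold, with macroblock edges
 * again getting the stronger (filter_level + 2) variant. */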
static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y)
{
    int filter_level, inner_limit, mbedge_lim, bedge_lim;

    filter_level_for_mb(s, mb, &filter_level, &inner_limit, NULL);
    if (!filter_level)
        return;

    mbedge_lim = 2*(filter_level+2) + inner_limit;
    bedge_lim  = 2* filter_level    + inner_limit;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, s->linesize, mbedge_lim);
    if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, s->linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, s->linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, s->linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, s->linesize, mbedge_lim);
    if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*s->linesize, s->linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*s->linesize, s->linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*s->linesize, s->linesize, bedge_lim);
    }
}

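/* Apply the loop filter to one completed row of macroblocks; which of the two
 * row helpers is used depends on the per-frame "simple filter" flag. */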
static void filter_mb_row(VP8Context *s, int mb_y)
{
    VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
    uint8_t *dst[3] = {
        s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
        s->framep[VP56_FRAME_CURRENT]->data[1] +  8*mb_y*s->uvlinesize,
        s->framep[VP56_FRAME_CURRENT]->data[2] +  8*mb_y*s->uvlinesize
    };
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        filter_mb(s, dst, mb++, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
    }
}

static void filter_mb_row_simple(VP8Context *s, int mb_y)
{
    uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
    VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        filter_mb_simple(s, dst, mb++, mb_x, mb_y);
        dst += 16;
    }
}

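/* Top-level frame decoding: parse the frame header, pick a buffer for the new
 * frame, reconstruct it macroblock by macroblock (with loop filtering deferred
 * by one row), then update the previous/golden/altref reference pointers. */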
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, mb_x, mb_y, i, y, referenced;
    enum AVDiscard skip_thresh;
    AVFrame *curframe;

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        return ret;

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        goto skip_decode;
    }

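    // Find a spare frame slot for the new frame: any of the four buffers that
    // is not currently serving as the previous, golden, or altref reference.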
    for (i = 0; i < 4; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (curframe->data[0])
        avctx->release_buffer(avctx, curframe);

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = avctx->get_buffer(avctx, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        return ret;
    }

    // Since the arithmetic coding probabilities are updated on every frame, the
    // values we have for a random inter frame are almost certainly junk unless
    // decoding started on a keyframe. So rather than display junk, show nothing.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

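    // The edge emulation buffer holds one emulated luma block: 16 rows plus the
    // extra rows needed by the 6-tap MC filter (presumably 2 above and 3 below),
    // i.e. 21 rows of linesize bytes.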
    if (!s->edge_emu_buffer)
        s->edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        memset(curframe->data[0] - s->linesize  -1, 127, s->linesize  +1);
        memset(curframe->data[1] - s->uvlinesize-1, 127, s->uvlinesize+1);
        memset(curframe->data[2] - s->uvlinesize-1, 127, s->uvlinesize+1);
    }

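    // Each macroblock row reads its residual coefficients from partition
    // (mb_y % num_coeff_partitions); the partition count is a power of two,
    // so the modulo is done with a mask.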
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
        VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
        uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride;
        uint8_t *dst[3] = {
            curframe->data[0] + 16*mb_y*s->linesize,
            curframe->data[1] +  8*mb_y*s->uvlinesize,
            curframe->data[2] +  8*mb_y*s->uvlinesize
        };

        memset(s->left_nnz, 0, sizeof(s->left_nnz));

        // left edge of 129 for intra prediction
        if (!(avctx->flags & CODEC_FLAG_EMU_EDGE))
            for (i = 0; i < 3; i++)
                for (y = 0; y < 16>>!!i; y++)
                    dst[i][y*curframe->linesize[i]-1] = 129;

        for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
            decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x);

            if (!mb->skip)
                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
            else {
                AV_ZERO128(s->non_zero_count_cache);    // luma
                AV_ZERO64(s->non_zero_count_cache[4]);  // chroma
            }

            if (mb->mode <= MODE_I4x4) {
                intra_predict(s, dst, mb, intra4x4 + 4*mb_x, mb_x, mb_y);
                memset(mb->bmv, 0, sizeof(mb->bmv));
            } else {
                inter_predict(s, dst, mb, mb_x, mb_y);
            }

            if (!mb->skip) {
                idct_mb(s, dst[0], dst[1], dst[2], mb);
            } else {
                AV_ZERO64(s->left_nnz);
                AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

                // Reset the DC block predictors that would exist if this mb had coefficients
                if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                    s->left_nnz[8]      = 0;
                    s->top_nnz[mb_x][8] = 0;
                }
            }

            dst[0] += 16;
            dst[1] += 8;
            dst[2] += 8;
            mb++;
        }
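        // Loop filtering is deferred by one macroblock row: row mb_y-1 is only
        // filtered once row mb_y has been reconstructed, since intra prediction
        // in row mb_y must read the still-unfiltered bottom pixels of the row above.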
        if (mb_y && s->filter.level && avctx->skip_loop_filter < skip_thresh) {
            if (s->filter.simple)
                filter_mb_row_simple(s, mb_y-1);
            else
                filter_mb_row(s, mb_y-1);
        }
    }
    if (s->filter.level && avctx->skip_loop_filter < skip_thresh) {
        if (s->filter.simple)
            filter_mb_row_simple(s, mb_y-1);
        else
            filter_mb_row(s, mb_y-1);
    }

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

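    // Reference frame bookkeeping. The swap case is handled first: if golden and
    // altref are each to take the other's frame, sequential assignment would
    // clobber one of the pointers, so they are exchanged with FFSWAP instead.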
    // check if golden and altref are swapped
    if (s->update_altref == VP56_FRAME_GOLDEN &&
        s->update_golden == VP56_FRAME_GOLDEN2)
        FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
    else {
        if (s->update_altref != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];

        if (s->update_golden != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    }

    if (s->update_last) // move cur->prev
        s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];

    // release no longer referenced frames
    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            avctx->release_buffer(avctx, &s->frames[i]);

    if (!s->invisible) {
        *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
}

static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
    ff_vp8dsp_init(&s->vp8dsp);

    // intra pred needs edge emulation among other things
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n");
        return AVERROR_PATCHWELCOME;
    }

    return 0;
}

static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);
    return 0;
}

AVCodec vp8_decoder = {
    "vp8",
    AVMEDIA_TYPE_VIDEO,
    CODEC_ID_VP8,
    sizeof(VP8Context),
    vp8_decode_init,
    NULL,
    vp8_decode_free,
    vp8_decode_frame,
    CODEC_CAP_DR1,
    .flush = vp8_decode_flush,
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
};