Mercurial > libavcodec.hg
annotate vp8.c @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than
regular MMX code. Examples of this are the Core1 CPU. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | 80b142c2e9f7 |
children | b768afb88d1a |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
24 #include "avcodec.h" | |
25 #include "vp56.h" | |
26 #include "vp8data.h" | |
27 #include "vp8dsp.h" | |
28 #include "h264pred.h" | |
29 #include "rectangle.h" | |
30 | |
31 typedef struct { | |
32 uint8_t segment; | |
33 uint8_t skip; | |
34 // todo: make it possible to check for at least (i4x4 or split_mv) | |
35 // in one op. are others needed? | |
36 uint8_t mode; | |
37 uint8_t ref_frame; | |
38 uint8_t partitioning; | |
39 VP56mv mv; | |
40 VP56mv bmv[16]; | |
41 } VP8Macroblock; | |
42 | |
43 typedef struct { | |
44 AVCodecContext *avctx; | |
45 DSPContext dsp; | |
46 VP8DSPContext vp8dsp; | |
47 H264PredContext hpc; | |
11974 | 48 vp8_mc_func put_pixels_tab[3][3][3]; |
11921 | 49 AVFrame frames[4]; |
50 AVFrame *framep[4]; | |
51 uint8_t *edge_emu_buffer; | |
52 VP56RangeCoder c; ///< header context, includes mb modes and motion vectors | |
53 int profile; | |
54 | |
55 int mb_width; /* number of horizontal MB */ | |
56 int mb_height; /* number of vertical MB */ | |
57 int linesize; | |
58 int uvlinesize; | |
59 | |
60 int keyframe; | |
61 int invisible; | |
62 int update_last; ///< update VP56_FRAME_PREVIOUS with the current one | |
63 int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so | |
64 int update_altref; | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
65 int deblock_filter; |
11921 | 66 |
67 /** | |
68 * If this flag is not set, all the probability updates | |
69 * are discarded after this frame is decoded. | |
70 */ | |
71 int update_probabilities; | |
72 | |
73 /** | |
74 * All coefficients are contained in separate arith coding contexts. | |
75 * There can be 1, 2, 4, or 8 of these after the header context. | |
76 */ | |
77 int num_coeff_partitions; | |
78 VP56RangeCoder coeff_partition[8]; | |
79 | |
80 VP8Macroblock *macroblocks; | |
81 VP8Macroblock *macroblocks_base; | |
82 int mb_stride; | |
83 | |
84 uint8_t *intra4x4_pred_mode; | |
85 uint8_t *intra4x4_pred_mode_base; | |
86 int b4_stride; | |
87 | |
88 /** | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
89 * Cache of the top row needed for intra prediction |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
90 * 16 for luma, 8 for each chroma plane |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
91 */ |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
92 uint8_t (*top_border)[16+8+8]; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
93 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
94 /** |
11921 | 95 * For coeff decode, we need to know whether the above block had non-zero |
96 * coefficients. This means for each macroblock, we need data for 4 luma | |
97 * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 | |
98 * per macroblock. We keep the last row in top_nnz. | |
99 */ | |
100 uint8_t (*top_nnz)[9]; | |
101 DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; | |
102 | |
103 /** | |
104 * This is the index plus one of the last non-zero coeff | |
105 * for each of the blocks in the current macroblock. | |
106 * So, 0 -> no coeffs | |
107 * 1 -> dc-only (special transform) | |
108 * 2+-> full transform | |
109 */ | |
110 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; | |
111 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; | |
112 | |
113 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock | |
114 | |
115 int mbskip_enabled; | |
116 int sign_bias[4]; ///< one state [0, 1] per ref frame type | |
117 | |
118 /** | |
119 * Base parameters for segmentation, i.e. per-macroblock parameters. | |
120 * These must be kept unchanged even if segmentation is not used for | |
121 * a frame, since the values persist between interframes. | |
122 */ | |
123 struct { | |
124 int enabled; | |
125 int absolute_vals; | |
126 int update_map; | |
127 int8_t base_quant[4]; | |
128 int8_t filter_level[4]; ///< base loop filter level | |
129 } segmentation; | |
130 | |
131 /** | |
132 * Macroblocks can have one of 4 different quants in a frame when | |
133 * segmentation is enabled. | |
134 * If segmentation is disabled, only the first segment's values are used. | |
135 */ | |
136 struct { | |
137 // [0] - DC qmul [1] - AC qmul | |
138 int16_t luma_qmul[2]; | |
139 int16_t luma_dc_qmul[2]; ///< luma dc-only block quant | |
140 int16_t chroma_qmul[2]; | |
141 } qmat[4]; | |
142 | |
143 struct { | |
144 int simple; | |
145 int level; | |
146 int sharpness; | |
147 } filter; | |
148 | |
149 struct { | |
150 int enabled; ///< whether each mb can have a different strength based on mode/ref | |
151 | |
152 /** | |
153 * filter strength adjustment for the following macroblock modes: | |
154 * [0] - i4x4 | |
155 * [1] - zero mv | |
156 * [2] - inter modes except for zero or split mv | |
157 * [3] - split mv | |
158 * i16x16 modes never have any adjustment | |
159 */ | |
160 int8_t mode[4]; | |
161 | |
162 /** | |
163 * filter strength adjustment for macroblocks that reference: | |
164 * [0] - intra / VP56_FRAME_CURRENT | |
165 * [1] - VP56_FRAME_PREVIOUS | |
166 * [2] - VP56_FRAME_GOLDEN | |
167 * [3] - altref / VP56_FRAME_GOLDEN2 | |
168 */ | |
169 int8_t ref[4]; | |
170 } lf_delta; | |
171 | |
172 /** | |
173 * These are all of the updatable probabilities for binary decisions. | |
174 * They are only implictly reset on keyframes, making it quite likely | |
175 * for an interframe to desync if a prior frame's header was corrupt | |
176 * or missing outright! | |
177 */ | |
178 struct { | |
179 uint8_t segmentid[3]; | |
180 uint8_t mbskip; | |
181 uint8_t intra; | |
182 uint8_t last; | |
183 uint8_t golden; | |
184 uint8_t pred16x16[4]; | |
185 uint8_t pred8x8c[3]; | |
186 uint8_t token[4][8][3][NUM_DCT_TOKENS-1]; | |
187 uint8_t mvc[2][19]; | |
188 } prob[2]; | |
189 } VP8Context; | |
190 | |
/** Read a 24-bit little-endian value: low 16 bits via AV_RL16, third byte on top. */
#define RL24(p) (AV_RL16(p) | ((p)[2] << 16))
192 | |
193 static void vp8_decode_flush(AVCodecContext *avctx) | |
194 { | |
195 VP8Context *s = avctx->priv_data; | |
196 int i; | |
197 | |
198 for (i = 0; i < 4; i++) | |
199 if (s->frames[i].data[0]) | |
200 avctx->release_buffer(avctx, &s->frames[i]); | |
201 memset(s->framep, 0, sizeof(s->framep)); | |
202 | |
203 av_freep(&s->macroblocks_base); | |
204 av_freep(&s->intra4x4_pred_mode_base); | |
205 av_freep(&s->top_nnz); | |
206 av_freep(&s->edge_emu_buffer); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
207 av_freep(&s->top_border); |
11921 | 208 |
209 s->macroblocks = NULL; | |
210 s->intra4x4_pred_mode = NULL; | |
211 } | |
212 | |
213 static int update_dimensions(VP8Context *s, int width, int height) | |
214 { | |
215 int i; | |
216 | |
217 if (avcodec_check_dimensions(s->avctx, width, height)) | |
218 return AVERROR_INVALIDDATA; | |
219 | |
220 vp8_decode_flush(s->avctx); | |
221 | |
222 avcodec_set_dimensions(s->avctx, width, height); | |
223 | |
224 s->mb_width = (s->avctx->coded_width +15) / 16; | |
225 s->mb_height = (s->avctx->coded_height+15) / 16; | |
226 | |
227 // we allocate a border around the top/left of intra4x4 modes | |
228 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle | |
229 s->mb_stride = s->mb_width+1; | |
230 s->b4_stride = 4*s->mb_stride; | |
231 | |
232 s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); | |
233 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); | |
234 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
235 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); |
11921 | 236 |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
237 if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border) |
12169 | 238 return AVERROR(ENOMEM); |
239 | |
11921 | 240 s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; |
241 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; | |
242 | |
243 memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride); | |
244 for (i = 0; i < 4*s->mb_height; i++) | |
245 s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED; | |
246 | |
247 return 0; | |
248 } | |
249 | |
250 static void parse_segment_info(VP8Context *s) | |
251 { | |
252 VP56RangeCoder *c = &s->c; | |
253 int i; | |
254 | |
255 s->segmentation.update_map = vp8_rac_get(c); | |
256 | |
257 if (vp8_rac_get(c)) { // update segment feature data | |
258 s->segmentation.absolute_vals = vp8_rac_get(c); | |
259 | |
260 for (i = 0; i < 4; i++) | |
261 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); | |
262 | |
263 for (i = 0; i < 4; i++) | |
264 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); | |
265 } | |
266 if (s->segmentation.update_map) | |
267 for (i = 0; i < 3; i++) | |
268 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; | |
269 } | |
270 | |
271 static void update_lf_deltas(VP8Context *s) | |
272 { | |
273 VP56RangeCoder *c = &s->c; | |
274 int i; | |
275 | |
276 for (i = 0; i < 4; i++) | |
277 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); | |
278 | |
279 for (i = 0; i < 4; i++) | |
280 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); | |
281 } | |
282 | |
283 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) | |
284 { | |
285 const uint8_t *sizes = buf; | |
286 int i; | |
287 | |
288 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); | |
289 | |
290 buf += 3*(s->num_coeff_partitions-1); | |
291 buf_size -= 3*(s->num_coeff_partitions-1); | |
292 if (buf_size < 0) | |
293 return -1; | |
294 | |
295 for (i = 0; i < s->num_coeff_partitions-1; i++) { | |
296 int size = RL24(sizes + 3*i); | |
297 if (buf_size - size < 0) | |
298 return -1; | |
299 | |
300 vp56_init_range_decoder(&s->coeff_partition[i], buf, size); | |
301 buf += size; | |
302 buf_size -= size; | |
303 } | |
304 vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); | |
305 | |
306 return 0; | |
307 } | |
308 | |
309 static void get_quants(VP8Context *s) | |
310 { | |
311 VP56RangeCoder *c = &s->c; | |
312 int i, base_qi; | |
313 | |
314 int yac_qi = vp8_rac_get_uint(c, 7); | |
315 int ydc_delta = vp8_rac_get_sint(c, 4); | |
316 int y2dc_delta = vp8_rac_get_sint(c, 4); | |
317 int y2ac_delta = vp8_rac_get_sint(c, 4); | |
318 int uvdc_delta = vp8_rac_get_sint(c, 4); | |
319 int uvac_delta = vp8_rac_get_sint(c, 4); | |
320 | |
321 for (i = 0; i < 4; i++) { | |
322 if (s->segmentation.enabled) { | |
323 base_qi = s->segmentation.base_quant[i]; | |
324 if (!s->segmentation.absolute_vals) | |
325 base_qi += yac_qi; | |
326 } else | |
327 base_qi = yac_qi; | |
328 | |
329 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; | |
330 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; | |
331 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; | |
332 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; | |
333 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; | |
334 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; | |
335 | |
336 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); | |
337 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); | |
338 } | |
339 } | |
340 | |
341 /** | |
342 * Determine which buffers golden and altref should be updated with after this frame. | |
343 * The spec isn't clear here, so I'm going by my understanding of what libvpx does | |
344 * | |
345 * Intra frames update all 3 references | |
346 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set | |
347 * If the update (golden|altref) flag is set, it's updated with the current frame | |
348 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. | |
349 * If the flag is not set, the number read means: | |
350 * 0: no update | |
351 * 1: VP56_FRAME_PREVIOUS | |
352 * 2: update golden with altref, or update altref with golden | |
353 */ | |
354 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) | |
355 { | |
356 VP56RangeCoder *c = &s->c; | |
357 | |
358 if (update) | |
359 return VP56_FRAME_CURRENT; | |
360 | |
361 switch (vp8_rac_get_uint(c, 2)) { | |
362 case 1: | |
363 return VP56_FRAME_PREVIOUS; | |
364 case 2: | |
365 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; | |
366 } | |
367 return VP56_FRAME_NONE; | |
368 } | |
369 | |
370 static void update_refs(VP8Context *s) | |
371 { | |
372 VP56RangeCoder *c = &s->c; | |
373 | |
374 int update_golden = vp8_rac_get(c); | |
375 int update_altref = vp8_rac_get(c); | |
376 | |
377 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); | |
378 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); | |
379 } | |
380 | |
381 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) | |
382 { | |
383 VP56RangeCoder *c = &s->c; | |
384 int header_size, hscale, vscale, i, j, k, l, ret; | |
385 int width = s->avctx->width; | |
386 int height = s->avctx->height; | |
387 | |
388 s->keyframe = !(buf[0] & 1); | |
389 s->profile = (buf[0]>>1) & 7; | |
390 s->invisible = !(buf[0] & 0x10); | |
391 header_size = RL24(buf) >> 5; | |
392 buf += 3; | |
393 buf_size -= 3; | |
394 | |
11974 | 395 if (s->profile > 3) |
396 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); | |
397 | |
398 if (!s->profile) | |
399 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); | |
400 else // profile 1-3 use bilinear, 4+ aren't defined so whatever | |
401 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); | |
11921 | 402 |
403 if (header_size > buf_size - 7*s->keyframe) { | |
404 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); | |
405 return AVERROR_INVALIDDATA; | |
406 } | |
407 | |
408 if (s->keyframe) { | |
409 if (RL24(buf) != 0x2a019d) { | |
410 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", RL24(buf)); | |
411 return AVERROR_INVALIDDATA; | |
412 } | |
413 width = AV_RL16(buf+3) & 0x3fff; | |
414 height = AV_RL16(buf+5) & 0x3fff; | |
415 hscale = buf[4] >> 6; | |
416 vscale = buf[6] >> 6; | |
417 buf += 7; | |
418 buf_size -= 7; | |
419 | |
11970
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
420 if (hscale || vscale) |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
421 av_log_missing_feature(s->avctx, "Upscaling", 1); |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
422 |
11921 | 423 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; |
424 memcpy(s->prob->token , vp8_token_default_probs , sizeof(s->prob->token)); | |
425 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); | |
426 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); | |
427 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); | |
428 memset(&s->segmentation, 0, sizeof(s->segmentation)); | |
429 } | |
430 | |
431 if (!s->macroblocks_base || /* first frame */ | |
432 width != s->avctx->width || height != s->avctx->height) { | |
433 if ((ret = update_dimensions(s, width, height) < 0)) | |
434 return ret; | |
435 } | |
436 | |
437 vp56_init_range_decoder(c, buf, header_size); | |
438 buf += header_size; | |
439 buf_size -= header_size; | |
440 | |
441 if (s->keyframe) { | |
442 if (vp8_rac_get(c)) | |
443 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); | |
444 vp8_rac_get(c); // whether we can skip clamping in dsp functions | |
445 } | |
446 | |
447 if ((s->segmentation.enabled = vp8_rac_get(c))) | |
448 parse_segment_info(s); | |
449 else | |
450 s->segmentation.update_map = 0; // FIXME: move this to some init function? | |
451 | |
452 s->filter.simple = vp8_rac_get(c); | |
453 s->filter.level = vp8_rac_get_uint(c, 6); | |
454 s->filter.sharpness = vp8_rac_get_uint(c, 3); | |
455 | |
456 if ((s->lf_delta.enabled = vp8_rac_get(c))) | |
457 if (vp8_rac_get(c)) | |
458 update_lf_deltas(s); | |
459 | |
460 if (setup_partitions(s, buf, buf_size)) { | |
461 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); | |
462 return AVERROR_INVALIDDATA; | |
463 } | |
464 | |
465 get_quants(s); | |
466 | |
467 if (!s->keyframe) { | |
468 update_refs(s); | |
469 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); | |
470 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); | |
471 } | |
472 | |
473 // if we aren't saving this frame's probabilities for future frames, | |
474 // make a copy of the current probabilities | |
475 if (!(s->update_probabilities = vp8_rac_get(c))) | |
476 s->prob[1] = s->prob[0]; | |
477 | |
478 s->update_last = s->keyframe || vp8_rac_get(c); | |
479 | |
480 for (i = 0; i < 4; i++) | |
481 for (j = 0; j < 8; j++) | |
482 for (k = 0; k < 3; k++) | |
483 for (l = 0; l < NUM_DCT_TOKENS-1; l++) | |
484 if (vp56_rac_get_prob(c, vp8_token_update_probs[i][j][k][l])) | |
485 s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8); | |
486 | |
487 if ((s->mbskip_enabled = vp8_rac_get(c))) | |
488 s->prob->mbskip = vp8_rac_get_uint(c, 8); | |
489 | |
490 if (!s->keyframe) { | |
491 s->prob->intra = vp8_rac_get_uint(c, 8); | |
492 s->prob->last = vp8_rac_get_uint(c, 8); | |
493 s->prob->golden = vp8_rac_get_uint(c, 8); | |
494 | |
495 if (vp8_rac_get(c)) | |
496 for (i = 0; i < 4; i++) | |
497 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); | |
498 if (vp8_rac_get(c)) | |
499 for (i = 0; i < 3; i++) | |
500 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); | |
501 | |
502 // 17.2 MV probability update | |
503 for (i = 0; i < 2; i++) | |
504 for (j = 0; j < 19; j++) | |
505 if (vp56_rac_get_prob(c, vp8_mv_update_prob[i][j])) | |
506 s->prob->mvc[i][j] = vp8_rac_get_nn(c); | |
507 } | |
508 | |
509 return 0; | |
510 } | |
511 | |
512 static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, | |
513 int mb_x, int mb_y) | |
514 { | |
515 #define MARGIN (16 << 2) | |
516 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), | |
517 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); | |
518 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), | |
519 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); | |
520 } | |
521 | |
522 static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, | |
523 VP56mv near[2], VP56mv *best, int cnt[4]) | |
524 { | |
525 VP8Macroblock *mb_edge[3] = { mb - s->mb_stride /* top */, | |
526 mb - 1 /* left */, | |
527 mb - s->mb_stride - 1 /* top-left */ }; | |
528 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; | |
529 VP56mv near_mv[4] = {{ 0 }}; | |
530 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; | |
531 int idx = CNT_ZERO, n; | |
532 int best_idx = CNT_ZERO; | |
533 | |
534 /* Process MB on top, left and top-left */ | |
535 for (n = 0; n < 3; n++) { | |
536 VP8Macroblock *edge = mb_edge[n]; | |
537 if (edge->ref_frame != VP56_FRAME_CURRENT) { | |
538 if (edge->mv.x | edge->mv.y) { | |
539 VP56mv tmp = edge->mv; | |
540 if (s->sign_bias[mb->ref_frame] != s->sign_bias[edge->ref_frame]) { | |
541 tmp.x *= -1; | |
542 tmp.y *= -1; | |
543 } | |
544 if ((tmp.x ^ near_mv[idx].x) | (tmp.y ^ near_mv[idx].y)) | |
545 near_mv[++idx] = tmp; | |
546 cnt[idx] += 1 + (n != 2); | |
547 } else | |
548 cnt[CNT_ZERO] += 1 + (n != 2); | |
549 } | |
550 } | |
551 | |
552 /* If we have three distinct MV's, merge first and last if they're the same */ | |
553 if (cnt[CNT_SPLITMV] && | |
554 !((near_mv[1+EDGE_TOP].x ^ near_mv[1+EDGE_TOPLEFT].x) | | |
555 (near_mv[1+EDGE_TOP].y ^ near_mv[1+EDGE_TOPLEFT].y))) | |
556 cnt[CNT_NEAREST] += 1; | |
557 | |
558 cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + | |
559 (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + | |
560 (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); | |
561 | |
562 /* Swap near and nearest if necessary */ | |
563 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { | |
564 FFSWAP(int, cnt[CNT_NEAREST], cnt[CNT_NEAR]); | |
565 FFSWAP(VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); | |
566 } | |
567 | |
568 /* Choose the best mv out of 0,0 and the nearest mv */ | |
569 if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO]) | |
570 best_idx = CNT_NEAREST; | |
571 | |
572 clamp_mv(s, best, &near_mv[best_idx], mb_x, mb_y); | |
573 near[0] = near_mv[CNT_NEAREST]; | |
574 near[1] = near_mv[CNT_NEAR]; | |
575 } | |
576 | |
577 /** | |
578 * Motion vector coding, 17.1. | |
579 */ | |
580 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) | |
581 { | |
582 int x = 0; | |
583 | |
584 if (vp56_rac_get_prob(c, p[0])) { | |
585 int i; | |
586 | |
587 for (i = 0; i < 3; i++) | |
588 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
589 for (i = 9; i > 3; i--) | |
590 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
591 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) | |
592 x += 8; | |
593 } else | |
594 x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]); | |
595 | |
596 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; | |
597 } | |
598 | |
599 static const uint8_t *get_submv_prob(const VP56mv *left, const VP56mv *top) | |
600 { | |
601 int l_is_zero = !(left->x | left->y); | |
602 int t_is_zero = !(top->x | top->y); | |
603 int equal = !((left->x ^ top->x) | (left->y ^ top->y)); | |
604 | |
605 if (equal) | |
606 return l_is_zero ? vp8_submv_prob[4] : vp8_submv_prob[3]; | |
607 if (t_is_zero) | |
608 return vp8_submv_prob[2]; | |
609 return l_is_zero ? vp8_submv_prob[1] : vp8_submv_prob[0]; | |
610 } | |
611 | |
612 /** | |
613 * Split motion vector prediction, 16.4. | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
614 * @returns the number of motion vectors parsed (2, 4 or 16) |
11921 | 615 */ |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
616 static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, |
11921 | 617 VP8Macroblock *mb, VP56mv *base_mv) |
618 { | |
619 int part_idx = mb->partitioning = | |
620 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); | |
621 int n, num = vp8_mbsplit_count[part_idx]; | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
622 const uint8_t *mbsplits = vp8_mbsplits[part_idx], |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
623 *firstidx = vp8_mbfirstidx[part_idx]; |
11921 | 624 |
625 for (n = 0; n < num; n++) { | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
626 int k = firstidx[n]; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
627 const VP56mv *left, *above; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
628 const uint8_t *submv_prob; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
629 |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
630 if (!(k & 3)) { |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
631 VP8Macroblock *left_mb = &mb[-1]; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
632 left = &left_mb->bmv[vp8_mbsplits[left_mb->partitioning][k + 3]]; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
633 } else |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
634 left = &mb->bmv[mbsplits[k - 1]]; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
635 if (k <= 3) { |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
636 VP8Macroblock *above_mb = &mb[-s->mb_stride]; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
637 above = &above_mb->bmv[vp8_mbsplits[above_mb->partitioning][k + 12]]; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
638 } else |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
639 above = &mb->bmv[mbsplits[k - 4]]; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
640 |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
641 submv_prob = get_submv_prob(left, above); |
11921 | 642 |
643 switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) { | |
644 case VP8_SUBMVMODE_NEW4X4: | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
645 mb->bmv[n].y = base_mv->y + read_mv_component(c, s->prob->mvc[0]); |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
646 mb->bmv[n].x = base_mv->x + read_mv_component(c, s->prob->mvc[1]); |
11921 | 647 break; |
648 case VP8_SUBMVMODE_ZERO4X4: | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
649 mb->bmv[n].x = 0; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
650 mb->bmv[n].y = 0; |
11921 | 651 break; |
652 case VP8_SUBMVMODE_LEFT4X4: | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
653 mb->bmv[n] = *left; |
11921 | 654 break; |
655 case VP8_SUBMVMODE_TOP4X4: | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
656 mb->bmv[n] = *above; |
11921 | 657 break; |
658 } | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
659 } |
11921 | 660 |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
661 return num; |
11921 | 662 } |
663 | |
664 static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, | |
665 int stride, int keyframe) | |
666 { | |
667 int x, y, t, l; | |
668 const uint8_t *ctx = vp8_pred4x4_prob_inter; | |
669 | |
670 for (y = 0; y < 4; y++) { | |
671 for (x = 0; x < 4; x++) { | |
672 if (keyframe) { | |
673 t = intra4x4[x - stride]; | |
674 l = intra4x4[x - 1]; | |
675 ctx = vp8_pred4x4_prob_intra[t][l]; | |
676 } | |
677 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); | |
678 } | |
679 intra4x4 += stride; | |
680 } | |
681 } | |
682 | |
683 static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, | |
684 uint8_t *intra4x4) | |
685 { | |
686 VP56RangeCoder *c = &s->c; | |
687 int n; | |
688 | |
689 if (s->segmentation.update_map) | |
690 mb->segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); | |
691 | |
692 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; | |
693 | |
694 if (s->keyframe) { | |
695 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); | |
696 | |
697 if (mb->mode == MODE_I4x4) { | |
698 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1); | |
699 } else | |
700 fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); | |
701 | |
702 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); | |
703 mb->ref_frame = VP56_FRAME_CURRENT; | |
704 } else if (vp56_rac_get_prob(c, s->prob->intra)) { | |
705 VP56mv near[2], best; | |
706 int cnt[4] = { 0 }; | |
707 uint8_t p[4]; | |
708 | |
709 // inter MB, 16.2 | |
710 if (vp56_rac_get_prob(c, s->prob->last)) | |
711 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? | |
712 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; | |
713 else | |
714 mb->ref_frame = VP56_FRAME_PREVIOUS; | |
715 | |
716 // motion vectors, 16.3 | |
717 find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); | |
718 for (n = 0; n < 4; n++) | |
719 p[n] = vp8_mode_contexts[cnt[n]][n]; | |
720 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); | |
721 switch (mb->mode) { | |
722 case VP8_MVMODE_SPLIT: | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
723 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, &best) - 1]; |
11921 | 724 break; |
725 case VP8_MVMODE_ZERO: | |
726 mb->mv.x = 0; | |
727 mb->mv.y = 0; | |
728 break; | |
729 case VP8_MVMODE_NEAREST: | |
730 clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); | |
731 break; | |
732 case VP8_MVMODE_NEAR: | |
733 clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); | |
734 break; | |
735 case VP8_MVMODE_NEW: | |
736 mb->mv.y = best.y + read_mv_component(c, s->prob->mvc[0]); | |
737 mb->mv.x = best.x + read_mv_component(c, s->prob->mvc[1]); | |
738 break; | |
739 } | |
740 if (mb->mode != VP8_MVMODE_SPLIT) { | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
741 mb->partitioning = VP8_SPLITMVMODE_NONE; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
742 mb->bmv[0] = mb->mv; |
11921 | 743 } |
744 } else { | |
745 // intra MB, 16.1 | |
746 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); | |
747 | |
748 if (mb->mode == MODE_I4x4) { | |
749 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 0); | |
750 } else | |
751 fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); | |
752 | |
753 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); | |
754 mb->ref_frame = VP56_FRAME_CURRENT; | |
755 } | |
756 } | |
757 | |
758 /** | |
12115 | 759 * @param c arithmetic bitstream reader context |
760 * @param block destination for block coefficients | |
761 * @param probs probabilities to use when reading trees from the bitstream | |
11921 | 762 * @param i initial coeff index, 0 unless a separate DC block is coded |
763 * @param zero_nhood the initial prediction context for number of surrounding | |
764 * all-zero blocks (only left/top, so 0-2) | |
12062
372f7fed2806
Avoid square brackets in Doxygen comments; Doxygen chokes on them.
diego
parents:
11990
diff
changeset
|
765 * @param qmul array holding the dc/ac dequant factor at position 0/1 |
11921 | 766 * @return 0 if no coeffs were decoded |
767 * otherwise, the index of the last coeff decoded plus one | |
768 */ | |
769 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], | |
770 uint8_t probs[8][3][NUM_DCT_TOKENS-1], | |
771 int i, int zero_nhood, int16_t qmul[2]) | |
772 { | |
773 int token, nonzero = 0; | |
774 int offset = 0; | |
775 | |
776 for (; i < 16; i++) { | |
777 token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); | |
778 | |
779 if (token == DCT_EOB) | |
780 break; | |
781 else if (token >= DCT_CAT1) { | |
782 int cat = token-DCT_CAT1; | |
783 token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); | |
784 token += vp8_dct_cat_offset[cat]; | |
785 } | |
786 | |
787 // after the first token, the non-zero prediction context becomes | |
788 // based on the last decoded coeff | |
789 if (!token) { | |
790 zero_nhood = 0; | |
791 offset = 1; | |
792 continue; | |
793 } else if (token == 1) | |
794 zero_nhood = 1; | |
795 else | |
796 zero_nhood = 2; | |
797 | |
798 // todo: full [16] qmat? load into register? | |
799 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i]; | |
800 nonzero = i+1; | |
801 offset = 0; | |
802 } | |
803 return nonzero; | |
804 } | |
805 | |
806 static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, | |
807 uint8_t t_nnz[9], uint8_t l_nnz[9]) | |
808 { | |
809 LOCAL_ALIGNED_16(DCTELEM, dc,[16]); | |
810 int i, x, y, luma_start = 0, luma_ctx = 3; | |
811 int nnz_pred, nnz, nnz_total = 0; | |
812 int segment = s->segmentation.enabled ? mb->segment : 0; | |
813 | |
814 s->dsp.clear_blocks((DCTELEM *)s->block); | |
815 | |
816 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
817 AV_ZERO128(dc); | |
818 AV_ZERO128(dc+8); | |
819 nnz_pred = t_nnz[8] + l_nnz[8]; | |
820 | |
821 // decode DC values and do hadamard | |
822 nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred, | |
823 s->qmat[segment].luma_dc_qmul); | |
824 l_nnz[8] = t_nnz[8] = !!nnz; | |
825 nnz_total += nnz; | |
826 s->vp8dsp.vp8_luma_dc_wht(s->block, dc); | |
827 luma_start = 1; | |
828 luma_ctx = 0; | |
829 } | |
830 | |
831 // luma blocks | |
832 for (y = 0; y < 4; y++) | |
833 for (x = 0; x < 4; x++) { | |
834 nnz_pred = l_nnz[y] + t_nnz[x]; | |
835 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, | |
836 nnz_pred, s->qmat[segment].luma_qmul); | |
837 // nnz+luma_start may be one more than the actual last index, but we don't care | |
838 s->non_zero_count_cache[y][x] = nnz + luma_start; | |
839 t_nnz[x] = l_nnz[y] = !!nnz; | |
840 nnz_total += nnz; | |
841 } | |
842 | |
843 // chroma blocks | |
844 // TODO: what to do about dimensions? 2nd dim for luma is x, | |
845 // but for chroma it's (y<<1)|x | |
846 for (i = 4; i < 6; i++) | |
847 for (y = 0; y < 2; y++) | |
848 for (x = 0; x < 2; x++) { | |
849 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; | |
850 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, | |
851 nnz_pred, s->qmat[segment].chroma_qmul); | |
852 s->non_zero_count_cache[i][(y<<1)+x] = nnz; | |
853 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; | |
854 nnz_total += nnz; | |
855 } | |
856 | |
857 // if there were no coded coeffs despite the macroblock not being marked skip, | |
858 // we MUST not do the inner loop filter and should not do IDCT | |
859 // Since skip isn't used for bitstream prediction, just manually set it. | |
860 if (!nnz_total) | |
861 mb->skip = 1; | |
862 } | |
863 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
864 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
865 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
866 int linesize, int uvlinesize, int simple) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
867 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
868 AV_COPY128(top_border, src_y + 15*linesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
869 if (!simple) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
870 AV_COPY64(top_border+16, src_cb + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
871 AV_COPY64(top_border+24, src_cr + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
872 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
873 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
874 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
875 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
876 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
877 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
878 int simple, int xchg) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
879 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
880 uint8_t *top_border_m1 = top_border-32; // for TL prediction |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
881 src_y -= linesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
882 src_cb -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
883 src_cr -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
884 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
885 #define XCHG(a,b,xchg)\ |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
886 if (xchg) AV_SWAP64(b,a);\ |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
887 else AV_COPY64(b,a); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
888 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
889 XCHG(top_border_m1+8, src_y-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
890 XCHG(top_border, src_y, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
891 XCHG(top_border+8, src_y+8, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
892 if (mb_x < mb_width-1) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
893 XCHG(top_border+32, src_y+16, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
894 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
895 // only copy chroma for normal loop filter |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
896 // or to initialize the top row to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
897 if (!simple || !mb_y) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
898 XCHG(top_border_m1+16, src_cb-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
899 XCHG(top_border_m1+24, src_cr-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
900 XCHG(top_border+16, src_cb, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
901 XCHG(top_border+24, src_cr, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
902 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
903 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
904 |
11921 | 905 static int check_intra_pred_mode(int mode, int mb_x, int mb_y) |
906 { | |
907 if (mode == DC_PRED8x8) { | |
908 if (!(mb_x|mb_y)) | |
909 mode = DC_128_PRED8x8; | |
910 else if (!mb_y) | |
911 mode = LEFT_DC_PRED8x8; | |
912 else if (!mb_x) | |
913 mode = TOP_DC_PRED8x8; | |
914 } | |
915 return mode; | |
916 } | |
917 | |
918 static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | |
919 uint8_t *bmode, int mb_x, int mb_y) | |
920 { | |
921 int x, y, mode, nnz, tr; | |
922 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
923 // for the first row, we need to run xchg_mb_border to init the top edge to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
924 // otherwise, skip it if we aren't going to deblock |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
925 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
926 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
927 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
928 s->filter.simple, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
929 |
11921 | 930 if (mb->mode < MODE_I4x4) { |
931 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); | |
932 s->hpc.pred16x16[mode](dst[0], s->linesize); | |
933 } else { | |
934 uint8_t *ptr = dst[0]; | |
935 | |
936 // all blocks on the right edge of the macroblock use bottom edge | |
937 // the top macroblock for their topright edge | |
938 uint8_t *tr_right = ptr - s->linesize + 16; | |
939 | |
940 // if we're on the right edge of the frame, said edge is extended | |
941 // from the top macroblock | |
942 if (mb_x == s->mb_width-1) { | |
943 tr = tr_right[-1]*0x01010101; | |
944 tr_right = (uint8_t *)&tr; | |
945 } | |
946 | |
947 for (y = 0; y < 4; y++) { | |
948 uint8_t *topright = ptr + 4 - s->linesize; | |
949 for (x = 0; x < 4; x++) { | |
950 if (x == 3) | |
951 topright = tr_right; | |
952 | |
953 s->hpc.pred4x4[bmode[x]](ptr+4*x, topright, s->linesize); | |
954 | |
955 nnz = s->non_zero_count_cache[y][x]; | |
956 if (nnz) { | |
957 if (nnz == 1) | |
958 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); | |
959 else | |
960 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); | |
961 } | |
962 topright += 4; | |
963 } | |
964 | |
965 ptr += 4*s->linesize; | |
966 bmode += s->b4_stride; | |
967 } | |
968 } | |
969 | |
970 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); | |
971 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); | |
972 s->hpc.pred8x8[mode](dst[2], s->uvlinesize); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
973 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
974 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
975 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
976 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
977 s->filter.simple, 0); |
11921 | 978 } |
979 | |
980 /** | |
981 * Generic MC function. | |
982 * | |
983 * @param s VP8 decoding context | |
984 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes | |
985 * @param dst target buffer for block data at block position | |
986 * @param src reference picture buffer at origin (0, 0) | |
987 * @param mv motion vector (relative to block position) to get pixel data from | |
988 * @param x_off horizontal position of block from origin (0, 0) | |
989 * @param y_off vertical position of block from origin (0, 0) | |
990 * @param block_w width of block (16, 8 or 4) | |
991 * @param block_h height of block (always same as block_w) | |
992 * @param width width of src/dst plane data | |
993 * @param height height of src/dst plane data | |
994 * @param linesize size of a single line of plane data, including padding | |
12115 | 995 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
11921 | 996 */ |
997 static inline void vp8_mc(VP8Context *s, int luma, | |
998 uint8_t *dst, uint8_t *src, const VP56mv *mv, | |
999 int x_off, int y_off, int block_w, int block_h, | |
1000 int width, int height, int linesize, | |
11950 | 1001 vp8_mc_func mc_func[3][3]) |
11921 | 1002 { |
1003 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; | |
1004 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; | |
1005 int my = (mv->y << luma)&7, my_idx = idx[my]; | |
1006 | |
1007 x_off += mv->x >> (3 - luma); | |
1008 y_off += mv->y >> (3 - luma); | |
1009 | |
1010 // edge emulation | |
1011 src += y_off * linesize + x_off; | |
1012 if (x_off < 2 || x_off >= width - block_w - 3 || | |
1013 y_off < 2 || y_off >= height - block_h - 3) { | |
1014 ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize, | |
1015 block_w + 5, block_h + 5, | |
1016 x_off - 2, y_off - 2, width, height); | |
1017 src = s->edge_emu_buffer + 2 + linesize * 2; | |
1018 } | |
1019 | |
11950 | 1020 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); |
11921 | 1021 } |
1022 | |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1023 static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1024 AVFrame *ref_frame, int x_off, int y_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1025 int bx_off, int by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1026 int block_w, int block_h, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1027 int width, int height, VP56mv *mv) |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1028 { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1029 VP56mv uvmv = *mv; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1030 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1031 /* Y */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1032 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1033 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1034 block_w, block_h, width, height, s->linesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1035 s->put_pixels_tab[block_w == 8]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1036 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1037 /* U/V */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1038 if (s->profile == 3) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1039 uvmv.x &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1040 uvmv.y &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1041 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1042 x_off >>= 1; y_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1043 bx_off >>= 1; by_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1044 width >>= 1; height >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1045 block_w >>= 1; block_h >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1046 vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1047 ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1048 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1049 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1050 vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1051 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1052 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1053 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1054 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1055 |
11921 | 1056 /** |
1057 * Apply motion vectors to prediction buffer, chapter 18. | |
1058 */ | |
1059 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | |
1060 int mb_x, int mb_y) | |
1061 { | |
1062 int x_off = mb_x << 4, y_off = mb_y << 4; | |
1063 int width = 16*s->mb_width, height = 16*s->mb_height; | |
1064 | |
1065 if (mb->mode < VP8_MVMODE_SPLIT) { | |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1066 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1067 0, 0, 16, 16, width, height, &mb->mv); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1068 } else switch (mb->partitioning) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1069 case VP8_SPLITMVMODE_4x4: { |
11921 | 1070 int x, y; |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1071 VP56mv uvmv; |
11921 | 1072 |
1073 /* Y */ | |
1074 for (y = 0; y < 4; y++) { | |
1075 for (x = 0; x < 4; x++) { | |
1076 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, | |
1077 s->framep[mb->ref_frame]->data[0], &mb->bmv[4*y + x], | |
1078 4*x + x_off, 4*y + y_off, 4, 4, | |
1079 width, height, s->linesize, | |
11974 | 1080 s->put_pixels_tab[2]); |
11921 | 1081 } |
1082 } | |
1083 | |
1084 /* U/V */ | |
1085 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; | |
1086 for (y = 0; y < 2; y++) { | |
1087 for (x = 0; x < 2; x++) { | |
1088 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + | |
1089 mb->bmv[ 2*y * 4 + 2*x+1].x + | |
1090 mb->bmv[(2*y+1) * 4 + 2*x ].x + | |
1091 mb->bmv[(2*y+1) * 4 + 2*x+1].x; | |
1092 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + | |
1093 mb->bmv[ 2*y * 4 + 2*x+1].y + | |
1094 mb->bmv[(2*y+1) * 4 + 2*x ].y + | |
1095 mb->bmv[(2*y+1) * 4 + 2*x+1].y; | |
11937
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1096 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2; |
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1097 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2; |
11921 | 1098 if (s->profile == 3) { |
1099 uvmv.x &= ~7; | |
1100 uvmv.y &= ~7; | |
1101 } | |
1102 vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, | |
1103 s->framep[mb->ref_frame]->data[1], &uvmv, | |
1104 4*x + x_off, 4*y + y_off, 4, 4, | |
1105 width, height, s->uvlinesize, | |
11974 | 1106 s->put_pixels_tab[2]); |
11921 | 1107 vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, |
1108 s->framep[mb->ref_frame]->data[2], &uvmv, | |
1109 4*x + x_off, 4*y + y_off, 4, 4, | |
1110 width, height, s->uvlinesize, | |
11974 | 1111 s->put_pixels_tab[2]); |
11921 | 1112 } |
1113 } | |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1114 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1115 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1116 case VP8_SPLITMVMODE_16x8: |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1117 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1118 0, 0, 16, 8, width, height, &mb->bmv[0]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1119 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
1120 0, 8, 16, 8, width, height, &mb->bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1121 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1122 case VP8_SPLITMVMODE_8x16: |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1123 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1124 0, 0, 8, 16, width, height, &mb->bmv[0]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1125 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
1126 8, 0, 8, 16, width, height, &mb->bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1127 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1128 case VP8_SPLITMVMODE_8x8: |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1129 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1130 0, 0, 8, 8, width, height, &mb->bmv[0]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1131 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
1132 8, 0, 8, 8, width, height, &mb->bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1133 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
1134 0, 8, 8, 8, width, height, &mb->bmv[2]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1135 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
1136 8, 8, 8, 8, width, height, &mb->bmv[3]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1137 break; |
11921 | 1138 } |
1139 } | |
1140 | |
1141 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, | |
1142 VP8Macroblock *mb) | |
1143 { | |
1144 int x, y, nnz; | |
1145 | |
1146 if (mb->mode != MODE_I4x4) | |
1147 for (y = 0; y < 4; y++) { | |
1148 for (x = 0; x < 4; x++) { | |
1149 nnz = s->non_zero_count_cache[y][x]; | |
1150 if (nnz) { | |
1151 if (nnz == 1) | |
1152 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1153 else | |
1154 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1155 } | |
1156 } | |
1157 y_dst += 4*s->linesize; | |
1158 } | |
1159 | |
1160 for (y = 0; y < 2; y++) { | |
1161 for (x = 0; x < 2; x++) { | |
1162 nnz = s->non_zero_count_cache[4][(y<<1)+x]; | |
1163 if (nnz) { | |
1164 if (nnz == 1) | |
1165 s->vp8dsp.vp8_idct_dc_add(u_dst+4*x, s->block[4][(y<<1)+x], s->uvlinesize); | |
1166 else | |
1167 s->vp8dsp.vp8_idct_add(u_dst+4*x, s->block[4][(y<<1)+x], s->uvlinesize); | |
1168 } | |
1169 | |
1170 nnz = s->non_zero_count_cache[5][(y<<1)+x]; | |
1171 if (nnz) { | |
1172 if (nnz == 1) | |
1173 s->vp8dsp.vp8_idct_dc_add(v_dst+4*x, s->block[5][(y<<1)+x], s->uvlinesize); | |
1174 else | |
1175 s->vp8dsp.vp8_idct_add(v_dst+4*x, s->block[5][(y<<1)+x], s->uvlinesize); | |
1176 } | |
1177 } | |
1178 u_dst += 4*s->uvlinesize; | |
1179 v_dst += 4*s->uvlinesize; | |
1180 } | |
1181 } | |
1182 | |
1183 static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, int *inner, int *hev_thresh) | |
1184 { | |
1185 int interior_limit, filter_level; | |
1186 | |
1187 if (s->segmentation.enabled) { | |
1188 filter_level = s->segmentation.filter_level[mb->segment]; | |
1189 if (!s->segmentation.absolute_vals) | |
1190 filter_level += s->filter.level; | |
1191 } else | |
1192 filter_level = s->filter.level; | |
1193 | |
1194 if (s->lf_delta.enabled) { | |
1195 filter_level += s->lf_delta.ref[mb->ref_frame]; | |
1196 | |
1197 if (mb->ref_frame == VP56_FRAME_CURRENT) { | |
1198 if (mb->mode == MODE_I4x4) | |
1199 filter_level += s->lf_delta.mode[0]; | |
1200 } else { | |
1201 if (mb->mode == VP8_MVMODE_ZERO) | |
1202 filter_level += s->lf_delta.mode[1]; | |
1203 else if (mb->mode == VP8_MVMODE_SPLIT) | |
1204 filter_level += s->lf_delta.mode[3]; | |
1205 else | |
1206 filter_level += s->lf_delta.mode[2]; | |
1207 } | |
1208 } | |
1209 filter_level = av_clip(filter_level, 0, 63); | |
1210 | |
1211 interior_limit = filter_level; | |
1212 if (s->filter.sharpness) { | |
1213 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; | |
1214 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); | |
1215 } | |
1216 interior_limit = FFMAX(interior_limit, 1); | |
1217 | |
1218 *level = filter_level; | |
1219 *inner = interior_limit; | |
1220 | |
1221 if (hev_thresh) { | |
1222 *hev_thresh = filter_level >= 15; | |
1223 | |
1224 if (s->keyframe) { | |
1225 if (filter_level >= 40) | |
1226 *hev_thresh = 2; | |
1227 } else { | |
1228 if (filter_level >= 40) | |
1229 *hev_thresh = 3; | |
1230 else if (filter_level >= 20) | |
1231 *hev_thresh = 2; | |
1232 } | |
1233 } | |
1234 } | |
1235 | |
1236 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y) | |
1237 { | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1238 int filter_level, inner_limit, hev_thresh, mbedge_lim, bedge_lim; |
11921 | 1239 |
1240 filter_level_for_mb(s, mb, &filter_level, &inner_limit, &hev_thresh); | |
1241 if (!filter_level) | |
1242 return; | |
1243 | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1244 mbedge_lim = 2*(filter_level+2) + inner_limit; |
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1245 bedge_lim = 2* filter_level + inner_limit; |
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1246 |
11921 | 1247 if (mb_x) { |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1248 s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1249 mbedge_lim, inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1250 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1251 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1252 } |
1253 | |
1254 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { | |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1255 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, s->linesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1256 inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1257 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, s->linesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1258 inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1259 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, s->linesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1260 inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1261 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1262 s->uvlinesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1263 inner_limit, hev_thresh); |
11921 | 1264 } |
1265 | |
1266 if (mb_y) { | |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1267 s->vp8dsp.vp8_v_loop_filter16y(dst[0], s->linesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1268 mbedge_lim, inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1269 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], s->uvlinesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1270 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1271 } |
1272 | |
1273 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { | |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1274 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*s->linesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1275 s->linesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1276 inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1277 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*s->linesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1278 s->linesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1279 inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1280 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*s->linesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1281 s->linesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1282 inner_limit, hev_thresh); |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1283 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * s->uvlinesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1284 dst[2] + 4 * s->uvlinesize, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1285 s->uvlinesize, bedge_lim, |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1286 inner_limit, hev_thresh); |
11921 | 1287 } |
1288 } | |
1289 | |
1290 static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y) | |
1291 { | |
1292 int filter_level, inner_limit, mbedge_lim, bedge_lim; | |
1293 | |
1294 filter_level_for_mb(s, mb, &filter_level, &inner_limit, NULL); | |
1295 if (!filter_level) | |
1296 return; | |
1297 | |
1298 mbedge_lim = 2*(filter_level+2) + inner_limit; | |
1299 bedge_lim = 2* filter_level + inner_limit; | |
1300 | |
1301 if (mb_x) | |
1302 s->vp8dsp.vp8_h_loop_filter_simple(dst, s->linesize, mbedge_lim); | |
1303 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { | |
1304 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, s->linesize, bedge_lim); | |
1305 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, s->linesize, bedge_lim); | |
1306 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, s->linesize, bedge_lim); | |
1307 } | |
1308 | |
1309 if (mb_y) | |
1310 s->vp8dsp.vp8_v_loop_filter_simple(dst, s->linesize, mbedge_lim); | |
1311 if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { | |
1312 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*s->linesize, s->linesize, bedge_lim); | |
1313 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*s->linesize, s->linesize, bedge_lim); | |
1314 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*s->linesize, s->linesize, bedge_lim); | |
1315 } | |
1316 } | |
1317 | |
1318 static void filter_mb_row(VP8Context *s, int mb_y) | |
1319 { | |
1320 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; | |
1321 uint8_t *dst[3] = { | |
1322 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, | |
1323 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, | |
1324 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize | |
1325 }; | |
1326 int mb_x; | |
1327 | |
1328 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1329 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); |
11921 | 1330 filter_mb(s, dst, mb++, mb_x, mb_y); |
1331 dst[0] += 16; | |
1332 dst[1] += 8; | |
1333 dst[2] += 8; | |
1334 } | |
1335 } | |
1336 | |
1337 static void filter_mb_row_simple(VP8Context *s, int mb_y) | |
1338 { | |
1339 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; | |
1340 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; | |
1341 int mb_x; | |
1342 | |
1343 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1344 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); |
11921 | 1345 filter_mb_simple(s, dst, mb++, mb_x, mb_y); |
1346 dst += 16; | |
1347 } | |
1348 } | |
1349 | |
1350 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
1351 AVPacket *avpkt) | |
1352 { | |
1353 VP8Context *s = avctx->priv_data; | |
1354 int ret, mb_x, mb_y, i, y, referenced; | |
1355 enum AVDiscard skip_thresh; | |
1356 AVFrame *curframe; | |
1357 | |
1358 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) | |
1359 return ret; | |
1360 | |
1361 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT | |
1362 || s->update_altref == VP56_FRAME_CURRENT; | |
1363 | |
1364 skip_thresh = !referenced ? AVDISCARD_NONREF : | |
1365 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; | |
1366 | |
1367 if (avctx->skip_frame >= skip_thresh) { | |
1368 s->invisible = 1; | |
1369 goto skip_decode; | |
1370 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1371 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; |
11921 | 1372 |
1373 for (i = 0; i < 4; i++) | |
1374 if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1375 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1376 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { | |
1377 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; | |
1378 break; | |
1379 } | |
1380 if (curframe->data[0]) | |
1381 avctx->release_buffer(avctx, curframe); | |
1382 | |
1383 curframe->key_frame = s->keyframe; | |
1384 curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE; | |
1385 curframe->reference = referenced ? 3 : 0; | |
1386 if ((ret = avctx->get_buffer(avctx, curframe))) { | |
1387 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); | |
1388 return ret; | |
1389 } | |
1390 | |
1391 // Given that arithmetic probabilities are updated every frame, it's quite likely | |
1392 // that the values we have on a random interframe are complete junk if we didn't | |
1393 // start decode on a keyframe. So just don't display anything rather than junk. | |
1394 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || | |
1395 !s->framep[VP56_FRAME_GOLDEN] || | |
1396 !s->framep[VP56_FRAME_GOLDEN2])) { | |
1397 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); | |
1398 return AVERROR_INVALIDDATA; | |
1399 } | |
1400 | |
1401 s->linesize = curframe->linesize[0]; | |
1402 s->uvlinesize = curframe->linesize[1]; | |
1403 | |
1404 if (!s->edge_emu_buffer) | |
1405 s->edge_emu_buffer = av_malloc(21*s->linesize); | |
1406 | |
1407 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); | |
1408 | |
1409 // top edge of 127 for intra prediction | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1410 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); |
11921 | 1411 |
1412 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | |
1413 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; | |
1414 VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; | |
1415 uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride; | |
1416 uint8_t *dst[3] = { | |
1417 curframe->data[0] + 16*mb_y*s->linesize, | |
1418 curframe->data[1] + 8*mb_y*s->uvlinesize, | |
1419 curframe->data[2] + 8*mb_y*s->uvlinesize | |
1420 }; | |
1421 | |
1422 memset(s->left_nnz, 0, sizeof(s->left_nnz)); | |
1423 | |
1424 // left edge of 129 for intra prediction | |
1425 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) | |
1426 for (i = 0; i < 3; i++) | |
1427 for (y = 0; y < 16>>!!i; y++) | |
1428 dst[i][y*curframe->linesize[i]-1] = 129; | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1429 if (mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1430 memset(s->top_border, 129, sizeof(*s->top_border)); |
11921 | 1431 |
1432 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
1433 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x); | |
1434 | |
1435 if (!mb->skip) | |
1436 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); | |
1437 else { | |
1438 AV_ZERO128(s->non_zero_count_cache); // luma | |
1439 AV_ZERO64(s->non_zero_count_cache[4]); // chroma | |
1440 } | |
1441 | |
1442 if (mb->mode <= MODE_I4x4) { | |
1443 intra_predict(s, dst, mb, intra4x4 + 4*mb_x, mb_x, mb_y); | |
1444 memset(mb->bmv, 0, sizeof(mb->bmv)); | |
1445 } else { | |
1446 inter_predict(s, dst, mb, mb_x, mb_y); | |
1447 } | |
1448 | |
1449 if (!mb->skip) { | |
1450 idct_mb(s, dst[0], dst[1], dst[2], mb); | |
1451 } else { | |
1452 AV_ZERO64(s->left_nnz); | |
1453 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned | |
1454 | |
1455 // Reset DC block predictors if they would exist if the mb had coefficients | |
1456 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
1457 s->left_nnz[8] = 0; | |
1458 s->top_nnz[mb_x][8] = 0; | |
1459 } | |
1460 } | |
1461 | |
1462 dst[0] += 16; | |
1463 dst[1] += 8; | |
1464 dst[2] += 8; | |
1465 mb++; | |
1466 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1467 if (s->deblock_filter) { |
11921 | 1468 if (s->filter.simple) |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1469 filter_mb_row_simple(s, mb_y); |
11921 | 1470 else |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1471 filter_mb_row(s, mb_y); |
11921 | 1472 } |
1473 } | |
1474 | |
1475 skip_decode: | |
1476 // if future frames don't use the updated probabilities, | |
1477 // reset them to the values we saved | |
1478 if (!s->update_probabilities) | |
1479 s->prob[0] = s->prob[1]; | |
1480 | |
1481 // check if golden and altref are swapped | |
1482 if (s->update_altref == VP56_FRAME_GOLDEN && | |
1483 s->update_golden == VP56_FRAME_GOLDEN2) | |
1484 FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]); | |
1485 else { | |
1486 if (s->update_altref != VP56_FRAME_NONE) | |
1487 s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; | |
1488 | |
1489 if (s->update_golden != VP56_FRAME_NONE) | |
1490 s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; | |
1491 } | |
1492 | |
1493 if (s->update_last) // move cur->prev | |
1494 s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT]; | |
1495 | |
1496 // release no longer referenced frames | |
1497 for (i = 0; i < 4; i++) | |
1498 if (s->frames[i].data[0] && | |
1499 &s->frames[i] != s->framep[VP56_FRAME_CURRENT] && | |
1500 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1501 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1502 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) | |
1503 avctx->release_buffer(avctx, &s->frames[i]); | |
1504 | |
1505 if (!s->invisible) { | |
1506 *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT]; | |
1507 *data_size = sizeof(AVFrame); | |
1508 } | |
1509 | |
1510 return avpkt->size; | |
1511 } | |
1512 | |
1513 static av_cold int vp8_decode_init(AVCodecContext *avctx) | |
1514 { | |
1515 VP8Context *s = avctx->priv_data; | |
1516 | |
1517 s->avctx = avctx; | |
1518 avctx->pix_fmt = PIX_FMT_YUV420P; | |
1519 | |
1520 dsputil_init(&s->dsp, avctx); | |
1521 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8); | |
1522 ff_vp8dsp_init(&s->vp8dsp); | |
1523 | |
1524 // intra pred needs edge emulation among other things | |
1525 if (avctx->flags&CODEC_FLAG_EMU_EDGE) { | |
11947 | 1526 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n"); |
11921 | 1527 return AVERROR_PATCHWELCOME; |
1528 } | |
1529 | |
1530 return 0; | |
1531 } | |
1532 | |
1533 static av_cold int vp8_decode_free(AVCodecContext *avctx) | |
1534 { | |
1535 vp8_decode_flush(avctx); | |
1536 return 0; | |
1537 } | |
1538 | |
1539 AVCodec vp8_decoder = { | |
1540 "vp8", | |
1541 AVMEDIA_TYPE_VIDEO, | |
1542 CODEC_ID_VP8, | |
1543 sizeof(VP8Context), | |
1544 vp8_decode_init, | |
1545 NULL, | |
1546 vp8_decode_free, | |
1547 vp8_decode_frame, | |
1548 CODEC_CAP_DR1, | |
1549 .flush = vp8_decode_flush, | |
1550 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), | |
1551 }; |