Mercurial > libavcodec.hg
annotate vp8.c @ 12266:48d6738904a9 libavcodec
Fix SPLATB_REG mess. Used to be a if/elseif/elseif/elseif spaghetti, so this
splits it into small optimization-specific macros which are selected for each
DSP function. The advantage of this approach is that the sse4 functions now
use the ssse3 codepath also without needing an explicit sse4 codepath.
author | rbultje |
---|---|
date | Sat, 24 Jul 2010 19:33:05 +0000 |
parents | 7db147ea02c4 |
children | 161c205dcdd2 |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
12249 | 6 * Copyright (C) 2010 Jason Garrett-Glaser |
11921 | 7 * |
8 * This file is part of FFmpeg. | |
9 * | |
10 * FFmpeg is free software; you can redistribute it and/or | |
11 * modify it under the terms of the GNU Lesser General Public | |
12 * License as published by the Free Software Foundation; either | |
13 * version 2.1 of the License, or (at your option) any later version. | |
14 * | |
15 * FFmpeg is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 * Lesser General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU Lesser General Public | |
21 * License along with FFmpeg; if not, write to the Free Software | |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 */ | |
24 | |
25 #include "avcodec.h" | |
26 #include "vp56.h" | |
27 #include "vp8data.h" | |
28 #include "vp8dsp.h" | |
29 #include "h264pred.h" | |
30 #include "rectangle.h" | |
31 | |
/**
 * Loop-filter parameters stored per macroblock.
 * update_dimensions() allocates mb_stride of these.
 */
typedef struct {
    uint8_t filter_level;   /* NOTE(review): presumably the final deblock level for the MB -- confirm against the filter code */
    uint8_t inner_limit;    /* NOTE(review): presumably the interior edge limit -- confirm */
    uint8_t inner_filter;   /* NOTE(review): presumably a flag enabling inner-edge filtering -- confirm */
} VP8FilterStrength;
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
37 |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
38 typedef struct { |
11921 | 39 uint8_t skip; |
40 // todo: make it possible to check for at least (i4x4 or split_mv) | |
41 // in one op. are others needed? | |
42 uint8_t mode; | |
43 uint8_t ref_frame; | |
44 uint8_t partitioning; | |
45 VP56mv mv; | |
46 VP56mv bmv[16]; | |
47 } VP8Macroblock; | |
48 | |
49 typedef struct { | |
50 AVCodecContext *avctx; | |
51 DSPContext dsp; | |
52 VP8DSPContext vp8dsp; | |
53 H264PredContext hpc; | |
11974 | 54 vp8_mc_func put_pixels_tab[3][3][3]; |
11921 | 55 AVFrame frames[4]; |
56 AVFrame *framep[4]; | |
57 uint8_t *edge_emu_buffer; | |
58 VP56RangeCoder c; ///< header context, includes mb modes and motion vectors | |
59 int profile; | |
60 | |
61 int mb_width; /* number of horizontal MB */ | |
62 int mb_height; /* number of vertical MB */ | |
63 int linesize; | |
64 int uvlinesize; | |
65 | |
66 int keyframe; | |
67 int invisible; | |
68 int update_last; ///< update VP56_FRAME_PREVIOUS with the current one | |
69 int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so | |
70 int update_altref; | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
71 int deblock_filter; |
11921 | 72 |
73 /** | |
74 * If this flag is not set, all the probability updates | |
75 * are discarded after this frame is decoded. | |
76 */ | |
77 int update_probabilities; | |
78 | |
79 /** | |
80 * All coefficients are contained in separate arith coding contexts. | |
81 * There can be 1, 2, 4, or 8 of these after the header context. | |
82 */ | |
83 int num_coeff_partitions; | |
84 VP56RangeCoder coeff_partition[8]; | |
85 | |
86 VP8Macroblock *macroblocks; | |
87 VP8Macroblock *macroblocks_base; | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
88 VP8FilterStrength *filter_strength; |
11921 | 89 int mb_stride; |
90 | |
91 uint8_t *intra4x4_pred_mode; | |
92 uint8_t *intra4x4_pred_mode_base; | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
93 uint8_t *segmentation_map; |
11921 | 94 int b4_stride; |
95 | |
96 /** | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
97 * Cache of the top row needed for intra prediction |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
98 * 16 for luma, 8 for each chroma plane |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
99 */ |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
100 uint8_t (*top_border)[16+8+8]; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
101 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
102 /** |
11921 | 103 * For coeff decode, we need to know whether the above block had non-zero |
104 * coefficients. This means for each macroblock, we need data for 4 luma | |
105 * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 | |
106 * per macroblock. We keep the last row in top_nnz. | |
107 */ | |
108 uint8_t (*top_nnz)[9]; | |
109 DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; | |
110 | |
111 /** | |
112 * This is the index plus one of the last non-zero coeff | |
113 * for each of the blocks in the current macroblock. | |
114 * So, 0 -> no coeffs | |
115 * 1 -> dc-only (special transform) | |
116 * 2+-> full transform | |
117 */ | |
118 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; | |
119 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; | |
12221 | 120 uint8_t intra4x4_pred_mode_mb[16]; |
11921 | 121 |
122 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
123 int segment; ///< segment of the current macroblock |
11921 | 124 |
125 int mbskip_enabled; | |
126 int sign_bias[4]; ///< one state [0, 1] per ref frame type | |
12231 | 127 int ref_count[3]; |
11921 | 128 |
129 /** | |
130 * Base parameters for segmentation, i.e. per-macroblock parameters. | |
131 * These must be kept unchanged even if segmentation is not used for | |
132 * a frame, since the values persist between interframes. | |
133 */ | |
134 struct { | |
135 int enabled; | |
136 int absolute_vals; | |
137 int update_map; | |
138 int8_t base_quant[4]; | |
139 int8_t filter_level[4]; ///< base loop filter level | |
140 } segmentation; | |
141 | |
142 /** | |
143 * Macroblocks can have one of 4 different quants in a frame when | |
144 * segmentation is enabled. | |
145 * If segmentation is disabled, only the first segment's values are used. | |
146 */ | |
147 struct { | |
148 // [0] - DC qmul [1] - AC qmul | |
149 int16_t luma_qmul[2]; | |
150 int16_t luma_dc_qmul[2]; ///< luma dc-only block quant | |
151 int16_t chroma_qmul[2]; | |
152 } qmat[4]; | |
153 | |
154 struct { | |
155 int simple; | |
156 int level; | |
157 int sharpness; | |
158 } filter; | |
159 | |
160 struct { | |
161 int enabled; ///< whether each mb can have a different strength based on mode/ref | |
162 | |
163 /** | |
164 * filter strength adjustment for the following macroblock modes: | |
165 * [0] - i4x4 | |
166 * [1] - zero mv | |
167 * [2] - inter modes except for zero or split mv | |
168 * [3] - split mv | |
169 * i16x16 modes never have any adjustment | |
170 */ | |
171 int8_t mode[4]; | |
172 | |
173 /** | |
174 * filter strength adjustment for macroblocks that reference: | |
175 * [0] - intra / VP56_FRAME_CURRENT | |
176 * [1] - VP56_FRAME_PREVIOUS | |
177 * [2] - VP56_FRAME_GOLDEN | |
178 * [3] - altref / VP56_FRAME_GOLDEN2 | |
179 */ | |
180 int8_t ref[4]; | |
181 } lf_delta; | |
182 | |
183 /** | |
184 * These are all of the updatable probabilities for binary decisions. | |
185 * They are only implictly reset on keyframes, making it quite likely | |
186 * for an interframe to desync if a prior frame's header was corrupt | |
187 * or missing outright! | |
188 */ | |
189 struct { | |
190 uint8_t segmentid[3]; | |
191 uint8_t mbskip; | |
192 uint8_t intra; | |
193 uint8_t last; | |
194 uint8_t golden; | |
195 uint8_t pred16x16[4]; | |
196 uint8_t pred8x8c[3]; | |
197 uint8_t token[4][8][3][NUM_DCT_TOKENS-1]; | |
198 uint8_t mvc[2][19]; | |
199 } prob[2]; | |
200 } VP8Context; | |
201 | |
202 static void vp8_decode_flush(AVCodecContext *avctx) | |
203 { | |
204 VP8Context *s = avctx->priv_data; | |
205 int i; | |
206 | |
207 for (i = 0; i < 4; i++) | |
208 if (s->frames[i].data[0]) | |
209 avctx->release_buffer(avctx, &s->frames[i]); | |
210 memset(s->framep, 0, sizeof(s->framep)); | |
211 | |
212 av_freep(&s->macroblocks_base); | |
213 av_freep(&s->intra4x4_pred_mode_base); | |
214 av_freep(&s->top_nnz); | |
215 av_freep(&s->edge_emu_buffer); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
216 av_freep(&s->top_border); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
217 av_freep(&s->segmentation_map); |
11921 | 218 |
219 s->macroblocks = NULL; | |
220 s->intra4x4_pred_mode = NULL; | |
221 } | |
222 | |
223 static int update_dimensions(VP8Context *s, int width, int height) | |
224 { | |
225 int i; | |
226 | |
227 if (avcodec_check_dimensions(s->avctx, width, height)) | |
228 return AVERROR_INVALIDDATA; | |
229 | |
230 vp8_decode_flush(s->avctx); | |
231 | |
232 avcodec_set_dimensions(s->avctx, width, height); | |
233 | |
234 s->mb_width = (s->avctx->coded_width +15) / 16; | |
235 s->mb_height = (s->avctx->coded_height+15) / 16; | |
236 | |
237 // we allocate a border around the top/left of intra4x4 modes | |
238 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle | |
239 s->mb_stride = s->mb_width+1; | |
240 s->b4_stride = 4*s->mb_stride; | |
241 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
242 s->macroblocks_base = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks)); |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
243 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength)); |
11921 | 244 s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); |
245 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
246 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
247 s->segmentation_map = av_mallocz(s->mb_stride*s->mb_height); |
11921 | 248 |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
249 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base || |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
250 !s->top_nnz || !s->top_border || !s->segmentation_map) |
12169 | 251 return AVERROR(ENOMEM); |
252 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
253 s->macroblocks = s->macroblocks_base + 1; |
11921 | 254 s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; |
255 | |
256 memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride); | |
257 for (i = 0; i < 4*s->mb_height; i++) | |
258 s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED; | |
259 | |
260 return 0; | |
261 } | |
262 | |
263 static void parse_segment_info(VP8Context *s) | |
264 { | |
265 VP56RangeCoder *c = &s->c; | |
266 int i; | |
267 | |
268 s->segmentation.update_map = vp8_rac_get(c); | |
269 | |
270 if (vp8_rac_get(c)) { // update segment feature data | |
271 s->segmentation.absolute_vals = vp8_rac_get(c); | |
272 | |
273 for (i = 0; i < 4; i++) | |
274 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); | |
275 | |
276 for (i = 0; i < 4; i++) | |
277 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); | |
278 } | |
279 if (s->segmentation.update_map) | |
280 for (i = 0; i < 3; i++) | |
281 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; | |
282 } | |
283 | |
284 static void update_lf_deltas(VP8Context *s) | |
285 { | |
286 VP56RangeCoder *c = &s->c; | |
287 int i; | |
288 | |
289 for (i = 0; i < 4; i++) | |
290 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); | |
291 | |
292 for (i = 0; i < 4; i++) | |
293 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); | |
294 } | |
295 | |
296 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) | |
297 { | |
298 const uint8_t *sizes = buf; | |
299 int i; | |
300 | |
301 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); | |
302 | |
303 buf += 3*(s->num_coeff_partitions-1); | |
304 buf_size -= 3*(s->num_coeff_partitions-1); | |
305 if (buf_size < 0) | |
306 return -1; | |
307 | |
308 for (i = 0; i < s->num_coeff_partitions-1; i++) { | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
309 int size = AV_RL24(sizes + 3*i); |
11921 | 310 if (buf_size - size < 0) |
311 return -1; | |
312 | |
313 vp56_init_range_decoder(&s->coeff_partition[i], buf, size); | |
314 buf += size; | |
315 buf_size -= size; | |
316 } | |
317 vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); | |
318 | |
319 return 0; | |
320 } | |
321 | |
322 static void get_quants(VP8Context *s) | |
323 { | |
324 VP56RangeCoder *c = &s->c; | |
325 int i, base_qi; | |
326 | |
327 int yac_qi = vp8_rac_get_uint(c, 7); | |
328 int ydc_delta = vp8_rac_get_sint(c, 4); | |
329 int y2dc_delta = vp8_rac_get_sint(c, 4); | |
330 int y2ac_delta = vp8_rac_get_sint(c, 4); | |
331 int uvdc_delta = vp8_rac_get_sint(c, 4); | |
332 int uvac_delta = vp8_rac_get_sint(c, 4); | |
333 | |
334 for (i = 0; i < 4; i++) { | |
335 if (s->segmentation.enabled) { | |
336 base_qi = s->segmentation.base_quant[i]; | |
337 if (!s->segmentation.absolute_vals) | |
338 base_qi += yac_qi; | |
339 } else | |
340 base_qi = yac_qi; | |
341 | |
342 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; | |
343 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; | |
344 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; | |
345 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; | |
346 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; | |
347 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; | |
348 | |
349 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); | |
350 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); | |
351 } | |
352 } | |
353 | |
354 /** | |
355 * Determine which buffers golden and altref should be updated with after this frame. | |
356 * The spec isn't clear here, so I'm going by my understanding of what libvpx does | |
357 * | |
358 * Intra frames update all 3 references | |
359 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set | |
360 * If the update (golden|altref) flag is set, it's updated with the current frame | |
361 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. | |
362 * If the flag is not set, the number read means: | |
363 * 0: no update | |
364 * 1: VP56_FRAME_PREVIOUS | |
365 * 2: update golden with altref, or update altref with golden | |
366 */ | |
367 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) | |
368 { | |
369 VP56RangeCoder *c = &s->c; | |
370 | |
371 if (update) | |
372 return VP56_FRAME_CURRENT; | |
373 | |
374 switch (vp8_rac_get_uint(c, 2)) { | |
375 case 1: | |
376 return VP56_FRAME_PREVIOUS; | |
377 case 2: | |
378 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; | |
379 } | |
380 return VP56_FRAME_NONE; | |
381 } | |
382 | |
383 static void update_refs(VP8Context *s) | |
384 { | |
385 VP56RangeCoder *c = &s->c; | |
386 | |
387 int update_golden = vp8_rac_get(c); | |
388 int update_altref = vp8_rac_get(c); | |
389 | |
390 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); | |
391 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); | |
392 } | |
393 | |
394 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) | |
395 { | |
396 VP56RangeCoder *c = &s->c; | |
397 int header_size, hscale, vscale, i, j, k, l, ret; | |
398 int width = s->avctx->width; | |
399 int height = s->avctx->height; | |
400 | |
401 s->keyframe = !(buf[0] & 1); | |
402 s->profile = (buf[0]>>1) & 7; | |
403 s->invisible = !(buf[0] & 0x10); | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
404 header_size = AV_RL24(buf) >> 5; |
11921 | 405 buf += 3; |
406 buf_size -= 3; | |
407 | |
11974 | 408 if (s->profile > 3) |
409 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); | |
410 | |
411 if (!s->profile) | |
412 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); | |
413 else // profile 1-3 use bilinear, 4+ aren't defined so whatever | |
414 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); | |
11921 | 415 |
416 if (header_size > buf_size - 7*s->keyframe) { | |
417 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); | |
418 return AVERROR_INVALIDDATA; | |
419 } | |
420 | |
421 if (s->keyframe) { | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
422 if (AV_RL24(buf) != 0x2a019d) { |
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
423 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf)); |
11921 | 424 return AVERROR_INVALIDDATA; |
425 } | |
426 width = AV_RL16(buf+3) & 0x3fff; | |
427 height = AV_RL16(buf+5) & 0x3fff; | |
428 hscale = buf[4] >> 6; | |
429 vscale = buf[6] >> 6; | |
430 buf += 7; | |
431 buf_size -= 7; | |
432 | |
11970
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
433 if (hscale || vscale) |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
434 av_log_missing_feature(s->avctx, "Upscaling", 1); |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
435 |
11921 | 436 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; |
437 memcpy(s->prob->token , vp8_token_default_probs , sizeof(s->prob->token)); | |
438 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); | |
439 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); | |
440 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); | |
441 memset(&s->segmentation, 0, sizeof(s->segmentation)); | |
442 } | |
443 | |
444 if (!s->macroblocks_base || /* first frame */ | |
445 width != s->avctx->width || height != s->avctx->height) { | |
446 if ((ret = update_dimensions(s, width, height) < 0)) | |
447 return ret; | |
448 } | |
449 | |
450 vp56_init_range_decoder(c, buf, header_size); | |
451 buf += header_size; | |
452 buf_size -= header_size; | |
453 | |
454 if (s->keyframe) { | |
455 if (vp8_rac_get(c)) | |
456 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); | |
457 vp8_rac_get(c); // whether we can skip clamping in dsp functions | |
458 } | |
459 | |
460 if ((s->segmentation.enabled = vp8_rac_get(c))) | |
461 parse_segment_info(s); | |
462 else | |
463 s->segmentation.update_map = 0; // FIXME: move this to some init function? | |
464 | |
465 s->filter.simple = vp8_rac_get(c); | |
466 s->filter.level = vp8_rac_get_uint(c, 6); | |
467 s->filter.sharpness = vp8_rac_get_uint(c, 3); | |
468 | |
469 if ((s->lf_delta.enabled = vp8_rac_get(c))) | |
470 if (vp8_rac_get(c)) | |
471 update_lf_deltas(s); | |
472 | |
473 if (setup_partitions(s, buf, buf_size)) { | |
474 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); | |
475 return AVERROR_INVALIDDATA; | |
476 } | |
477 | |
478 get_quants(s); | |
479 | |
480 if (!s->keyframe) { | |
481 update_refs(s); | |
482 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); | |
483 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); | |
484 } | |
485 | |
486 // if we aren't saving this frame's probabilities for future frames, | |
487 // make a copy of the current probabilities | |
488 if (!(s->update_probabilities = vp8_rac_get(c))) | |
489 s->prob[1] = s->prob[0]; | |
490 | |
491 s->update_last = s->keyframe || vp8_rac_get(c); | |
492 | |
493 for (i = 0; i < 4; i++) | |
494 for (j = 0; j < 8; j++) | |
495 for (k = 0; k < 3; k++) | |
496 for (l = 0; l < NUM_DCT_TOKENS-1; l++) | |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
497 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) |
11921 | 498 s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8); |
499 | |
500 if ((s->mbskip_enabled = vp8_rac_get(c))) | |
501 s->prob->mbskip = vp8_rac_get_uint(c, 8); | |
502 | |
503 if (!s->keyframe) { | |
504 s->prob->intra = vp8_rac_get_uint(c, 8); | |
505 s->prob->last = vp8_rac_get_uint(c, 8); | |
506 s->prob->golden = vp8_rac_get_uint(c, 8); | |
507 | |
508 if (vp8_rac_get(c)) | |
509 for (i = 0; i < 4; i++) | |
510 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); | |
511 if (vp8_rac_get(c)) | |
512 for (i = 0; i < 3; i++) | |
513 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); | |
514 | |
515 // 17.2 MV probability update | |
516 for (i = 0; i < 2; i++) | |
517 for (j = 0; j < 19; j++) | |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
518 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) |
11921 | 519 s->prob->mvc[i][j] = vp8_rac_get_nn(c); |
520 } | |
521 | |
522 return 0; | |
523 } | |
524 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
525 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
526 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) |
11921 | 527 { |
528 #define MARGIN (16 << 2) | |
529 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), | |
530 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); | |
531 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), | |
532 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); | |
533 } | |
534 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
535 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
536 void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
537 VP56mv near[2], VP56mv *best, uint8_t cnt[4]) |
11921 | 538 { |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
539 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
540 mb - 1 /* left */, |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
541 mb + 1 /* top-left */ }; |
11921 | 542 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; |
543 VP56mv near_mv[4] = {{ 0 }}; | |
544 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; | |
12217 | 545 int idx = CNT_ZERO; |
11921 | 546 int best_idx = CNT_ZERO; |
12217 | 547 int cur_sign_bias = s->sign_bias[mb->ref_frame]; |
548 int *sign_bias = s->sign_bias; | |
11921 | 549 |
550 /* Process MB on top, left and top-left */ | |
12217 | 551 #define MV_EDGE_CHECK(n)\ |
552 {\ | |
553 VP8Macroblock *edge = mb_edge[n];\ | |
554 int edge_ref = edge->ref_frame;\ | |
555 if (edge_ref != VP56_FRAME_CURRENT) {\ | |
556 uint32_t mv = AV_RN32A(&edge->mv);\ | |
557 if (mv) {\ | |
558 if (cur_sign_bias != sign_bias[edge_ref]) {\ | |
559 /* SWAR negate of the values in mv. */\ | |
12242 | 560 mv = ~mv;\ |
561 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ | |
12217 | 562 }\ |
563 if (!n || mv != AV_RN32A(&near_mv[idx]))\ | |
564 AV_WN32A(&near_mv[++idx], mv);\ | |
565 cnt[idx] += 1 + (n != 2);\ | |
566 } else\ | |
567 cnt[CNT_ZERO] += 1 + (n != 2);\ | |
568 }\ | |
11921 | 569 } |
12217 | 570 MV_EDGE_CHECK(0) |
571 MV_EDGE_CHECK(1) | |
572 MV_EDGE_CHECK(2) | |
11921 | 573 |
12217 | 574 /* If we have three distinct MVs, merge first and last if they're the same */ |
575 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT])) | |
11921 | 576 cnt[CNT_NEAREST] += 1; |
577 | |
578 cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + | |
579 (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + | |
580 (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); | |
581 | |
582 /* Swap near and nearest if necessary */ | |
583 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { | |
12217 | 584 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); |
585 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); | |
11921 | 586 } |
587 | |
588 /* Choose the best mv out of 0,0 and the nearest mv */ | |
589 if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO]) | |
590 best_idx = CNT_NEAREST; | |
591 | |
12246 | 592 mb->mv = near_mv[best_idx]; |
11921 | 593 near[0] = near_mv[CNT_NEAREST]; |
594 near[1] = near_mv[CNT_NEAR]; | |
595 } | |
596 | |
597 /** | |
598 * Motion vector coding, 17.1. | |
599 */ | |
600 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) | |
601 { | |
12255 | 602 int bit, x = 0; |
11921 | 603 |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
604 if (vp56_rac_get_prob_branchy(c, p[0])) { |
11921 | 605 int i; |
606 | |
607 for (i = 0; i < 3; i++) | |
608 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
609 for (i = 9; i > 3; i--) | |
610 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
611 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) | |
612 x += 8; | |
12255 | 613 } else { |
614 // small_mvtree | |
615 const uint8_t *ps = p+2; | |
616 bit = vp56_rac_get_prob(c, *ps); | |
617 ps += 1 + 3*bit; | |
618 x += 4*bit; | |
619 bit = vp56_rac_get_prob(c, *ps); | |
620 ps += 1 + bit; | |
621 x += 2*bit; | |
622 x += vp56_rac_get_prob(c, *ps); | |
623 } | |
11921 | 624 |
625 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; | |
626 } | |
627 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
628 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
629 const uint8_t *get_submv_prob(uint32_t left, uint32_t top) |
11921 | 630 { |
12219 | 631 if (left == top) |
632 return vp8_submv_prob[4-!!left]; | |
633 if (!top) | |
11921 | 634 return vp8_submv_prob[2]; |
12219 | 635 return vp8_submv_prob[1-!!left]; |
11921 | 636 } |
637 | |
638 /** | |
639 * Split motion vector prediction, 16.4. | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
640 * @returns the number of motion vectors parsed (2, 4 or 16) |
11921 | 641 */ |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
642 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
643 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) |
11921 | 644 { |
645 int part_idx = mb->partitioning = | |
646 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); | |
647 int n, num = vp8_mbsplit_count[part_idx]; | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
648 VP8Macroblock *top_mb = &mb[2]; |
12219 | 649 VP8Macroblock *left_mb = &mb[-1]; |
650 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], | |
651 *mbsplits_top = vp8_mbsplits[top_mb->partitioning], | |
652 *mbsplits_cur = vp8_mbsplits[part_idx], | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
653 *firstidx = vp8_mbfirstidx[part_idx]; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
654 VP56mv *top_mv = top_mb->bmv; |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
655 VP56mv *left_mv = left_mb->bmv; |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
656 VP56mv *cur_mv = mb->bmv; |
11921 | 657 |
658 for (n = 0; n < num; n++) { | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
659 int k = firstidx[n]; |
12219 | 660 uint32_t left, above; |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
661 const uint8_t *submv_prob; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
662 |
12219 | 663 if (!(k & 3)) |
664 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); | |
665 else | |
666 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); | |
667 if (k <= 3) | |
668 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); | |
669 else | |
670 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
671 |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
672 submv_prob = get_submv_prob(left, above); |
11921 | 673 |
674 switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) { | |
675 case VP8_SUBMVMODE_NEW4X4: | |
12246 | 676 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); |
677 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); | |
11921 | 678 break; |
679 case VP8_SUBMVMODE_ZERO4X4: | |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
680 AV_ZERO32(&mb->bmv[n]); |
11921 | 681 break; |
682 case VP8_SUBMVMODE_LEFT4X4: | |
12219 | 683 AV_WN32A(&mb->bmv[n], left); |
11921 | 684 break; |
685 case VP8_SUBMVMODE_TOP4X4: | |
12219 | 686 AV_WN32A(&mb->bmv[n], above); |
11921 | 687 break; |
688 } | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
689 } |
11921 | 690 |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
691 return num; |
11921 | 692 } |
693 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
694 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
695 void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
696 int stride, int keyframe) |
11921 | 697 { |
12221 | 698 int x, y, t, l, i; |
11921 | 699 |
12221 | 700 if (keyframe) { |
701 const uint8_t *ctx; | |
702 for (y = 0; y < 4; y++) { | |
703 for (x = 0; x < 4; x++) { | |
11921 | 704 t = intra4x4[x - stride]; |
705 l = intra4x4[x - 1]; | |
706 ctx = vp8_pred4x4_prob_intra[t][l]; | |
12221 | 707 intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); |
11921 | 708 } |
12221 | 709 intra4x4 += stride; |
11921 | 710 } |
12221 | 711 } else { |
712 for (i = 0; i < 16; i++) | |
713 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); | |
11921 | 714 } |
715 } | |
716 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
717 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
718 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
719 uint8_t *intra4x4, uint8_t *segment) |
11921 | 720 { |
721 VP56RangeCoder *c = &s->c; | |
722 | |
723 if (s->segmentation.update_map) | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
724 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
725 s->segment = *segment; |
11921 | 726 |
727 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; | |
728 | |
729 if (s->keyframe) { | |
730 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); | |
731 | |
732 if (mb->mode == MODE_I4x4) { | |
733 decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1); | |
734 } else | |
735 fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); | |
736 | |
737 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); | |
738 mb->ref_frame = VP56_FRAME_CURRENT; | |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
739 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { |
11921 | 740 VP56mv near[2], best; |
12217 | 741 uint8_t cnt[4] = { 0 }; |
11921 | 742 uint8_t p[4]; |
743 | |
744 // inter MB, 16.2 | |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
745 if (vp56_rac_get_prob_branchy(c, s->prob->last)) |
11921 | 746 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? |
747 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; | |
748 else | |
749 mb->ref_frame = VP56_FRAME_PREVIOUS; | |
12231 | 750 s->ref_count[mb->ref_frame-1]++; |
11921 | 751 |
752 // motion vectors, 16.3 | |
753 find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); | |
12217 | 754 p[0] = vp8_mode_contexts[cnt[0]][0]; |
755 p[1] = vp8_mode_contexts[cnt[1]][1]; | |
756 p[2] = vp8_mode_contexts[cnt[2]][2]; | |
757 p[3] = vp8_mode_contexts[cnt[3]][3]; | |
11921 | 758 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); |
759 switch (mb->mode) { | |
760 case VP8_MVMODE_SPLIT: | |
12246 | 761 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); |
762 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; | |
11921 | 763 break; |
764 case VP8_MVMODE_ZERO: | |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
765 AV_ZERO32(&mb->mv); |
11921 | 766 break; |
767 case VP8_MVMODE_NEAREST: | |
768 clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); | |
769 break; | |
770 case VP8_MVMODE_NEAR: | |
771 clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); | |
772 break; | |
773 case VP8_MVMODE_NEW: | |
12246 | 774 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); |
775 mb->mv.y += + read_mv_component(c, s->prob->mvc[0]); | |
776 mb->mv.x += + read_mv_component(c, s->prob->mvc[1]); | |
11921 | 777 break; |
778 } | |
779 if (mb->mode != VP8_MVMODE_SPLIT) { | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
780 mb->partitioning = VP8_SPLITMVMODE_NONE; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
781 mb->bmv[0] = mb->mv; |
11921 | 782 } |
783 } else { | |
784 // intra MB, 16.1 | |
785 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); | |
786 | |
12220
0f635b1f7861
Avoid useless fill_rectangle in P-frames in VP8
darkshikari
parents:
12219
diff
changeset
|
787 if (mb->mode == MODE_I4x4) |
12221 | 788 decode_intra4x4_modes(c, intra4x4, 4, 0); |
11921 | 789 |
790 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); | |
791 mb->ref_frame = VP56_FRAME_CURRENT; | |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
792 mb->partitioning = VP8_SPLITMVMODE_NONE; |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
793 AV_ZERO32(&mb->bmv[0]); |
11921 | 794 } |
795 } | |
796 | |
797 /** | |
12115 | 798 * @param c arithmetic bitstream reader context |
799 * @param block destination for block coefficients | |
800 * @param probs probabilities to use when reading trees from the bitstream | |
11921 | 801 * @param i initial coeff index, 0 unless a separate DC block is coded |
802 * @param zero_nhood the initial prediction context for number of surrounding | |
803 * all-zero blocks (only left/top, so 0-2) | |
12062
372f7fed2806
Avoid square brackets in Doxygen comments; Doxygen chokes on them.
diego
parents:
11990
diff
changeset
|
804 * @param qmul array holding the dc/ac dequant factor at position 0/1 |
11921 | 805 * @return 0 if no coeffs were decoded |
806 * otherwise, the index of the last coeff decoded plus one | |
807 */ | |
808 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], | |
809 uint8_t probs[8][3][NUM_DCT_TOKENS-1], | |
810 int i, int zero_nhood, int16_t qmul[2]) | |
811 { | |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
812 uint8_t *token_prob; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
813 int nonzero = 0; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
814 int coeff; |
11921 | 815 |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
816 do { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
817 token_prob = probs[vp8_coeff_band[i]][zero_nhood]; |
11921 | 818 |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
819 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
820 return nonzero; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
821 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
822 skip_eob: |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
823 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
824 zero_nhood = 0; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
825 token_prob = probs[vp8_coeff_band[++i]][0]; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
826 if (i < 16) |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
827 goto skip_eob; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
828 return nonzero; // invalid input; blocks should end with EOB |
11921 | 829 } |
830 | |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
831 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
832 coeff = 1; |
11921 | 833 zero_nhood = 1; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
834 } else { |
11921 | 835 zero_nhood = 2; |
836 | |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
837 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
838 coeff = vp56_rac_get_prob(c, token_prob[4]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
839 if (coeff) |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
840 coeff += vp56_rac_get_prob(c, token_prob[5]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
841 coeff += 2; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
842 } else { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
843 // DCT_CAT* |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
844 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
845 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
846 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
847 } else { // DCT_CAT2 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
848 coeff = 7; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
849 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
850 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
851 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
852 } else { // DCT_CAT3 and up |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
853 int a = vp56_rac_get_prob(c, token_prob[8]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
854 int b = vp56_rac_get_prob(c, token_prob[9+a]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
855 int cat = (a<<1) + b; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
856 coeff = 3 + (8<<cat); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
857 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
858 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
859 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
860 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
861 |
11921 | 862 // todo: full [16] qmat? load into register? |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
863 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
864 nonzero = ++i; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
865 } while (i < 16); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
866 |
11921 | 867 return nonzero; |
868 } | |
869 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
870 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
871 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
872 uint8_t t_nnz[9], uint8_t l_nnz[9]) |
11921 | 873 { |
874 LOCAL_ALIGNED_16(DCTELEM, dc,[16]); | |
875 int i, x, y, luma_start = 0, luma_ctx = 3; | |
876 int nnz_pred, nnz, nnz_total = 0; | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
877 int segment = s->segment; |
11921 | 878 |
879 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
880 AV_ZERO128(dc); | |
881 AV_ZERO128(dc+8); | |
882 nnz_pred = t_nnz[8] + l_nnz[8]; | |
883 | |
884 // decode DC values and do hadamard | |
885 nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred, | |
886 s->qmat[segment].luma_dc_qmul); | |
887 l_nnz[8] = t_nnz[8] = !!nnz; | |
888 nnz_total += nnz; | |
889 s->vp8dsp.vp8_luma_dc_wht(s->block, dc); | |
890 luma_start = 1; | |
891 luma_ctx = 0; | |
892 } | |
893 | |
894 // luma blocks | |
895 for (y = 0; y < 4; y++) | |
896 for (x = 0; x < 4; x++) { | |
897 nnz_pred = l_nnz[y] + t_nnz[x]; | |
898 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, | |
899 nnz_pred, s->qmat[segment].luma_qmul); | |
900 // nnz+luma_start may be one more than the actual last index, but we don't care | |
901 s->non_zero_count_cache[y][x] = nnz + luma_start; | |
902 t_nnz[x] = l_nnz[y] = !!nnz; | |
903 nnz_total += nnz; | |
904 } | |
905 | |
906 // chroma blocks | |
907 // TODO: what to do about dimensions? 2nd dim for luma is x, | |
908 // but for chroma it's (y<<1)|x | |
909 for (i = 4; i < 6; i++) | |
910 for (y = 0; y < 2; y++) | |
911 for (x = 0; x < 2; x++) { | |
912 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; | |
913 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, | |
914 nnz_pred, s->qmat[segment].chroma_qmul); | |
915 s->non_zero_count_cache[i][(y<<1)+x] = nnz; | |
916 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; | |
917 nnz_total += nnz; | |
918 } | |
919 | |
920 // if there were no coded coeffs despite the macroblock not being marked skip, | |
921 // we MUST not do the inner loop filter and should not do IDCT | |
922 // Since skip isn't used for bitstream prediction, just manually set it. | |
923 if (!nnz_total) | |
924 mb->skip = 1; | |
925 } | |
926 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
927 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
928 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
929 int linesize, int uvlinesize, int simple) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
930 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
931 AV_COPY128(top_border, src_y + 15*linesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
932 if (!simple) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
933 AV_COPY64(top_border+16, src_cb + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
934 AV_COPY64(top_border+24, src_cr + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
935 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
936 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
937 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
938 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
939 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
940 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
941 int simple, int xchg) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
942 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
943 uint8_t *top_border_m1 = top_border-32; // for TL prediction |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
944 src_y -= linesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
945 src_cb -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
946 src_cr -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
947 |
12202 | 948 #define XCHG(a,b,xchg) do { \ |
949 if (xchg) AV_SWAP64(b,a); \ | |
950 else AV_COPY64(b,a); \ | |
951 } while (0) | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
952 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
953 XCHG(top_border_m1+8, src_y-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
954 XCHG(top_border, src_y, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
955 XCHG(top_border+8, src_y+8, 1); |
12201
c4b53914f286
vp8: add do { } while(0) around XCHG() macro to avoid confusing if/else
mru
parents:
12200
diff
changeset
|
956 if (mb_x < mb_width-1) |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
957 XCHG(top_border+32, src_y+16, 1); |
12201
c4b53914f286
vp8: add do { } while(0) around XCHG() macro to avoid confusing if/else
mru
parents:
12200
diff
changeset
|
958 |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
959 // only copy chroma for normal loop filter |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
960 // or to initialize the top row to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
961 if (!simple || !mb_y) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
962 XCHG(top_border_m1+16, src_cb-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
963 XCHG(top_border_m1+24, src_cr-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
964 XCHG(top_border+16, src_cb, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
965 XCHG(top_border+24, src_cr, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
966 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
967 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
968 |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
969 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
970 int check_intra_pred_mode(int mode, int mb_x, int mb_y) |
11921 | 971 { |
972 if (mode == DC_PRED8x8) { | |
12243
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
973 if (!mb_x) { |
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
974 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; |
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
975 } else if (!mb_y) { |
12244 | 976 mode = LEFT_DC_PRED8x8; |
12243
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
977 } |
11921 | 978 } |
979 return mode; | |
980 } | |
981 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
982 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
983 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
984 uint8_t *intra4x4, int mb_x, int mb_y) |
11921 | 985 { |
986 int x, y, mode, nnz, tr; | |
987 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
988 // for the first row, we need to run xchg_mb_border to init the top edge to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
989 // otherwise, skip it if we aren't going to deblock |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
990 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
991 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
992 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
993 s->filter.simple, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
994 |
11921 | 995 if (mb->mode < MODE_I4x4) { |
996 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); | |
997 s->hpc.pred16x16[mode](dst[0], s->linesize); | |
998 } else { | |
999 uint8_t *ptr = dst[0]; | |
12221 | 1000 int stride = s->keyframe ? s->b4_stride : 4; |
11921 | 1001 |
1002 // all blocks on the right edge of the macroblock use bottom edge | |
1003 // the top macroblock for their topright edge | |
1004 uint8_t *tr_right = ptr - s->linesize + 16; | |
1005 | |
1006 // if we're on the right edge of the frame, said edge is extended | |
1007 // from the top macroblock | |
1008 if (mb_x == s->mb_width-1) { | |
1009 tr = tr_right[-1]*0x01010101; | |
1010 tr_right = (uint8_t *)&tr; | |
1011 } | |
1012 | |
12234
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1013 if (mb->skip) |
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1014 AV_ZERO128(s->non_zero_count_cache); |
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1015 |
11921 | 1016 for (y = 0; y < 4; y++) { |
1017 uint8_t *topright = ptr + 4 - s->linesize; | |
1018 for (x = 0; x < 4; x++) { | |
1019 if (x == 3) | |
1020 topright = tr_right; | |
1021 | |
12221 | 1022 s->hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s->linesize); |
11921 | 1023 |
1024 nnz = s->non_zero_count_cache[y][x]; | |
1025 if (nnz) { | |
1026 if (nnz == 1) | |
1027 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); | |
1028 else | |
1029 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); | |
1030 } | |
1031 topright += 4; | |
1032 } | |
1033 | |
1034 ptr += 4*s->linesize; | |
12221 | 1035 intra4x4 += stride; |
11921 | 1036 } |
1037 } | |
1038 | |
1039 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); | |
1040 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); | |
1041 s->hpc.pred8x8[mode](dst[2], s->uvlinesize); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1042 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1043 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1044 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1045 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1046 s->filter.simple, 0); |
11921 | 1047 } |
1048 | |
1049 /** | |
1050 * Generic MC function. | |
1051 * | |
1052 * @param s VP8 decoding context | |
1053 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes | |
1054 * @param dst target buffer for block data at block position | |
1055 * @param src reference picture buffer at origin (0, 0) | |
1056 * @param mv motion vector (relative to block position) to get pixel data from | |
1057 * @param x_off horizontal position of block from origin (0, 0) | |
1058 * @param y_off vertical position of block from origin (0, 0) | |
1059 * @param block_w width of block (16, 8 or 4) | |
1060 * @param block_h height of block (always same as block_w) | |
1061 * @param width width of src/dst plane data | |
1062 * @param height height of src/dst plane data | |
1063 * @param linesize size of a single line of plane data, including padding | |
12115 | 1064 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
11921 | 1065 */ |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1066 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1067 void vp8_mc(VP8Context *s, int luma, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1068 uint8_t *dst, uint8_t *src, const VP56mv *mv, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1069 int x_off, int y_off, int block_w, int block_h, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1070 int width, int height, int linesize, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1071 vp8_mc_func mc_func[3][3]) |
11921 | 1072 { |
12218 | 1073 if (AV_RN32A(mv)) { |
1074 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; | |
1075 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; | |
1076 int my = (mv->y << luma)&7, my_idx = idx[my]; | |
11921 | 1077 |
12218 | 1078 x_off += mv->x >> (3 - luma); |
1079 y_off += mv->y >> (3 - luma); | |
11921 | 1080 |
12218 | 1081 // edge emulation |
1082 src += y_off * linesize + x_off; | |
1083 if (x_off < 2 || x_off >= width - block_w - 3 || | |
1084 y_off < 2 || y_off >= height - block_h - 3) { | |
1085 ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize, | |
1086 block_w + 5, block_h + 5, | |
1087 x_off - 2, y_off - 2, width, height); | |
1088 src = s->edge_emu_buffer + 2 + linesize * 2; | |
1089 } | |
1090 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); | |
1091 } else | |
1092 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); | |
11921 | 1093 } |
1094 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1095 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1096 void vp8_mc_part(VP8Context *s, uint8_t *dst[3], |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1097 AVFrame *ref_frame, int x_off, int y_off, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1098 int bx_off, int by_off, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1099 int block_w, int block_h, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1100 int width, int height, VP56mv *mv) |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1101 { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1102 VP56mv uvmv = *mv; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1103 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1104 /* Y */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1105 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1106 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1107 block_w, block_h, width, height, s->linesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1108 s->put_pixels_tab[block_w == 8]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1109 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1110 /* U/V */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1111 if (s->profile == 3) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1112 uvmv.x &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1113 uvmv.y &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1114 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1115 x_off >>= 1; y_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1116 bx_off >>= 1; by_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1117 width >>= 1; height >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1118 block_w >>= 1; block_h >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1119 vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1120 ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1121 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1122 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1123 vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1124 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1125 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1126 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1127 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1128 |
12215 | 1129 /* Fetch pixels for estimated mv 4 macroblocks ahead. |
1130 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1131 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) |
12215 | 1132 { |
12237 | 1133 /* Don't prefetch refs that haven't been used very often this frame. */ |
1134 if (s->ref_count[ref-1] > (mb_xy >> 5)) { | |
12231 | 1135 int x_off = mb_x << 4, y_off = mb_y << 4; |
1136 int mx = mb->mv.x + x_off + 8; | |
1137 int my = mb->mv.y + y_off; | |
1138 uint8_t **src= s->framep[ref]->data; | |
1139 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; | |
1140 s->dsp.prefetch(src[0]+off, s->linesize, 4); | |
1141 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; | |
1142 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |
1143 } | |
12215 | 1144 } |
1145 | |
11921 | 1146 /** |
1147 * Apply motion vectors to prediction buffer, chapter 18. | |
1148 */ | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1149 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1150 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1151 int mb_x, int mb_y) |
11921 | 1152 { |
1153 int x_off = mb_x << 4, y_off = mb_y << 4; | |
1154 int width = 16*s->mb_width, height = 16*s->mb_height; | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1155 AVFrame *ref = s->framep[mb->ref_frame]; |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1156 VP56mv *bmv = mb->bmv; |
11921 | 1157 |
1158 if (mb->mode < VP8_MVMODE_SPLIT) { | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1159 vp8_mc_part(s, dst, ref, x_off, y_off, |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1160 0, 0, 16, 16, width, height, &mb->mv); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1161 } else switch (mb->partitioning) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1162 case VP8_SPLITMVMODE_4x4: { |
11921 | 1163 int x, y; |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1164 VP56mv uvmv; |
11921 | 1165 |
1166 /* Y */ | |
1167 for (y = 0; y < 4; y++) { | |
1168 for (x = 0; x < 4; x++) { | |
1169 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1170 ref->data[0], &bmv[4*y + x], |
11921 | 1171 4*x + x_off, 4*y + y_off, 4, 4, |
1172 width, height, s->linesize, | |
11974 | 1173 s->put_pixels_tab[2]); |
11921 | 1174 } |
1175 } | |
1176 | |
1177 /* U/V */ | |
1178 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; | |
1179 for (y = 0; y < 2; y++) { | |
1180 for (x = 0; x < 2; x++) { | |
1181 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + | |
1182 mb->bmv[ 2*y * 4 + 2*x+1].x + | |
1183 mb->bmv[(2*y+1) * 4 + 2*x ].x + | |
1184 mb->bmv[(2*y+1) * 4 + 2*x+1].x; | |
1185 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + | |
1186 mb->bmv[ 2*y * 4 + 2*x+1].y + | |
1187 mb->bmv[(2*y+1) * 4 + 2*x ].y + | |
1188 mb->bmv[(2*y+1) * 4 + 2*x+1].y; | |
11937
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1189 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2; |
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1190 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2; |
11921 | 1191 if (s->profile == 3) { |
1192 uvmv.x &= ~7; | |
1193 uvmv.y &= ~7; | |
1194 } | |
1195 vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1196 ref->data[1], &uvmv, |
11921 | 1197 4*x + x_off, 4*y + y_off, 4, 4, |
1198 width, height, s->uvlinesize, | |
11974 | 1199 s->put_pixels_tab[2]); |
11921 | 1200 vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1201 ref->data[2], &uvmv, |
11921 | 1202 4*x + x_off, 4*y + y_off, 4, 4, |
1203 width, height, s->uvlinesize, | |
11974 | 1204 s->put_pixels_tab[2]); |
11921 | 1205 } |
1206 } | |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1207 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1208 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1209 case VP8_SPLITMVMODE_16x8: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1210 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1211 0, 0, 16, 8, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1212 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1213 0, 8, 16, 8, width, height, &bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1214 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1215 case VP8_SPLITMVMODE_8x16: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1216 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1217 0, 0, 8, 16, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1218 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1219 8, 0, 8, 16, width, height, &bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1220 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1221 case VP8_SPLITMVMODE_8x8: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1222 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1223 0, 0, 8, 8, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1224 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1225 8, 0, 8, 8, width, height, &bmv[1]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1226 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1227 0, 8, 8, 8, width, height, &bmv[2]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1228 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1229 8, 8, 8, 8, width, height, &bmv[3]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1230 break; |
11921 | 1231 } |
1232 } | |
1233 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1234 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) |
11921 | 1235 { |
12240 | 1236 int x, y, ch; |
11921 | 1237 |
12238 | 1238 if (mb->mode != MODE_I4x4) { |
1239 uint8_t *y_dst = dst[0]; | |
11921 | 1240 for (y = 0; y < 4; y++) { |
12240 | 1241 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]); |
1242 if (nnz4) { | |
1243 if (nnz4&~0x01010101) { | |
12238 | 1244 for (x = 0; x < 4; x++) { |
12240 | 1245 int nnz = s->non_zero_count_cache[y][x]; |
12238 | 1246 if (nnz) { |
1247 if (nnz == 1) | |
1248 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1249 else | |
1250 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1251 } | |
1252 } | |
1253 } else { | |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1254 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); |
11921 | 1255 } |
1256 } | |
1257 y_dst += 4*s->linesize; | |
1258 } | |
12238 | 1259 } |
11921 | 1260 |
12238 | 1261 for (ch = 0; ch < 2; ch++) { |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1262 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1263 if (nnz4) { |
12238 | 1264 uint8_t *ch_dst = dst[1+ch]; |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1265 if (nnz4&~0x01010101) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1266 for (y = 0; y < 2; y++) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1267 for (x = 0; x < 2; x++) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1268 int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1269 if (nnz) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1270 if (nnz == 1) |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1271 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1272 else |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1273 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1274 } |
12238 | 1275 } |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1276 ch_dst += 4*s->uvlinesize; |
12238 | 1277 } |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1278 } else { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1279 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize); |
11921 | 1280 } |
1281 } | |
1282 } | |
1283 } | |
1284 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1285 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) |
11921 | 1286 { |
1287 int interior_limit, filter_level; | |
1288 | |
1289 if (s->segmentation.enabled) { | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
1290 filter_level = s->segmentation.filter_level[s->segment]; |
11921 | 1291 if (!s->segmentation.absolute_vals) |
1292 filter_level += s->filter.level; | |
1293 } else | |
1294 filter_level = s->filter.level; | |
1295 | |
1296 if (s->lf_delta.enabled) { | |
1297 filter_level += s->lf_delta.ref[mb->ref_frame]; | |
1298 | |
1299 if (mb->ref_frame == VP56_FRAME_CURRENT) { | |
1300 if (mb->mode == MODE_I4x4) | |
1301 filter_level += s->lf_delta.mode[0]; | |
1302 } else { | |
1303 if (mb->mode == VP8_MVMODE_ZERO) | |
1304 filter_level += s->lf_delta.mode[1]; | |
1305 else if (mb->mode == VP8_MVMODE_SPLIT) | |
1306 filter_level += s->lf_delta.mode[3]; | |
1307 else | |
1308 filter_level += s->lf_delta.mode[2]; | |
1309 } | |
1310 } | |
1311 filter_level = av_clip(filter_level, 0, 63); | |
1312 | |
1313 interior_limit = filter_level; | |
1314 if (s->filter.sharpness) { | |
1315 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; | |
1316 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); | |
1317 } | |
1318 interior_limit = FFMAX(interior_limit, 1); | |
1319 | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1320 f->filter_level = filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1321 f->inner_limit = interior_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1322 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; |
11921 | 1323 } |
1324 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1325 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) |
11921 | 1326 { |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1327 int mbedge_lim, bedge_lim, hev_thresh; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1328 int filter_level = f->filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1329 int inner_limit = f->inner_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1330 int inner_filter = f->inner_filter; |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1331 int linesize = s->linesize; |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1332 int uvlinesize = s->uvlinesize; |
11921 | 1333 |
1334 if (!filter_level) | |
1335 return; | |
1336 | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1337 mbedge_lim = 2*(filter_level+2) + inner_limit; |
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1338 bedge_lim = 2* filter_level + inner_limit; |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1339 hev_thresh = filter_level >= 15; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1340 |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1341 if (s->keyframe) { |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1342 if (filter_level >= 40) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1343 hev_thresh = 2; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1344 } else { |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1345 if (filter_level >= 40) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1346 hev_thresh = 3; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1347 else if (filter_level >= 20) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1348 hev_thresh = 2; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1349 } |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1350 |
11921 | 1351 if (mb_x) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1352 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1353 mbedge_lim, inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1354 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1355 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1356 } |
1357 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1358 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1359 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1360 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1361 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1362 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1363 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1364 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1365 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1366 uvlinesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1367 inner_limit, hev_thresh); |
11921 | 1368 } |
1369 | |
1370 if (mb_y) { | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1371 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1372 mbedge_lim, inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1373 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1374 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1375 } |
1376 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1377 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1378 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1379 linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1380 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1381 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1382 linesize, bedge_lim, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1383 inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1384 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1385 linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1386 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1387 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1388 dst[2] + 4 * uvlinesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1389 uvlinesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1390 inner_limit, hev_thresh); |
11921 | 1391 } |
1392 } | |
1393 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1394 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) |
11921 | 1395 { |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1396 int mbedge_lim, bedge_lim; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1397 int filter_level = f->filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1398 int inner_limit = f->inner_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1399 int inner_filter = f->inner_filter; |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1400 int linesize = s->linesize; |
11921 | 1401 |
1402 if (!filter_level) | |
1403 return; | |
1404 | |
1405 mbedge_lim = 2*(filter_level+2) + inner_limit; | |
1406 bedge_lim = 2* filter_level + inner_limit; | |
1407 | |
1408 if (mb_x) | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1409 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1410 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1411 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1412 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1413 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim); |
11921 | 1414 } |
1415 | |
1416 if (mb_y) | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1417 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1418 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1419 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1420 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1421 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim); |
11921 | 1422 } |
1423 } | |
1424 | |
1425 static void filter_mb_row(VP8Context *s, int mb_y) | |
1426 { | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1427 VP8FilterStrength *f = s->filter_strength; |
11921 | 1428 uint8_t *dst[3] = { |
1429 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, | |
1430 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, | |
1431 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize | |
1432 }; | |
1433 int mb_x; | |
1434 | |
1435 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1436 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1437 filter_mb(s, dst, f++, mb_x, mb_y); |
11921 | 1438 dst[0] += 16; |
1439 dst[1] += 8; | |
1440 dst[2] += 8; | |
1441 } | |
1442 } | |
1443 | |
1444 static void filter_mb_row_simple(VP8Context *s, int mb_y) | |
1445 { | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1446 VP8FilterStrength *f = s->filter_strength; |
11921 | 1447 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; |
1448 int mb_x; | |
1449 | |
1450 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1451 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1452 filter_mb_simple(s, dst, f++, mb_x, mb_y); |
11921 | 1453 dst += 16; |
1454 } | |
1455 } | |
1456 | |
1457 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
1458 AVPacket *avpkt) | |
1459 { | |
1460 VP8Context *s = avctx->priv_data; | |
1461 int ret, mb_x, mb_y, i, y, referenced; | |
1462 enum AVDiscard skip_thresh; | |
12232 | 1463 AVFrame *curframe = NULL; |
11921 | 1464 |
1465 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) | |
1466 return ret; | |
1467 | |
1468 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT | |
1469 || s->update_altref == VP56_FRAME_CURRENT; | |
1470 | |
1471 skip_thresh = !referenced ? AVDISCARD_NONREF : | |
1472 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; | |
1473 | |
1474 if (avctx->skip_frame >= skip_thresh) { | |
1475 s->invisible = 1; | |
1476 goto skip_decode; | |
1477 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1478 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; |
11921 | 1479 |
1480 for (i = 0; i < 4; i++) | |
1481 if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1482 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1483 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { | |
1484 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; | |
1485 break; | |
1486 } | |
1487 if (curframe->data[0]) | |
1488 avctx->release_buffer(avctx, curframe); | |
1489 | |
1490 curframe->key_frame = s->keyframe; | |
1491 curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE; | |
1492 curframe->reference = referenced ? 3 : 0; | |
1493 if ((ret = avctx->get_buffer(avctx, curframe))) { | |
1494 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); | |
1495 return ret; | |
1496 } | |
1497 | |
1498 // Given that arithmetic probabilities are updated every frame, it's quite likely | |
1499 // that the values we have on a random interframe are complete junk if we didn't | |
1500 // start decode on a keyframe. So just don't display anything rather than junk. | |
1501 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || | |
1502 !s->framep[VP56_FRAME_GOLDEN] || | |
1503 !s->framep[VP56_FRAME_GOLDEN2])) { | |
1504 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); | |
1505 return AVERROR_INVALIDDATA; | |
1506 } | |
1507 | |
1508 s->linesize = curframe->linesize[0]; | |
1509 s->uvlinesize = curframe->linesize[1]; | |
1510 | |
1511 if (!s->edge_emu_buffer) | |
1512 s->edge_emu_buffer = av_malloc(21*s->linesize); | |
1513 | |
1514 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); | |
1515 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1516 /* Zero macroblock structures for top/left prediction from outside the frame. */ |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1517 memset(s->macroblocks, 0, (s->mb_width + s->mb_height*2)*sizeof(*s->macroblocks)); |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1518 |
11921 | 1519 // top edge of 127 for intra prediction |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1520 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); |
12231 | 1521 memset(s->ref_count, 0, sizeof(s->ref_count)); |
11921 | 1522 |
1523 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | |
1524 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1525 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; |
11921 | 1526 uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1527 uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride; |
12237 | 1528 int mb_xy = mb_y * s->mb_stride; |
11921 | 1529 uint8_t *dst[3] = { |
1530 curframe->data[0] + 16*mb_y*s->linesize, | |
1531 curframe->data[1] + 8*mb_y*s->uvlinesize, | |
1532 curframe->data[2] + 8*mb_y*s->uvlinesize | |
1533 }; | |
1534 | |
1535 memset(s->left_nnz, 0, sizeof(s->left_nnz)); | |
1536 | |
1537 // left edge of 129 for intra prediction | |
1538 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) | |
1539 for (i = 0; i < 3; i++) | |
1540 for (y = 0; y < 16>>!!i; y++) | |
1541 dst[i][y*curframe->linesize[i]-1] = 129; | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1542 if (mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1543 memset(s->top_border, 129, sizeof(*s->top_border)); |
11921 | 1544 |
12237 | 1545 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { |
12221 | 1546 uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1547 uint8_t *segment_mb = segment_map+mb_x; |
12221 | 1548 |
12215 | 1549 /* Prefetch the current frame, 4 MBs ahead */ |
1550 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | |
1551 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | |
1552 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1553 decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb); |
11921 | 1554 |
12237 | 1555 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); |
12231 | 1556 |
11921 | 1557 if (!mb->skip) |
1558 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); | |
1559 | |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
1560 if (mb->mode <= MODE_I4x4) |
12221 | 1561 intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y); |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
1562 else |
11921 | 1563 inter_predict(s, dst, mb, mb_x, mb_y); |
1564 | |
12237 | 1565 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); |
12231 | 1566 |
11921 | 1567 if (!mb->skip) { |
12238 | 1568 idct_mb(s, dst, mb); |
11921 | 1569 } else { |
1570 AV_ZERO64(s->left_nnz); | |
1571 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned | |
1572 | |
1573 // Reset DC block predictors if they would exist if the mb had coefficients | |
1574 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
1575 s->left_nnz[8] = 0; | |
1576 s->top_nnz[mb_x][8] = 0; | |
1577 } | |
1578 } | |
1579 | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1580 if (s->deblock_filter) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1581 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1582 |
12237 | 1583 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); |
12231 | 1584 |
11921 | 1585 dst[0] += 16; |
1586 dst[1] += 8; | |
1587 dst[2] += 8; | |
1588 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1589 if (s->deblock_filter) { |
11921 | 1590 if (s->filter.simple) |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1591 filter_mb_row_simple(s, mb_y); |
11921 | 1592 else |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1593 filter_mb_row(s, mb_y); |
11921 | 1594 } |
1595 } | |
1596 | |
1597 skip_decode: | |
1598 // if future frames don't use the updated probabilities, | |
1599 // reset them to the values we saved | |
1600 if (!s->update_probabilities) | |
1601 s->prob[0] = s->prob[1]; | |
1602 | |
1603 // check if golden and altref are swapped | |
1604 if (s->update_altref == VP56_FRAME_GOLDEN && | |
1605 s->update_golden == VP56_FRAME_GOLDEN2) | |
1606 FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]); | |
1607 else { | |
1608 if (s->update_altref != VP56_FRAME_NONE) | |
1609 s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; | |
1610 | |
1611 if (s->update_golden != VP56_FRAME_NONE) | |
1612 s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; | |
1613 } | |
1614 | |
1615 if (s->update_last) // move cur->prev | |
1616 s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT]; | |
1617 | |
1618 // release no longer referenced frames | |
1619 for (i = 0; i < 4; i++) | |
1620 if (s->frames[i].data[0] && | |
1621 &s->frames[i] != s->framep[VP56_FRAME_CURRENT] && | |
1622 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1623 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1624 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) | |
1625 avctx->release_buffer(avctx, &s->frames[i]); | |
1626 | |
1627 if (!s->invisible) { | |
1628 *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT]; | |
1629 *data_size = sizeof(AVFrame); | |
1630 } | |
1631 | |
1632 return avpkt->size; | |
1633 } | |
1634 | |
1635 static av_cold int vp8_decode_init(AVCodecContext *avctx) | |
1636 { | |
1637 VP8Context *s = avctx->priv_data; | |
1638 | |
1639 s->avctx = avctx; | |
1640 avctx->pix_fmt = PIX_FMT_YUV420P; | |
1641 | |
1642 dsputil_init(&s->dsp, avctx); | |
1643 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8); | |
1644 ff_vp8dsp_init(&s->vp8dsp); | |
1645 | |
1646 // intra pred needs edge emulation among other things | |
1647 if (avctx->flags&CODEC_FLAG_EMU_EDGE) { | |
11947 | 1648 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n"); |
11921 | 1649 return AVERROR_PATCHWELCOME; |
1650 } | |
1651 | |
1652 return 0; | |
1653 } | |
1654 | |
1655 static av_cold int vp8_decode_free(AVCodecContext *avctx) | |
1656 { | |
1657 vp8_decode_flush(avctx); | |
1658 return 0; | |
1659 } | |
1660 | |
1661 AVCodec vp8_decoder = { | |
1662 "vp8", | |
1663 AVMEDIA_TYPE_VIDEO, | |
1664 CODEC_ID_VP8, | |
1665 sizeof(VP8Context), | |
1666 vp8_decode_init, | |
1667 NULL, | |
1668 vp8_decode_free, | |
1669 vp8_decode_frame, | |
1670 CODEC_CAP_DR1, | |
1671 .flush = vp8_decode_flush, | |
1672 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), | |
1673 }; |