Mercurial > libavcodec.hg
annotate vp8.c @ 12494:94eaea836bf4 libavcodec
Check avctx width/height more thoroughly (e.g. all values 0 except width would
have been accepted before).
Also do not fail if they are invalid but instead override them to 0.
This allows decoding e.g. MPEG video when only the container values are corrupted.
For encoding a value of 0,0 of course makes no sense, but was allowed
through before and will be caught by an extra check in the encode function.
author | reimar |
---|---|
date | Wed, 15 Sep 2010 04:46:55 +0000 |
parents | ffb3668ff7af |
children |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
12249 | 6 * Copyright (C) 2010 Jason Garrett-Glaser |
11921 | 7 * |
8 * This file is part of FFmpeg. | |
9 * | |
10 * FFmpeg is free software; you can redistribute it and/or | |
11 * modify it under the terms of the GNU Lesser General Public | |
12 * License as published by the Free Software Foundation; either | |
13 * version 2.1 of the License, or (at your option) any later version. | |
14 * | |
15 * FFmpeg is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 * Lesser General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU Lesser General Public | |
21 * License along with FFmpeg; if not, write to the Free Software | |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 */ | |
24 | |
12372
914f484bb476
Remove use of the deprecated function avcodec_check_dimensions(), use
stefano
parents:
12369
diff
changeset
|
25 #include "libavcore/imgutils.h" |
11921 | 26 #include "avcodec.h" |
27 #include "vp56.h" | |
28 #include "vp8data.h" | |
29 #include "vp8dsp.h" | |
30 #include "h264pred.h" | |
31 #include "rectangle.h" | |
32 | |
/**
 * Per-macroblock loop filter parameters.
 * update_dimensions() allocates mb_width entries of this type.
 */
typedef struct {
    uint8_t filter_level;
    uint8_t inner_limit;
    uint8_t inner_filter;
} VP8FilterStrength;
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
38 |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
39 typedef struct { |
11921 | 40 uint8_t skip; |
41 // todo: make it possible to check for at least (i4x4 or split_mv) | |
42 // in one op. are others needed? | |
43 uint8_t mode; | |
44 uint8_t ref_frame; | |
45 uint8_t partitioning; | |
46 VP56mv mv; | |
47 VP56mv bmv[16]; | |
48 } VP8Macroblock; | |
49 | |
/**
 * Decoder state for one VP8 stream.
 *
 * The dimension-dependent tables (macroblocks_base, filter_strength,
 * intra4x4_pred_mode_top, top_nnz, top_border, segmentation_map) are
 * allocated in update_dimensions() and freed in vp8_decode_flush().
 */
typedef struct {
    AVCodecContext *avctx;
    DSPContext dsp;
    VP8DSPContext vp8dsp;
    H264PredContext hpc;
    /// copied from the epel (profile 0) or bilinear (other profiles) tables per frame header
    vp8_mc_func put_pixels_tab[3][3][3];
    AVFrame frames[4];          ///< released through avctx->release_buffer() on flush
    AVFrame *framep[4];         ///< zeroed on flush
    uint8_t *edge_emu_buffer;
    VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
    int profile;

    int mb_width;   /* number of horizontal MB */
    int mb_height;  /* number of vertical MB */
    int linesize;
    int uvlinesize;

    int keyframe;
    int invisible;
    int update_last;    ///< update VP56_FRAME_PREVIOUS with the current one
    int update_golden;  ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
    int update_altref;  ///< same encoding as update_golden, for the altref buffer
    int deblock_filter;

    /**
     * If this flag is not set, all the probability updates
     * are discarded after this frame is decoded.
     */
    int update_probabilities;

    /**
     * All coefficients are contained in separate arith coding contexts.
     * There can be 1, 2, 4, or 8 of these after the header context.
     */
    int num_coeff_partitions;
    VP56RangeCoder coeff_partition[8];

    VP8Macroblock *macroblocks;         ///< macroblocks_base + 1
    VP8Macroblock *macroblocks_base;    ///< mb_width + 2*mb_height + 1 entries
    VP8FilterStrength *filter_strength; ///< mb_width entries

    uint8_t *intra4x4_pred_mode_top;    ///< mb_width*4 bytes
    uint8_t intra4x4_pred_mode_left[4];
    uint8_t *segmentation_map;          ///< mb_width*mb_height bytes

    /**
     * Cache of the top row needed for intra prediction
     * 16 for luma, 8 for each chroma plane
     * (mb_width + 1 entries are allocated)
     */
    uint8_t (*top_border)[16+8+8];

    /**
     * For coeff decode, we need to know whether the above block had non-zero
     * coefficients. This means for each macroblock, we need data for 4 luma
     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
     * per macroblock. We keep the last row in top_nnz.
     */
    uint8_t (*top_nnz)[9];
    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];

    /**
     * This is the index plus one of the last non-zero coeff
     * for each of the blocks in the current macroblock.
     * So, 0 -> no coeffs
     *     1 -> dc-only (special transform)
     *     2+-> full transform
     */
    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
    uint8_t intra4x4_pred_mode_mb[16];

    int chroma_pred_mode;    ///< 8x8c pred mode of the current macroblock
    int segment;             ///< segment of the current macroblock

    int mbskip_enabled;
    int sign_bias[4]; ///< one state [0, 1] per ref frame type
    int ref_count[3];

    /**
     * Base parameters for segmentation, i.e. per-macroblock parameters.
     * These must be kept unchanged even if segmentation is not used for
     * a frame, since the values persist between interframes.
     */
    struct {
        int enabled;
        int absolute_vals;
        int update_map;
        int8_t base_quant[4];
        int8_t filter_level[4];     ///< base loop filter level
    } segmentation;

    /**
     * Macroblocks can have one of 4 different quants in a frame when
     * segmentation is enabled.
     * If segmentation is disabled, only the first segment's values are used.
     */
    struct {
        // [0] - DC qmul  [1] - AC qmul
        int16_t luma_qmul[2];
        int16_t luma_dc_qmul[2];    ///< luma dc-only block quant
        int16_t chroma_qmul[2];
    } qmat[4];

    struct {
        int simple;
        int level;
        int sharpness;
    } filter;

    struct {
        int enabled;    ///< whether each mb can have a different strength based on mode/ref

        /**
         * filter strength adjustment for the following macroblock modes:
         * [0] - i4x4
         * [1] - zero mv
         * [2] - inter modes except for zero or split mv
         * [3] - split mv
         *  i16x16 modes never have any adjustment
         */
        int8_t mode[4];

        /**
         * filter strength adjustment for macroblocks that reference:
         * [0] - intra / VP56_FRAME_CURRENT
         * [1] - VP56_FRAME_PREVIOUS
         * [2] - VP56_FRAME_GOLDEN
         * [3] - altref / VP56_FRAME_GOLDEN2
         */
        int8_t ref[4];
    } lf_delta;

    /**
     * These are all of the updatable probabilities for binary decisions.
     * They are only implicitly reset on keyframes, making it quite likely
     * for an interframe to desync if a prior frame's header was corrupt
     * or missing outright!
     *
     * prob[0] is the set the header parser reads and updates; prob[1]
     * receives a copy of prob[0] when update_probabilities is not set.
     */
    struct {
        uint8_t segmentid[3];
        uint8_t mbskip;
        uint8_t intra;
        uint8_t last;
        uint8_t golden;
        uint8_t pred16x16[4];
        uint8_t pred8x8c[3];
        /* Padded to allow overreads */
        uint8_t token[4][17][3][NUM_DCT_TOKENS-1];
        uint8_t mvc[2][19];
    } prob[2];
} VP8Context;
202 | |
203 static void vp8_decode_flush(AVCodecContext *avctx) | |
204 { | |
205 VP8Context *s = avctx->priv_data; | |
206 int i; | |
207 | |
208 for (i = 0; i < 4; i++) | |
209 if (s->frames[i].data[0]) | |
210 avctx->release_buffer(avctx, &s->frames[i]); | |
211 memset(s->framep, 0, sizeof(s->framep)); | |
212 | |
213 av_freep(&s->macroblocks_base); | |
12271 | 214 av_freep(&s->filter_strength); |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
215 av_freep(&s->intra4x4_pred_mode_top); |
11921 | 216 av_freep(&s->top_nnz); |
217 av_freep(&s->edge_emu_buffer); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
218 av_freep(&s->top_border); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
219 av_freep(&s->segmentation_map); |
11921 | 220 |
221 s->macroblocks = NULL; | |
222 } | |
223 | |
224 static int update_dimensions(VP8Context *s, int width, int height) | |
225 { | |
12462
ffb3668ff7af
Use new imgutils.h API names, fix deprecation warnings.
stefano
parents:
12388
diff
changeset
|
226 if (av_image_check_size(width, height, 0, s->avctx)) |
11921 | 227 return AVERROR_INVALIDDATA; |
228 | |
229 vp8_decode_flush(s->avctx); | |
230 | |
231 avcodec_set_dimensions(s->avctx, width, height); | |
232 | |
233 s->mb_width = (s->avctx->coded_width +15) / 16; | |
234 s->mb_height = (s->avctx->coded_height+15) / 16; | |
235 | |
12383 | 236 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks)); |
237 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength)); | |
12382
a0c84084fa2f
fix over-allocation. confused b4_stride with mb_width.
skal
parents:
12372
diff
changeset
|
238 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); |
11921 | 239 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
240 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); |
12383 | 241 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height); |
11921 | 242 |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
243 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
244 !s->top_nnz || !s->top_border || !s->segmentation_map) |
12169 | 245 return AVERROR(ENOMEM); |
246 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
247 s->macroblocks = s->macroblocks_base + 1; |
11921 | 248 |
249 return 0; | |
250 } | |
251 | |
252 static void parse_segment_info(VP8Context *s) | |
253 { | |
254 VP56RangeCoder *c = &s->c; | |
255 int i; | |
256 | |
257 s->segmentation.update_map = vp8_rac_get(c); | |
258 | |
259 if (vp8_rac_get(c)) { // update segment feature data | |
260 s->segmentation.absolute_vals = vp8_rac_get(c); | |
261 | |
262 for (i = 0; i < 4; i++) | |
263 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); | |
264 | |
265 for (i = 0; i < 4; i++) | |
266 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); | |
267 } | |
268 if (s->segmentation.update_map) | |
269 for (i = 0; i < 3; i++) | |
270 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; | |
271 } | |
272 | |
273 static void update_lf_deltas(VP8Context *s) | |
274 { | |
275 VP56RangeCoder *c = &s->c; | |
276 int i; | |
277 | |
278 for (i = 0; i < 4; i++) | |
279 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); | |
280 | |
281 for (i = 0; i < 4; i++) | |
282 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); | |
283 } | |
284 | |
285 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) | |
286 { | |
287 const uint8_t *sizes = buf; | |
288 int i; | |
289 | |
290 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); | |
291 | |
292 buf += 3*(s->num_coeff_partitions-1); | |
293 buf_size -= 3*(s->num_coeff_partitions-1); | |
294 if (buf_size < 0) | |
295 return -1; | |
296 | |
297 for (i = 0; i < s->num_coeff_partitions-1; i++) { | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
298 int size = AV_RL24(sizes + 3*i); |
11921 | 299 if (buf_size - size < 0) |
300 return -1; | |
301 | |
12365 | 302 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size); |
11921 | 303 buf += size; |
304 buf_size -= size; | |
305 } | |
12365 | 306 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); |
11921 | 307 |
308 return 0; | |
309 } | |
310 | |
311 static void get_quants(VP8Context *s) | |
312 { | |
313 VP56RangeCoder *c = &s->c; | |
314 int i, base_qi; | |
315 | |
316 int yac_qi = vp8_rac_get_uint(c, 7); | |
317 int ydc_delta = vp8_rac_get_sint(c, 4); | |
318 int y2dc_delta = vp8_rac_get_sint(c, 4); | |
319 int y2ac_delta = vp8_rac_get_sint(c, 4); | |
320 int uvdc_delta = vp8_rac_get_sint(c, 4); | |
321 int uvac_delta = vp8_rac_get_sint(c, 4); | |
322 | |
323 for (i = 0; i < 4; i++) { | |
324 if (s->segmentation.enabled) { | |
325 base_qi = s->segmentation.base_quant[i]; | |
326 if (!s->segmentation.absolute_vals) | |
327 base_qi += yac_qi; | |
328 } else | |
329 base_qi = yac_qi; | |
330 | |
331 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; | |
332 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; | |
333 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
334 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
335 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; |
11921 | 336 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
337 |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
338 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
339 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); |
11921 | 340 } |
341 } | |
342 | |
343 /** | |
344 * Determine which buffers golden and altref should be updated with after this frame. | |
345 * The spec isn't clear here, so I'm going by my understanding of what libvpx does | |
346 * | |
347 * Intra frames update all 3 references | |
348 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set | |
349 * If the update (golden|altref) flag is set, it's updated with the current frame | |
350 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. | |
351 * If the flag is not set, the number read means: | |
352 * 0: no update | |
353 * 1: VP56_FRAME_PREVIOUS | |
354 * 2: update golden with altref, or update altref with golden | |
355 */ | |
356 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) | |
357 { | |
358 VP56RangeCoder *c = &s->c; | |
359 | |
360 if (update) | |
361 return VP56_FRAME_CURRENT; | |
362 | |
363 switch (vp8_rac_get_uint(c, 2)) { | |
364 case 1: | |
365 return VP56_FRAME_PREVIOUS; | |
366 case 2: | |
367 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; | |
368 } | |
369 return VP56_FRAME_NONE; | |
370 } | |
371 | |
372 static void update_refs(VP8Context *s) | |
373 { | |
374 VP56RangeCoder *c = &s->c; | |
375 | |
376 int update_golden = vp8_rac_get(c); | |
377 int update_altref = vp8_rac_get(c); | |
378 | |
379 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); | |
380 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); | |
381 } | |
382 | |
383 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) | |
384 { | |
385 VP56RangeCoder *c = &s->c; | |
12343
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
386 int header_size, hscale, vscale, i, j, k, l, m, ret; |
11921 | 387 int width = s->avctx->width; |
388 int height = s->avctx->height; | |
389 | |
390 s->keyframe = !(buf[0] & 1); | |
391 s->profile = (buf[0]>>1) & 7; | |
392 s->invisible = !(buf[0] & 0x10); | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
393 header_size = AV_RL24(buf) >> 5; |
11921 | 394 buf += 3; |
395 buf_size -= 3; | |
396 | |
11974 | 397 if (s->profile > 3) |
398 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); | |
399 | |
400 if (!s->profile) | |
401 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); | |
402 else // profile 1-3 use bilinear, 4+ aren't defined so whatever | |
403 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); | |
11921 | 404 |
405 if (header_size > buf_size - 7*s->keyframe) { | |
406 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); | |
407 return AVERROR_INVALIDDATA; | |
408 } | |
409 | |
410 if (s->keyframe) { | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
411 if (AV_RL24(buf) != 0x2a019d) { |
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
412 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf)); |
11921 | 413 return AVERROR_INVALIDDATA; |
414 } | |
415 width = AV_RL16(buf+3) & 0x3fff; | |
416 height = AV_RL16(buf+5) & 0x3fff; | |
417 hscale = buf[4] >> 6; | |
418 vscale = buf[6] >> 6; | |
419 buf += 7; | |
420 buf_size -= 7; | |
421 | |
11970
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
422 if (hscale || vscale) |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
423 av_log_missing_feature(s->avctx, "Upscaling", 1); |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
424 |
11921 | 425 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; |
12343
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
426 for (i = 0; i < 4; i++) |
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
427 for (j = 0; j < 16; j++) |
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
428 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], |
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
429 sizeof(s->prob->token[i][j])); |
11921 | 430 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); |
431 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); | |
432 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); | |
433 memset(&s->segmentation, 0, sizeof(s->segmentation)); | |
434 } | |
435 | |
436 if (!s->macroblocks_base || /* first frame */ | |
437 width != s->avctx->width || height != s->avctx->height) { | |
438 if ((ret = update_dimensions(s, width, height) < 0)) | |
439 return ret; | |
440 } | |
441 | |
12365 | 442 ff_vp56_init_range_decoder(c, buf, header_size); |
11921 | 443 buf += header_size; |
444 buf_size -= header_size; | |
445 | |
446 if (s->keyframe) { | |
447 if (vp8_rac_get(c)) | |
448 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); | |
449 vp8_rac_get(c); // whether we can skip clamping in dsp functions | |
450 } | |
451 | |
452 if ((s->segmentation.enabled = vp8_rac_get(c))) | |
453 parse_segment_info(s); | |
454 else | |
455 s->segmentation.update_map = 0; // FIXME: move this to some init function? | |
456 | |
457 s->filter.simple = vp8_rac_get(c); | |
458 s->filter.level = vp8_rac_get_uint(c, 6); | |
459 s->filter.sharpness = vp8_rac_get_uint(c, 3); | |
460 | |
461 if ((s->lf_delta.enabled = vp8_rac_get(c))) | |
462 if (vp8_rac_get(c)) | |
463 update_lf_deltas(s); | |
464 | |
465 if (setup_partitions(s, buf, buf_size)) { | |
466 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); | |
467 return AVERROR_INVALIDDATA; | |
468 } | |
469 | |
470 get_quants(s); | |
471 | |
472 if (!s->keyframe) { | |
473 update_refs(s); | |
474 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); | |
475 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); | |
476 } | |
477 | |
478 // if we aren't saving this frame's probabilities for future frames, | |
479 // make a copy of the current probabilities | |
480 if (!(s->update_probabilities = vp8_rac_get(c))) | |
481 s->prob[1] = s->prob[0]; | |
482 | |
483 s->update_last = s->keyframe || vp8_rac_get(c); | |
484 | |
485 for (i = 0; i < 4; i++) | |
486 for (j = 0; j < 8; j++) | |
487 for (k = 0; k < 3; k++) | |
488 for (l = 0; l < NUM_DCT_TOKENS-1; l++) | |
12343
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
489 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) { |
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
490 int prob = vp8_rac_get_uint(c, 8); |
12358
d596749eb0bc
VP8: slightly faster DCT coefficient probability update
darkshikari
parents:
12354
diff
changeset
|
491 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) |
d596749eb0bc
VP8: slightly faster DCT coefficient probability update
darkshikari
parents:
12354
diff
changeset
|
492 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob; |
12343
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
493 } |
11921 | 494 |
495 if ((s->mbskip_enabled = vp8_rac_get(c))) | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
496 s->prob->mbskip = vp8_rac_get_uint(c, 8); |
11921 | 497 |
498 if (!s->keyframe) { | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
499 s->prob->intra = vp8_rac_get_uint(c, 8); |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
500 s->prob->last = vp8_rac_get_uint(c, 8); |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
501 s->prob->golden = vp8_rac_get_uint(c, 8); |
11921 | 502 |
503 if (vp8_rac_get(c)) | |
504 for (i = 0; i < 4; i++) | |
505 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); | |
506 if (vp8_rac_get(c)) | |
507 for (i = 0; i < 3; i++) | |
508 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); | |
509 | |
510 // 17.2 MV probability update | |
511 for (i = 0; i < 2; i++) | |
512 for (j = 0; j < 19; j++) | |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
513 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) |
11921 | 514 s->prob->mvc[i][j] = vp8_rac_get_nn(c); |
515 } | |
516 | |
517 return 0; | |
518 } | |
519 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
520 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
521 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) |
11921 | 522 { |
523 #define MARGIN (16 << 2) | |
524 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), | |
525 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); | |
526 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), | |
527 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); | |
528 } | |
529 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
/**
 * Collect the candidate motion vectors of the neighbouring macroblocks
 * (top, left, top-left) and derive the predicted vector for mb.
 *
 * @param s    decoder context (supplies the per-reference sign bias)
 * @param mb   current macroblock; its mv field is set to the chosen
 *             prediction (zero or the nearest vector)
 * @param near receives the nearest ([0]) and near ([1]) vectors
 * @param best not written by this function
 * @param cnt  candidate weights indexed by CNT_*; they are accumulated
 *             with +=, so the caller must pass them in initialized
 */
static av_always_inline
void find_near_mvs(VP8Context *s, VP8Macroblock *mb,
                   VP56mv near[2], VP56mv *best, uint8_t cnt[4])
{
    /* neighbours are addressed relative to mb in the macroblock array */
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
    VP56mv near_mv[4] = {{ 0 }};
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    int idx = CNT_ZERO;
    int best_idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int *sign_bias = s->sign_bias;

    /* Process MB on top, left and top-left */
    /*
     * For each neighbour that is not intra (ref_frame != VP56_FRAME_CURRENT):
     *  - a zero vector adds its weight to cnt[CNT_ZERO];
     *  - a non-zero vector is negated if the neighbour's reference has a
     *    different sign bias, then either opens the next near_mv[] slot
     *    (always for the first neighbour, otherwise only if it differs
     *    from the most recent slot) or is counted with the current slot.
     * Top and left weigh 2, top-left (n == 2) weighs 1.
     * Both components of a vector are handled as one 32-bit word.
     */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx] += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }
    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    /* If we have three distinct MVs, merge first and last if they're the same */
    if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT]))
        cnt[CNT_NEAREST] += 1;

    /* cnt[CNT_SPLITMV] is reused from here on: it now counts the
     * neighbours coded with split MVs (left and top weigh 2, top-left 1) */
    cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode   == VP8_MVMODE_SPLIT) +
                        (mb_edge[EDGE_TOP]->mode    == VP8_MVMODE_SPLIT)) * 2 +
                        (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

    /* Swap near and nearest if necessary */
    if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
        FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
        FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
    }

    /* Choose the best mv out of 0,0 and the nearest mv */
    if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])
        best_idx = CNT_NEAREST;

    mb->mv  = near_mv[best_idx];
    near[0] = near_mv[CNT_NEAREST];
    near[1] = near_mv[CNT_NEAR];
}
591 | |
592 /** | |
593 * Motion vector coding, 17.1. | |
594 */ | |
595 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) | |
596 { | |
12255 | 597 int bit, x = 0; |
11921 | 598 |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
599 if (vp56_rac_get_prob_branchy(c, p[0])) { |
11921 | 600 int i; |
601 | |
602 for (i = 0; i < 3; i++) | |
603 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
604 for (i = 9; i > 3; i--) | |
605 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
606 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) | |
607 x += 8; | |
12255 | 608 } else { |
609 // small_mvtree | |
610 const uint8_t *ps = p+2; | |
611 bit = vp56_rac_get_prob(c, *ps); | |
612 ps += 1 + 3*bit; | |
613 x += 4*bit; | |
614 bit = vp56_rac_get_prob(c, *ps); | |
615 ps += 1 + bit; | |
616 x += 2*bit; | |
617 x += vp56_rac_get_prob(c, *ps); | |
618 } | |
11921 | 619 |
620 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; | |
621 } | |
622 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
623 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
624 const uint8_t *get_submv_prob(uint32_t left, uint32_t top) |
11921 | 625 { |
12219 | 626 if (left == top) |
627 return vp8_submv_prob[4-!!left]; | |
628 if (!top) | |
11921 | 629 return vp8_submv_prob[2]; |
12219 | 630 return vp8_submv_prob[1-!!left]; |
11921 | 631 } |
632 | |
633 /** | |
634 * Split motion vector prediction, 16.4. | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
635 * @returns the number of motion vectors parsed (2, 4 or 16) |
11921 | 636 */ |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
637 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
638 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) |
11921 | 639 { |
12352 | 640 int part_idx; |
641 int n, num; | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
642 VP8Macroblock *top_mb = &mb[2]; |
12219 | 643 VP8Macroblock *left_mb = &mb[-1]; |
644 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], | |
645 *mbsplits_top = vp8_mbsplits[top_mb->partitioning], | |
12352 | 646 *mbsplits_cur, *firstidx; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
647 VP56mv *top_mv = top_mb->bmv; |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
648 VP56mv *left_mv = left_mb->bmv; |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
649 VP56mv *cur_mv = mb->bmv; |
11921 | 650 |
12352 | 651 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { |
652 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { | |
653 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); | |
654 } else { | |
655 part_idx = VP8_SPLITMVMODE_8x8; | |
656 } | |
657 } else { | |
658 part_idx = VP8_SPLITMVMODE_4x4; | |
659 } | |
660 | |
661 num = vp8_mbsplit_count[part_idx]; | |
662 mbsplits_cur = vp8_mbsplits[part_idx], | |
663 firstidx = vp8_mbfirstidx[part_idx]; | |
664 mb->partitioning = part_idx; | |
665 | |
11921 | 666 for (n = 0; n < num; n++) { |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
667 int k = firstidx[n]; |
12219 | 668 uint32_t left, above; |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
669 const uint8_t *submv_prob; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
670 |
12219 | 671 if (!(k & 3)) |
672 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); | |
673 else | |
674 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); | |
675 if (k <= 3) | |
676 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); | |
677 else | |
678 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
679 |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
680 submv_prob = get_submv_prob(left, above); |
11921 | 681 |
12351 | 682 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { |
683 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { | |
684 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { | |
685 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); | |
686 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); | |
687 } else { | |
688 AV_ZERO32(&mb->bmv[n]); | |
689 } | |
690 } else { | |
691 AV_WN32A(&mb->bmv[n], above); | |
692 } | |
693 } else { | |
12219 | 694 AV_WN32A(&mb->bmv[n], left); |
11921 | 695 } |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
696 } |
11921 | 697 |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
698 return num; |
11921 | 699 } |
700 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
701 static av_always_inline |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
702 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
703 int mb_x, int keyframe) |
11921 | 704 { |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
705 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; |
12221 | 706 if (keyframe) { |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
707 int x, y; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
708 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
709 uint8_t* const left = s->intra4x4_pred_mode_left; |
12221 | 710 for (y = 0; y < 4; y++) { |
711 for (x = 0; x < 4; x++) { | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
712 const uint8_t *ctx; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
713 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
714 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
715 left[y] = top[x] = *intra4x4; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
716 intra4x4++; |
11921 | 717 } |
718 } | |
12221 | 719 } else { |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
720 int i; |
12221 | 721 for (i = 0; i < 16; i++) |
722 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); | |
11921 | 723 } |
724 } | |
725 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
726 static av_always_inline |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
727 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment) |
11921 | 728 { |
729 VP56RangeCoder *c = &s->c; | |
730 | |
731 if (s->segmentation.update_map) | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
732 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
733 s->segment = *segment; |
11921 | 734 |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
735 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; |
11921 | 736 |
737 if (s->keyframe) { | |
738 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); | |
739 | |
740 if (mb->mode == MODE_I4x4) { | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
741 decode_intra4x4_modes(s, c, mb_x, 1); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
742 } else { |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
743 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
744 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
745 AV_WN32A(s->intra4x4_pred_mode_left, modes); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
746 } |
11921 | 747 |
748 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); | |
749 mb->ref_frame = VP56_FRAME_CURRENT; | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
750 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { |
11921 | 751 VP56mv near[2], best; |
12217 | 752 uint8_t cnt[4] = { 0 }; |
11921 | 753 |
754 // inter MB, 16.2 | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
755 if (vp56_rac_get_prob_branchy(c, s->prob->last)) |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
756 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? |
11921 | 757 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; |
758 else | |
759 mb->ref_frame = VP56_FRAME_PREVIOUS; | |
12231 | 760 s->ref_count[mb->ref_frame-1]++; |
11921 | 761 |
762 // motion vectors, 16.3 | |
12383 | 763 find_near_mvs(s, mb, near, &best, cnt); |
12350 | 764 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[0]][0])) { |
765 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[1]][1])) { | |
766 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[2]][2])) { | |
767 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[3]][3])) { | |
768 mb->mode = VP8_MVMODE_SPLIT; | |
769 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); | |
770 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; | |
771 } else { | |
772 mb->mode = VP8_MVMODE_NEW; | |
773 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); | |
12388 | 774 mb->mv.y += read_mv_component(c, s->prob->mvc[0]); |
775 mb->mv.x += read_mv_component(c, s->prob->mvc[1]); | |
12350 | 776 } |
777 } else { | |
778 mb->mode = VP8_MVMODE_NEAR; | |
779 clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); | |
780 } | |
781 } else { | |
782 mb->mode = VP8_MVMODE_NEAREST; | |
783 clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); | |
784 } | |
785 } else { | |
786 mb->mode = VP8_MVMODE_ZERO; | |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
787 AV_ZERO32(&mb->mv); |
11921 | 788 } |
789 if (mb->mode != VP8_MVMODE_SPLIT) { | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
790 mb->partitioning = VP8_SPLITMVMODE_NONE; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
791 mb->bmv[0] = mb->mv; |
11921 | 792 } |
793 } else { | |
794 // intra MB, 16.1 | |
795 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); | |
796 | |
12220
0f635b1f7861
Avoid useless fill_rectangle in P-frames in VP8
darkshikari
parents:
12219
diff
changeset
|
797 if (mb->mode == MODE_I4x4) |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
798 decode_intra4x4_modes(s, c, mb_x, 0); |
11921 | 799 |
800 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); | |
801 mb->ref_frame = VP56_FRAME_CURRENT; | |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
802 mb->partitioning = VP8_SPLITMVMODE_NONE; |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
803 AV_ZERO32(&mb->bmv[0]); |
11921 | 804 } |
805 } | |
806 | |
807 /** | |
12115 | 808 * @param c arithmetic bitstream reader context |
809 * @param block destination for block coefficients | |
810 * @param probs probabilities to use when reading trees from the bitstream | |
11921 | 811 * @param i initial coeff index, 0 unless a separate DC block is coded |
812 * @param zero_nhood the initial prediction context for number of surrounding | |
813 * all-zero blocks (only left/top, so 0-2) | |
12062
372f7fed2806
Avoid square brackets in Doxygen comments; Doxygen chokes on them.
diego
parents:
11990
diff
changeset
|
814 * @param qmul array holding the dc/ac dequant factor at position 0/1 |
11921 | 815 * @return 0 if no coeffs were decoded |
816 * otherwise, the index of the last coeff decoded plus one | |
817 */ | |
12362 | 818 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16], |
819 uint8_t probs[8][3][NUM_DCT_TOKENS-1], | |
820 int i, uint8_t *token_prob, int16_t qmul[2]) | |
11921 | 821 { |
12360
18117b5bb7dc
VP8: simplify decode_block_coeffs to avoid having to track nonzero coeffs
darkshikari
parents:
12358
diff
changeset
|
822 goto skip_eob; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
823 do { |
12362 | 824 int coeff; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
825 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB |
12360
18117b5bb7dc
VP8: simplify decode_block_coeffs to avoid having to track nonzero coeffs
darkshikari
parents:
12358
diff
changeset
|
826 return i; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
827 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
828 skip_eob: |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
829 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 |
12335 | 830 if (++i == 16) |
12360
18117b5bb7dc
VP8: simplify decode_block_coeffs to avoid having to track nonzero coeffs
darkshikari
parents:
12358
diff
changeset
|
831 return i; // invalid input; blocks should end with EOB |
12343
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
832 token_prob = probs[i][0]; |
12335 | 833 goto skip_eob; |
11921 | 834 } |
835 | |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
836 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
837 coeff = 1; |
12343
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
838 token_prob = probs[i+1][1]; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
839 } else { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
840 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 |
12354 | 841 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]); |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
842 if (coeff) |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
843 coeff += vp56_rac_get_prob(c, token_prob[5]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
844 coeff += 2; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
845 } else { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
846 // DCT_CAT* |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
847 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
848 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
849 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
850 } else { // DCT_CAT2 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
851 coeff = 7; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
852 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
853 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
854 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
855 } else { // DCT_CAT3 and up |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
856 int a = vp56_rac_get_prob(c, token_prob[8]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
857 int b = vp56_rac_get_prob(c, token_prob[9+a]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
858 int cat = (a<<1) + b; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
859 coeff = 3 + (8<<cat); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
860 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
861 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
862 } |
12343
a18ab740d2db
VP8: eliminate a dereference in coefficient decoding
darkshikari
parents:
12342
diff
changeset
|
863 token_prob = probs[i+1][2]; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
864 } |
12360
18117b5bb7dc
VP8: simplify decode_block_coeffs to avoid having to track nonzero coeffs
darkshikari
parents:
12358
diff
changeset
|
865 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; |
18117b5bb7dc
VP8: simplify decode_block_coeffs to avoid having to track nonzero coeffs
darkshikari
parents:
12358
diff
changeset
|
866 } while (++i < 16); |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
867 |
12360
18117b5bb7dc
VP8: simplify decode_block_coeffs to avoid having to track nonzero coeffs
darkshikari
parents:
12358
diff
changeset
|
868 return i; |
11921 | 869 } |
870 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
871 static av_always_inline |
12362 | 872 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], |
873 uint8_t probs[8][3][NUM_DCT_TOKENS-1], | |
874 int i, int zero_nhood, int16_t qmul[2]) | |
875 { | |
876 uint8_t *token_prob = probs[i][zero_nhood]; | |
877 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB | |
878 return 0; | |
879 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul); | |
880 } | |
881 | |
882 static av_always_inline | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
883 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
884 uint8_t t_nnz[9], uint8_t l_nnz[9]) |
11921 | 885 { |
886 int i, x, y, luma_start = 0, luma_ctx = 3; | |
887 int nnz_pred, nnz, nnz_total = 0; | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
888 int segment = s->segment; |
12342 | 889 int block_dc = 0; |
11921 | 890 |
891 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
892 nnz_pred = t_nnz[8] + l_nnz[8]; | |
893 | |
894 // decode DC values and do hadamard | |
12340
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12339
diff
changeset
|
895 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred, |
11921 | 896 s->qmat[segment].luma_dc_qmul); |
897 l_nnz[8] = t_nnz[8] = !!nnz; | |
12342 | 898 if (nnz) { |
899 nnz_total += nnz; | |
900 block_dc = 1; | |
901 if (nnz == 1) | |
902 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc); | |
903 else | |
904 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc); | |
905 } | |
11921 | 906 luma_start = 1; |
907 luma_ctx = 0; | |
908 } | |
909 | |
910 // luma blocks | |
911 for (y = 0; y < 4; y++) | |
912 for (x = 0; x < 4; x++) { | |
12361 | 913 nnz_pred = l_nnz[y] + t_nnz[x]; |
11921 | 914 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, |
12361 | 915 nnz_pred, s->qmat[segment].luma_qmul); |
12342 | 916 // nnz+block_dc may be one more than the actual last index, but we don't care |
917 s->non_zero_count_cache[y][x] = nnz + block_dc; | |
11921 | 918 t_nnz[x] = l_nnz[y] = !!nnz; |
919 nnz_total += nnz; | |
920 } | |
921 | |
922 // chroma blocks | |
923 // TODO: what to do about dimensions? 2nd dim for luma is x, | |
924 // but for chroma it's (y<<1)|x | |
925 for (i = 4; i < 6; i++) | |
926 for (y = 0; y < 2; y++) | |
927 for (x = 0; x < 2; x++) { | |
928 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; | |
929 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, | |
930 nnz_pred, s->qmat[segment].chroma_qmul); | |
931 s->non_zero_count_cache[i][(y<<1)+x] = nnz; | |
932 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; | |
933 nnz_total += nnz; | |
934 } | |
935 | |
936 // if there were no coded coeffs despite the macroblock not being marked skip, | |
937 // we MUST not do the inner loop filter and should not do IDCT | |
938 // Since skip isn't used for bitstream prediction, just manually set it. | |
939 if (!nnz_total) | |
940 mb->skip = 1; | |
941 } | |
942 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
943 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
944 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
945 int linesize, int uvlinesize, int simple) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
946 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
947 AV_COPY128(top_border, src_y + 15*linesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
948 if (!simple) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
949 AV_COPY64(top_border+16, src_cb + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
950 AV_COPY64(top_border+24, src_cr + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
951 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
952 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
953 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
954 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
955 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
956 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
957 int simple, int xchg) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
958 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
959 uint8_t *top_border_m1 = top_border-32; // for TL prediction |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
960 src_y -= linesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
961 src_cb -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
962 src_cr -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
963 |
12202 | 964 #define XCHG(a,b,xchg) do { \ |
965 if (xchg) AV_SWAP64(b,a); \ | |
966 else AV_COPY64(b,a); \ | |
967 } while (0) | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
968 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
969 XCHG(top_border_m1+8, src_y-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
970 XCHG(top_border, src_y, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
971 XCHG(top_border+8, src_y+8, 1); |
12201
c4b53914f286
vp8: add do { } while(0) around XCHG() macro to avoid confusing if/else
mru
parents:
12200
diff
changeset
|
972 if (mb_x < mb_width-1) |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
973 XCHG(top_border+32, src_y+16, 1); |
12201
c4b53914f286
vp8: add do { } while(0) around XCHG() macro to avoid confusing if/else
mru
parents:
12200
diff
changeset
|
974 |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
975 // only copy chroma for normal loop filter |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
976 // or to initialize the top row to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
977 if (!simple || !mb_y) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
978 XCHG(top_border_m1+16, src_cb-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
979 XCHG(top_border_m1+24, src_cr-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
980 XCHG(top_border+16, src_cb, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
981 XCHG(top_border+24, src_cr, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
982 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
983 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
984 |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
985 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
986 int check_intra_pred_mode(int mode, int mb_x, int mb_y) |
11921 | 987 { |
988 if (mode == DC_PRED8x8) { | |
12243
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
989 if (!mb_x) { |
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
990 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; |
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
991 } else if (!mb_y) { |
12244 | 992 mode = LEFT_DC_PRED8x8; |
12243
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
993 } |
11921 | 994 } |
995 return mode; | |
996 } | |
997 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
998 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
999 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1000 int mb_x, int mb_y) |
11921 | 1001 { |
1002 int x, y, mode, nnz, tr; | |
1003 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1004 // for the first row, we need to run xchg_mb_border to init the top edge to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1005 // otherwise, skip it if we aren't going to deblock |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1006 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1007 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1008 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1009 s->filter.simple, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1010 |
11921 | 1011 if (mb->mode < MODE_I4x4) { |
1012 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); | |
1013 s->hpc.pred16x16[mode](dst[0], s->linesize); | |
1014 } else { | |
1015 uint8_t *ptr = dst[0]; | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1016 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; |
11921 | 1017 |
1018 // all blocks on the right edge of the macroblock use bottom edge | |
1019 // the top macroblock for their topright edge | |
1020 uint8_t *tr_right = ptr - s->linesize + 16; | |
1021 | |
1022 // if we're on the right edge of the frame, said edge is extended | |
1023 // from the top macroblock | |
1024 if (mb_x == s->mb_width-1) { | |
1025 tr = tr_right[-1]*0x01010101; | |
1026 tr_right = (uint8_t *)&tr; | |
1027 } | |
1028 | |
12234
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1029 if (mb->skip) |
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1030 AV_ZERO128(s->non_zero_count_cache); |
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1031 |
11921 | 1032 for (y = 0; y < 4; y++) { |
1033 uint8_t *topright = ptr + 4 - s->linesize; | |
1034 for (x = 0; x < 4; x++) { | |
1035 if (x == 3) | |
1036 topright = tr_right; | |
1037 | |
12221 | 1038 s->hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s->linesize); |
11921 | 1039 |
1040 nnz = s->non_zero_count_cache[y][x]; | |
1041 if (nnz) { | |
1042 if (nnz == 1) | |
1043 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); | |
1044 else | |
1045 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); | |
1046 } | |
1047 topright += 4; | |
1048 } | |
1049 | |
1050 ptr += 4*s->linesize; | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1051 intra4x4 += 4; |
11921 | 1052 } |
1053 } | |
1054 | |
1055 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); | |
1056 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); | |
1057 s->hpc.pred8x8[mode](dst[2], s->uvlinesize); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1058 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1059 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1060 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1061 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1062 s->filter.simple, 0); |
11921 | 1063 } |
1064 | |
1065 /** | |
1066 * Generic MC function. | |
1067 * | |
1068 * @param s VP8 decoding context | |
1069 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes | |
1070 * @param dst target buffer for block data at block position | |
1071 * @param src reference picture buffer at origin (0, 0) | |
1072 * @param mv motion vector (relative to block position) to get pixel data from | |
1073 * @param x_off horizontal position of block from origin (0, 0) | |
1074 * @param y_off vertical position of block from origin (0, 0) | |
1075 * @param block_w width of block (16, 8 or 4) | |
1076 * @param block_h height of block (always same as block_w) | |
1077 * @param width width of src/dst plane data | |
1078 * @param height height of src/dst plane data | |
1079 * @param linesize size of a single line of plane data, including padding | |
12115 | 1080 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
11921 | 1081 */ |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1082 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1083 void vp8_mc(VP8Context *s, int luma, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1084 uint8_t *dst, uint8_t *src, const VP56mv *mv, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1085 int x_off, int y_off, int block_w, int block_h, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1086 int width, int height, int linesize, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1087 vp8_mc_func mc_func[3][3]) |
11921 | 1088 { |
12218 | 1089 if (AV_RN32A(mv)) { |
1090 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; | |
1091 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; | |
1092 int my = (mv->y << luma)&7, my_idx = idx[my]; | |
11921 | 1093 |
12218 | 1094 x_off += mv->x >> (3 - luma); |
1095 y_off += mv->y >> (3 - luma); | |
11921 | 1096 |
12218 | 1097 // edge emulation |
1098 src += y_off * linesize + x_off; | |
1099 if (x_off < 2 || x_off >= width - block_w - 3 || | |
1100 y_off < 2 || y_off >= height - block_h - 3) { | |
1101 ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize, | |
1102 block_w + 5, block_h + 5, | |
1103 x_off - 2, y_off - 2, width, height); | |
1104 src = s->edge_emu_buffer + 2 + linesize * 2; | |
1105 } | |
1106 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); | |
1107 } else | |
1108 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); | |
11921 | 1109 } |
1110 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1111 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1112 void vp8_mc_part(VP8Context *s, uint8_t *dst[3], |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1113 AVFrame *ref_frame, int x_off, int y_off, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1114 int bx_off, int by_off, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1115 int block_w, int block_h, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1116 int width, int height, VP56mv *mv) |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1117 { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1118 VP56mv uvmv = *mv; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1119 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1120 /* Y */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1121 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1122 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1123 block_w, block_h, width, height, s->linesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1124 s->put_pixels_tab[block_w == 8]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1125 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1126 /* U/V */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1127 if (s->profile == 3) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1128 uvmv.x &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1129 uvmv.y &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1130 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1131 x_off >>= 1; y_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1132 bx_off >>= 1; by_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1133 width >>= 1; height >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1134 block_w >>= 1; block_h >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1135 vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1136 ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1137 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1138 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1139 vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1140 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1141 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1142 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1143 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1144 |
12215 | 1145 /* Fetch pixels for estimated mv 4 macroblocks ahead. |
1146 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1147 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) |
12215 | 1148 { |
12237 | 1149 /* Don't prefetch refs that haven't been used very often this frame. */ |
1150 if (s->ref_count[ref-1] > (mb_xy >> 5)) { | |
12231 | 1151 int x_off = mb_x << 4, y_off = mb_y << 4; |
12369 | 1152 int mx = (mb->mv.x>>2) + x_off + 8; |
1153 int my = (mb->mv.y>>2) + y_off; | |
12231 | 1154 uint8_t **src= s->framep[ref]->data; |
1155 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; | |
1156 s->dsp.prefetch(src[0]+off, s->linesize, 4); | |
1157 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; | |
1158 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |
1159 } | |
12215 | 1160 } |
1161 | |
11921 | 1162 /** |
1163 * Apply motion vectors to prediction buffer, chapter 18. | |
1164 */ | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1165 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1166 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1167 int mb_x, int mb_y) |
11921 | 1168 { |
1169 int x_off = mb_x << 4, y_off = mb_y << 4; | |
1170 int width = 16*s->mb_width, height = 16*s->mb_height; | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1171 AVFrame *ref = s->framep[mb->ref_frame]; |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1172 VP56mv *bmv = mb->bmv; |
11921 | 1173 |
1174 if (mb->mode < VP8_MVMODE_SPLIT) { | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1175 vp8_mc_part(s, dst, ref, x_off, y_off, |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1176 0, 0, 16, 16, width, height, &mb->mv); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1177 } else switch (mb->partitioning) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1178 case VP8_SPLITMVMODE_4x4: { |
11921 | 1179 int x, y; |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1180 VP56mv uvmv; |
11921 | 1181 |
1182 /* Y */ | |
1183 for (y = 0; y < 4; y++) { | |
1184 for (x = 0; x < 4; x++) { | |
1185 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1186 ref->data[0], &bmv[4*y + x], |
11921 | 1187 4*x + x_off, 4*y + y_off, 4, 4, |
1188 width, height, s->linesize, | |
11974 | 1189 s->put_pixels_tab[2]); |
11921 | 1190 } |
1191 } | |
1192 | |
1193 /* U/V */ | |
1194 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; | |
1195 for (y = 0; y < 2; y++) { | |
1196 for (x = 0; x < 2; x++) { | |
1197 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + | |
1198 mb->bmv[ 2*y * 4 + 2*x+1].x + | |
1199 mb->bmv[(2*y+1) * 4 + 2*x ].x + | |
1200 mb->bmv[(2*y+1) * 4 + 2*x+1].x; | |
1201 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + | |
1202 mb->bmv[ 2*y * 4 + 2*x+1].y + | |
1203 mb->bmv[(2*y+1) * 4 + 2*x ].y + | |
1204 mb->bmv[(2*y+1) * 4 + 2*x+1].y; | |
11937
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1205 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2; |
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1206 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2; |
11921 | 1207 if (s->profile == 3) { |
1208 uvmv.x &= ~7; | |
1209 uvmv.y &= ~7; | |
1210 } | |
1211 vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1212 ref->data[1], &uvmv, |
11921 | 1213 4*x + x_off, 4*y + y_off, 4, 4, |
1214 width, height, s->uvlinesize, | |
11974 | 1215 s->put_pixels_tab[2]); |
11921 | 1216 vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1217 ref->data[2], &uvmv, |
11921 | 1218 4*x + x_off, 4*y + y_off, 4, 4, |
1219 width, height, s->uvlinesize, | |
11974 | 1220 s->put_pixels_tab[2]); |
11921 | 1221 } |
1222 } | |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1223 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1224 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1225 case VP8_SPLITMVMODE_16x8: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1226 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1227 0, 0, 16, 8, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1228 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1229 0, 8, 16, 8, width, height, &bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1230 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1231 case VP8_SPLITMVMODE_8x16: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1232 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1233 0, 0, 8, 16, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1234 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1235 8, 0, 8, 16, width, height, &bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1236 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1237 case VP8_SPLITMVMODE_8x8: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1238 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1239 0, 0, 8, 8, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1240 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1241 8, 0, 8, 8, width, height, &bmv[1]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1242 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1243 0, 8, 8, 8, width, height, &bmv[2]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1244 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1245 8, 8, 8, 8, width, height, &bmv[3]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1246 break; |
11921 | 1247 } |
1248 } | |
1249 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1250 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) |
11921 | 1251 { |
12240 | 1252 int x, y, ch; |
11921 | 1253 |
12238 | 1254 if (mb->mode != MODE_I4x4) { |
1255 uint8_t *y_dst = dst[0]; | |
11921 | 1256 for (y = 0; y < 4; y++) { |
12240 | 1257 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]); |
1258 if (nnz4) { | |
1259 if (nnz4&~0x01010101) { | |
12238 | 1260 for (x = 0; x < 4; x++) { |
12240 | 1261 int nnz = s->non_zero_count_cache[y][x]; |
12238 | 1262 if (nnz) { |
1263 if (nnz == 1) | |
1264 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1265 else | |
1266 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1267 } | |
1268 } | |
1269 } else { | |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1270 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); |
11921 | 1271 } |
1272 } | |
1273 y_dst += 4*s->linesize; | |
1274 } | |
12238 | 1275 } |
11921 | 1276 |
12238 | 1277 for (ch = 0; ch < 2; ch++) { |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1278 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1279 if (nnz4) { |
12238 | 1280 uint8_t *ch_dst = dst[1+ch]; |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1281 if (nnz4&~0x01010101) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1282 for (y = 0; y < 2; y++) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1283 for (x = 0; x < 2; x++) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1284 int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1285 if (nnz) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1286 if (nnz == 1) |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1287 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1288 else |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1289 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1290 } |
12238 | 1291 } |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1292 ch_dst += 4*s->uvlinesize; |
12238 | 1293 } |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1294 } else { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1295 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize); |
11921 | 1296 } |
1297 } | |
1298 } | |
1299 } | |
1300 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1301 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) |
11921 | 1302 { |
1303 int interior_limit, filter_level; | |
1304 | |
1305 if (s->segmentation.enabled) { | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
1306 filter_level = s->segmentation.filter_level[s->segment]; |
11921 | 1307 if (!s->segmentation.absolute_vals) |
1308 filter_level += s->filter.level; | |
1309 } else | |
1310 filter_level = s->filter.level; | |
1311 | |
1312 if (s->lf_delta.enabled) { | |
1313 filter_level += s->lf_delta.ref[mb->ref_frame]; | |
1314 | |
1315 if (mb->ref_frame == VP56_FRAME_CURRENT) { | |
1316 if (mb->mode == MODE_I4x4) | |
1317 filter_level += s->lf_delta.mode[0]; | |
1318 } else { | |
1319 if (mb->mode == VP8_MVMODE_ZERO) | |
1320 filter_level += s->lf_delta.mode[1]; | |
1321 else if (mb->mode == VP8_MVMODE_SPLIT) | |
1322 filter_level += s->lf_delta.mode[3]; | |
1323 else | |
1324 filter_level += s->lf_delta.mode[2]; | |
1325 } | |
1326 } | |
1327 filter_level = av_clip(filter_level, 0, 63); | |
1328 | |
1329 interior_limit = filter_level; | |
1330 if (s->filter.sharpness) { | |
1331 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; | |
1332 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); | |
1333 } | |
1334 interior_limit = FFMAX(interior_limit, 1); | |
1335 | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1336 f->filter_level = filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1337 f->inner_limit = interior_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1338 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; |
11921 | 1339 } |
1340 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1341 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) |
11921 | 1342 { |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1343 int mbedge_lim, bedge_lim, hev_thresh; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1344 int filter_level = f->filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1345 int inner_limit = f->inner_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1346 int inner_filter = f->inner_filter; |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1347 int linesize = s->linesize; |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1348 int uvlinesize = s->uvlinesize; |
11921 | 1349 |
1350 if (!filter_level) | |
1351 return; | |
1352 | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1353 mbedge_lim = 2*(filter_level+2) + inner_limit; |
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1354 bedge_lim = 2* filter_level + inner_limit; |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1355 hev_thresh = filter_level >= 15; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1356 |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1357 if (s->keyframe) { |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1358 if (filter_level >= 40) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1359 hev_thresh = 2; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1360 } else { |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1361 if (filter_level >= 40) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1362 hev_thresh = 3; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1363 else if (filter_level >= 20) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1364 hev_thresh = 2; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1365 } |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1366 |
11921 | 1367 if (mb_x) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1368 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1369 mbedge_lim, inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1370 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1371 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1372 } |
1373 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1374 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1375 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1376 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1377 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1378 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1379 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1380 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1381 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1382 uvlinesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1383 inner_limit, hev_thresh); |
11921 | 1384 } |
1385 | |
1386 if (mb_y) { | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1387 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1388 mbedge_lim, inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1389 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1390 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1391 } |
1392 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1393 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1394 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1395 linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1396 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1397 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1398 linesize, bedge_lim, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1399 inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1400 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1401 linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1402 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1403 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1404 dst[2] + 4 * uvlinesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1405 uvlinesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1406 inner_limit, hev_thresh); |
11921 | 1407 } |
1408 } | |
1409 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1410 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) |
11921 | 1411 { |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1412 int mbedge_lim, bedge_lim; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1413 int filter_level = f->filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1414 int inner_limit = f->inner_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1415 int inner_filter = f->inner_filter; |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1416 int linesize = s->linesize; |
11921 | 1417 |
1418 if (!filter_level) | |
1419 return; | |
1420 | |
1421 mbedge_lim = 2*(filter_level+2) + inner_limit; | |
1422 bedge_lim = 2* filter_level + inner_limit; | |
1423 | |
1424 if (mb_x) | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1425 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1426 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1427 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1428 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1429 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim); |
11921 | 1430 } |
1431 | |
1432 if (mb_y) | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1433 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1434 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1435 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1436 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1437 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim); |
11921 | 1438 } |
1439 } | |
1440 | |
1441 static void filter_mb_row(VP8Context *s, int mb_y) | |
1442 { | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1443 VP8FilterStrength *f = s->filter_strength; |
11921 | 1444 uint8_t *dst[3] = { |
1445 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, | |
1446 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, | |
1447 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize | |
1448 }; | |
1449 int mb_x; | |
1450 | |
1451 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1452 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1453 filter_mb(s, dst, f++, mb_x, mb_y); |
11921 | 1454 dst[0] += 16; |
1455 dst[1] += 8; | |
1456 dst[2] += 8; | |
1457 } | |
1458 } | |
1459 | |
1460 static void filter_mb_row_simple(VP8Context *s, int mb_y) | |
1461 { | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1462 VP8FilterStrength *f = s->filter_strength; |
11921 | 1463 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; |
1464 int mb_x; | |
1465 | |
1466 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1467 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1468 filter_mb_simple(s, dst, f++, mb_x, mb_y); |
11921 | 1469 dst += 16; |
1470 } | |
1471 } | |
1472 | |
1473 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
1474 AVPacket *avpkt) | |
1475 { | |
1476 VP8Context *s = avctx->priv_data; | |
1477 int ret, mb_x, mb_y, i, y, referenced; | |
1478 enum AVDiscard skip_thresh; | |
12270
161c205dcdd2
Fix r24445: Instead of needlessly initialising a variable, silence the warning.
cehoyos
parents:
12255
diff
changeset
|
1479 AVFrame *av_uninit(curframe); |
11921 | 1480 |
1481 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) | |
1482 return ret; | |
1483 | |
1484 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT | |
1485 || s->update_altref == VP56_FRAME_CURRENT; | |
1486 | |
1487 skip_thresh = !referenced ? AVDISCARD_NONREF : | |
1488 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; | |
1489 | |
1490 if (avctx->skip_frame >= skip_thresh) { | |
1491 s->invisible = 1; | |
1492 goto skip_decode; | |
1493 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1494 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; |
11921 | 1495 |
1496 for (i = 0; i < 4; i++) | |
1497 if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1498 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1499 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { | |
1500 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; | |
1501 break; | |
1502 } | |
1503 if (curframe->data[0]) | |
1504 avctx->release_buffer(avctx, curframe); | |
1505 | |
1506 curframe->key_frame = s->keyframe; | |
1507 curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE; | |
1508 curframe->reference = referenced ? 3 : 0; | |
1509 if ((ret = avctx->get_buffer(avctx, curframe))) { | |
1510 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); | |
1511 return ret; | |
1512 } | |
1513 | |
1514 // Given that arithmetic probabilities are updated every frame, it's quite likely | |
1515 // that the values we have on a random interframe are complete junk if we didn't | |
1516 // start decode on a keyframe. So just don't display anything rather than junk. | |
1517 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || | |
1518 !s->framep[VP56_FRAME_GOLDEN] || | |
1519 !s->framep[VP56_FRAME_GOLDEN2])) { | |
1520 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); | |
1521 return AVERROR_INVALIDDATA; | |
1522 } | |
1523 | |
1524 s->linesize = curframe->linesize[0]; | |
1525 s->uvlinesize = curframe->linesize[1]; | |
1526 | |
1527 if (!s->edge_emu_buffer) | |
1528 s->edge_emu_buffer = av_malloc(21*s->linesize); | |
1529 | |
1530 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); | |
1531 | |
12383 | 1532 /* Zero macroblock structures for top/top-left prediction from outside the frame. */ |
1533 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks)); | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1534 |
11921 | 1535 // top edge of 127 for intra prediction |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1536 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); |
12231 | 1537 memset(s->ref_count, 0, sizeof(s->ref_count)); |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1538 if (s->keyframe) |
12382
a0c84084fa2f
fix over-allocation. confused b4_stride with mb_width.
skal
parents:
12372
diff
changeset
|
1539 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4); |
11921 | 1540 |
1541 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | |
1542 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1543 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; |
12383 | 1544 int mb_xy = mb_y*s->mb_width; |
11921 | 1545 uint8_t *dst[3] = { |
1546 curframe->data[0] + 16*mb_y*s->linesize, | |
1547 curframe->data[1] + 8*mb_y*s->uvlinesize, | |
1548 curframe->data[2] + 8*mb_y*s->uvlinesize | |
1549 }; | |
1550 | |
12383 | 1551 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock |
11921 | 1552 memset(s->left_nnz, 0, sizeof(s->left_nnz)); |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1553 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); |
11921 | 1554 |
1555 // left edge of 129 for intra prediction | |
1556 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) | |
1557 for (i = 0; i < 3; i++) | |
1558 for (y = 0; y < 16>>!!i; y++) | |
1559 dst[i][y*curframe->linesize[i]-1] = 129; | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1560 if (mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1561 memset(s->top_border, 129, sizeof(*s->top_border)); |
11921 | 1562 |
12237 | 1563 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { |
12215 | 1564 /* Prefetch the current frame, 4 MBs ahead */ |
1565 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | |
1566 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | |
1567 | |
12383 | 1568 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy); |
11921 | 1569 |
12237 | 1570 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); |
12231 | 1571 |
11921 | 1572 if (!mb->skip) |
1573 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); | |
1574 | |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
1575 if (mb->mode <= MODE_I4x4) |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1576 intra_predict(s, dst, mb, mb_x, mb_y); |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
1577 else |
11921 | 1578 inter_predict(s, dst, mb, mb_x, mb_y); |
1579 | |
12237 | 1580 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); |
12231 | 1581 |
11921 | 1582 if (!mb->skip) { |
12238 | 1583 idct_mb(s, dst, mb); |
11921 | 1584 } else { |
1585 AV_ZERO64(s->left_nnz); | |
1586 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned | |
1587 | |
1588 // Reset DC block predictors if they would exist if the mb had coefficients | |
1589 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
1590 s->left_nnz[8] = 0; | |
1591 s->top_nnz[mb_x][8] = 0; | |
1592 } | |
1593 } | |
1594 | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1595 if (s->deblock_filter) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1596 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1597 |
12237 | 1598 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); |
12231 | 1599 |
11921 | 1600 dst[0] += 16; |
1601 dst[1] += 8; | |
1602 dst[2] += 8; | |
1603 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1604 if (s->deblock_filter) { |
11921 | 1605 if (s->filter.simple) |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1606 filter_mb_row_simple(s, mb_y); |
11921 | 1607 else |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1608 filter_mb_row(s, mb_y); |
11921 | 1609 } |
1610 } | |
1611 | |
1612 skip_decode: | |
1613 // if future frames don't use the updated probabilities, | |
1614 // reset them to the values we saved | |
1615 if (!s->update_probabilities) | |
1616 s->prob[0] = s->prob[1]; | |
1617 | |
1618 // check if golden and altref are swapped | |
1619 if (s->update_altref == VP56_FRAME_GOLDEN && | |
1620 s->update_golden == VP56_FRAME_GOLDEN2) | |
1621 FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]); | |
1622 else { | |
1623 if (s->update_altref != VP56_FRAME_NONE) | |
1624 s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; | |
1625 | |
1626 if (s->update_golden != VP56_FRAME_NONE) | |
1627 s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; | |
1628 } | |
1629 | |
1630 if (s->update_last) // move cur->prev | |
1631 s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT]; | |
1632 | |
1633 // release no longer referenced frames | |
1634 for (i = 0; i < 4; i++) | |
1635 if (s->frames[i].data[0] && | |
1636 &s->frames[i] != s->framep[VP56_FRAME_CURRENT] && | |
1637 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1638 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1639 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) | |
1640 avctx->release_buffer(avctx, &s->frames[i]); | |
1641 | |
1642 if (!s->invisible) { | |
1643 *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT]; | |
1644 *data_size = sizeof(AVFrame); | |
1645 } | |
1646 | |
1647 return avpkt->size; | |
1648 } | |
1649 | |
1650 static av_cold int vp8_decode_init(AVCodecContext *avctx) | |
1651 { | |
1652 VP8Context *s = avctx->priv_data; | |
1653 | |
1654 s->avctx = avctx; | |
1655 avctx->pix_fmt = PIX_FMT_YUV420P; | |
1656 | |
1657 dsputil_init(&s->dsp, avctx); | |
1658 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8); | |
1659 ff_vp8dsp_init(&s->vp8dsp); | |
1660 | |
1661 // intra pred needs edge emulation among other things | |
1662 if (avctx->flags&CODEC_FLAG_EMU_EDGE) { | |
11947 | 1663 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n"); |
11921 | 1664 return AVERROR_PATCHWELCOME; |
1665 } | |
1666 | |
1667 return 0; | |
1668 } | |
1669 | |
1670 static av_cold int vp8_decode_free(AVCodecContext *avctx) | |
1671 { | |
1672 vp8_decode_flush(avctx); | |
1673 return 0; | |
1674 } | |
1675 | |
1676 AVCodec vp8_decoder = { | |
1677 "vp8", | |
1678 AVMEDIA_TYPE_VIDEO, | |
1679 CODEC_ID_VP8, | |
1680 sizeof(VP8Context), | |
1681 vp8_decode_init, | |
1682 NULL, | |
1683 vp8_decode_free, | |
1684 vp8_decode_frame, | |
1685 CODEC_CAP_DR1, | |
1686 .flush = vp8_decode_flush, | |
1687 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), | |
1688 }; |