Mercurial > libavcodec.hg
annotate vp8.c @ 12340:2d15f62f4f8a libavcodec
VP8: move zeroing of luma DC block into the WHT
Lets us do the zeroing in asm instead of C.
Also makes it consistent with the way the regular iDCT code does it.
author | darkshikari |
---|---|
date | Mon, 02 Aug 2010 20:18:09 +0000 |
parents | 57fc7f2d7b28 |
children | b4c63ffd959b |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
12249 | 6 * Copyright (C) 2010 Jason Garrett-Glaser |
11921 | 7 * |
8 * This file is part of FFmpeg. | |
9 * | |
10 * FFmpeg is free software; you can redistribute it and/or | |
11 * modify it under the terms of the GNU Lesser General Public | |
12 * License as published by the Free Software Foundation; either | |
13 * version 2.1 of the License, or (at your option) any later version. | |
14 * | |
15 * FFmpeg is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 * Lesser General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU Lesser General Public | |
21 * License along with FFmpeg; if not, write to the Free Software | |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 */ | |
24 | |
25 #include "avcodec.h" | |
26 #include "vp56.h" | |
27 #include "vp8data.h" | |
28 #include "vp8dsp.h" | |
29 #include "h264pred.h" | |
30 #include "rectangle.h" | |
31 | |
/**
 * Loop-filter parameters kept for a single macroblock.
 * An array of these (VP8Context.filter_strength) is allocated with
 * mb_stride entries in update_dimensions().
 */
typedef struct {
    uint8_t filter_level;
    uint8_t inner_limit;
    uint8_t inner_filter;
} VP8FilterStrength;
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
37 |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
38 typedef struct { |
11921 | 39 uint8_t skip; |
40 // todo: make it possible to check for at least (i4x4 or split_mv) | |
41 // in one op. are others needed? | |
42 uint8_t mode; | |
43 uint8_t ref_frame; | |
44 uint8_t partitioning; | |
45 VP56mv mv; | |
46 VP56mv bmv[16]; | |
47 } VP8Macroblock; | |
48 | |
49 typedef struct { | |
50 AVCodecContext *avctx; | |
51 DSPContext dsp; | |
52 VP8DSPContext vp8dsp; | |
53 H264PredContext hpc; | |
11974 | 54 vp8_mc_func put_pixels_tab[3][3][3]; |
11921 | 55 AVFrame frames[4]; |
56 AVFrame *framep[4]; | |
57 uint8_t *edge_emu_buffer; | |
58 VP56RangeCoder c; ///< header context, includes mb modes and motion vectors | |
59 int profile; | |
60 | |
61 int mb_width; /* number of horizontal MB */ | |
62 int mb_height; /* number of vertical MB */ | |
63 int linesize; | |
64 int uvlinesize; | |
65 | |
66 int keyframe; | |
67 int invisible; | |
68 int update_last; ///< update VP56_FRAME_PREVIOUS with the current one | |
69 int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so | |
70 int update_altref; | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
71 int deblock_filter; |
11921 | 72 |
73 /** | |
74 * If this flag is not set, all the probability updates | |
75 * are discarded after this frame is decoded. | |
76 */ | |
77 int update_probabilities; | |
78 | |
79 /** | |
80 * All coefficients are contained in separate arith coding contexts. | |
81 * There can be 1, 2, 4, or 8 of these after the header context. | |
82 */ | |
83 int num_coeff_partitions; | |
84 VP56RangeCoder coeff_partition[8]; | |
85 | |
86 VP8Macroblock *macroblocks; | |
87 VP8Macroblock *macroblocks_base; | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
88 VP8FilterStrength *filter_strength; |
11921 | 89 int mb_stride; |
90 | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
91 uint8_t *intra4x4_pred_mode_top; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
92 uint8_t intra4x4_pred_mode_left[4]; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
93 uint8_t *segmentation_map; |
11921 | 94 int b4_stride; |
95 | |
96 /** | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
97 * Cache of the top row needed for intra prediction |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
98 * 16 for luma, 8 for each chroma plane |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
99 */ |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
100 uint8_t (*top_border)[16+8+8]; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
101 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
102 /** |
11921 | 103 * For coeff decode, we need to know whether the above block had non-zero |
104 * coefficients. This means for each macroblock, we need data for 4 luma | |
105 * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 | |
106 * per macroblock. We keep the last row in top_nnz. | |
107 */ | |
108 uint8_t (*top_nnz)[9]; | |
109 DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; | |
110 | |
111 /** | |
112 * This is the index plus one of the last non-zero coeff | |
113 * for each of the blocks in the current macroblock. | |
114 * So, 0 -> no coeffs | |
115 * 1 -> dc-only (special transform) | |
116 * 2+-> full transform | |
117 */ | |
118 DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; | |
119 DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; | |
12340
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12339
diff
changeset
|
120 DECLARE_ALIGNED(16, DCTELEM, block_dc)[16]; |
12221 | 121 uint8_t intra4x4_pred_mode_mb[16]; |
11921 | 122 |
123 int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
124 int segment; ///< segment of the current macroblock |
11921 | 125 |
126 int mbskip_enabled; | |
127 int sign_bias[4]; ///< one state [0, 1] per ref frame type | |
12231 | 128 int ref_count[3]; |
11921 | 129 |
130 /** | |
131 * Base parameters for segmentation, i.e. per-macroblock parameters. | |
132 * These must be kept unchanged even if segmentation is not used for | |
133 * a frame, since the values persist between interframes. | |
134 */ | |
135 struct { | |
136 int enabled; | |
137 int absolute_vals; | |
138 int update_map; | |
139 int8_t base_quant[4]; | |
140 int8_t filter_level[4]; ///< base loop filter level | |
141 } segmentation; | |
142 | |
143 /** | |
144 * Macroblocks can have one of 4 different quants in a frame when | |
145 * segmentation is enabled. | |
146 * If segmentation is disabled, only the first segment's values are used. | |
147 */ | |
148 struct { | |
149 // [0] - DC qmul [1] - AC qmul | |
150 int16_t luma_qmul[2]; | |
151 int16_t luma_dc_qmul[2]; ///< luma dc-only block quant | |
152 int16_t chroma_qmul[2]; | |
153 } qmat[4]; | |
154 | |
155 struct { | |
156 int simple; | |
157 int level; | |
158 int sharpness; | |
159 } filter; | |
160 | |
161 struct { | |
162 int enabled; ///< whether each mb can have a different strength based on mode/ref | |
163 | |
164 /** | |
165 * filter strength adjustment for the following macroblock modes: | |
166 * [0] - i4x4 | |
167 * [1] - zero mv | |
168 * [2] - inter modes except for zero or split mv | |
169 * [3] - split mv | |
170 * i16x16 modes never have any adjustment | |
171 */ | |
172 int8_t mode[4]; | |
173 | |
174 /** | |
175 * filter strength adjustment for macroblocks that reference: | |
176 * [0] - intra / VP56_FRAME_CURRENT | |
177 * [1] - VP56_FRAME_PREVIOUS | |
178 * [2] - VP56_FRAME_GOLDEN | |
179 * [3] - altref / VP56_FRAME_GOLDEN2 | |
180 */ | |
181 int8_t ref[4]; | |
182 } lf_delta; | |
183 | |
184 /** | |
185 * These are all of the updatable probabilities for binary decisions. | |
186 * They are only implictly reset on keyframes, making it quite likely | |
187 * for an interframe to desync if a prior frame's header was corrupt | |
188 * or missing outright! | |
189 */ | |
190 struct { | |
191 uint8_t segmentid[3]; | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
192 uint8_t mbskip; |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
193 uint8_t intra; |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
194 uint8_t last; |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
195 uint8_t golden; |
11921 | 196 uint8_t pred16x16[4]; |
197 uint8_t pred8x8c[3]; | |
198 uint8_t token[4][8][3][NUM_DCT_TOKENS-1]; | |
199 uint8_t mvc[2][19]; | |
200 } prob[2]; | |
201 } VP8Context; | |
202 | |
203 static void vp8_decode_flush(AVCodecContext *avctx) | |
204 { | |
205 VP8Context *s = avctx->priv_data; | |
206 int i; | |
207 | |
208 for (i = 0; i < 4; i++) | |
209 if (s->frames[i].data[0]) | |
210 avctx->release_buffer(avctx, &s->frames[i]); | |
211 memset(s->framep, 0, sizeof(s->framep)); | |
212 | |
213 av_freep(&s->macroblocks_base); | |
12271 | 214 av_freep(&s->filter_strength); |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
215 av_freep(&s->intra4x4_pred_mode_top); |
11921 | 216 av_freep(&s->top_nnz); |
217 av_freep(&s->edge_emu_buffer); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
218 av_freep(&s->top_border); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
219 av_freep(&s->segmentation_map); |
11921 | 220 |
221 s->macroblocks = NULL; | |
222 } | |
223 | |
224 static int update_dimensions(VP8Context *s, int width, int height) | |
225 { | |
226 if (avcodec_check_dimensions(s->avctx, width, height)) | |
227 return AVERROR_INVALIDDATA; | |
228 | |
229 vp8_decode_flush(s->avctx); | |
230 | |
231 avcodec_set_dimensions(s->avctx, width, height); | |
232 | |
233 s->mb_width = (s->avctx->coded_width +15) / 16; | |
234 s->mb_height = (s->avctx->coded_height+15) / 16; | |
235 | |
236 // we allocate a border around the top/left of intra4x4 modes | |
237 // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle | |
238 s->mb_stride = s->mb_width+1; | |
239 s->b4_stride = 4*s->mb_stride; | |
240 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
241 s->macroblocks_base = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks)); |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
242 s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength)); |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
243 s->intra4x4_pred_mode_top = av_mallocz(s->b4_stride*4); |
11921 | 244 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
245 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
246 s->segmentation_map = av_mallocz(s->mb_stride*s->mb_height); |
11921 | 247 |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
248 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
249 !s->top_nnz || !s->top_border || !s->segmentation_map) |
12169 | 250 return AVERROR(ENOMEM); |
251 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
252 s->macroblocks = s->macroblocks_base + 1; |
11921 | 253 |
254 return 0; | |
255 } | |
256 | |
257 static void parse_segment_info(VP8Context *s) | |
258 { | |
259 VP56RangeCoder *c = &s->c; | |
260 int i; | |
261 | |
262 s->segmentation.update_map = vp8_rac_get(c); | |
263 | |
264 if (vp8_rac_get(c)) { // update segment feature data | |
265 s->segmentation.absolute_vals = vp8_rac_get(c); | |
266 | |
267 for (i = 0; i < 4; i++) | |
268 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); | |
269 | |
270 for (i = 0; i < 4; i++) | |
271 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); | |
272 } | |
273 if (s->segmentation.update_map) | |
274 for (i = 0; i < 3; i++) | |
275 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; | |
276 } | |
277 | |
278 static void update_lf_deltas(VP8Context *s) | |
279 { | |
280 VP56RangeCoder *c = &s->c; | |
281 int i; | |
282 | |
283 for (i = 0; i < 4; i++) | |
284 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); | |
285 | |
286 for (i = 0; i < 4; i++) | |
287 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); | |
288 } | |
289 | |
290 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) | |
291 { | |
292 const uint8_t *sizes = buf; | |
293 int i; | |
294 | |
295 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); | |
296 | |
297 buf += 3*(s->num_coeff_partitions-1); | |
298 buf_size -= 3*(s->num_coeff_partitions-1); | |
299 if (buf_size < 0) | |
300 return -1; | |
301 | |
302 for (i = 0; i < s->num_coeff_partitions-1; i++) { | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
303 int size = AV_RL24(sizes + 3*i); |
11921 | 304 if (buf_size - size < 0) |
305 return -1; | |
306 | |
307 vp56_init_range_decoder(&s->coeff_partition[i], buf, size); | |
308 buf += size; | |
309 buf_size -= size; | |
310 } | |
311 vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); | |
312 | |
313 return 0; | |
314 } | |
315 | |
316 static void get_quants(VP8Context *s) | |
317 { | |
318 VP56RangeCoder *c = &s->c; | |
319 int i, base_qi; | |
320 | |
321 int yac_qi = vp8_rac_get_uint(c, 7); | |
322 int ydc_delta = vp8_rac_get_sint(c, 4); | |
323 int y2dc_delta = vp8_rac_get_sint(c, 4); | |
324 int y2ac_delta = vp8_rac_get_sint(c, 4); | |
325 int uvdc_delta = vp8_rac_get_sint(c, 4); | |
326 int uvac_delta = vp8_rac_get_sint(c, 4); | |
327 | |
328 for (i = 0; i < 4; i++) { | |
329 if (s->segmentation.enabled) { | |
330 base_qi = s->segmentation.base_quant[i]; | |
331 if (!s->segmentation.absolute_vals) | |
332 base_qi += yac_qi; | |
333 } else | |
334 base_qi = yac_qi; | |
335 | |
336 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; | |
337 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; | |
338 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
339 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
340 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; |
11921 | 341 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
342 |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
343 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
344 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); |
11921 | 345 } |
346 } | |
347 | |
348 /** | |
349 * Determine which buffers golden and altref should be updated with after this frame. | |
350 * The spec isn't clear here, so I'm going by my understanding of what libvpx does | |
351 * | |
352 * Intra frames update all 3 references | |
353 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set | |
354 * If the update (golden|altref) flag is set, it's updated with the current frame | |
355 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. | |
356 * If the flag is not set, the number read means: | |
357 * 0: no update | |
358 * 1: VP56_FRAME_PREVIOUS | |
359 * 2: update golden with altref, or update altref with golden | |
360 */ | |
361 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) | |
362 { | |
363 VP56RangeCoder *c = &s->c; | |
364 | |
365 if (update) | |
366 return VP56_FRAME_CURRENT; | |
367 | |
368 switch (vp8_rac_get_uint(c, 2)) { | |
369 case 1: | |
370 return VP56_FRAME_PREVIOUS; | |
371 case 2: | |
372 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; | |
373 } | |
374 return VP56_FRAME_NONE; | |
375 } | |
376 | |
377 static void update_refs(VP8Context *s) | |
378 { | |
379 VP56RangeCoder *c = &s->c; | |
380 | |
381 int update_golden = vp8_rac_get(c); | |
382 int update_altref = vp8_rac_get(c); | |
383 | |
384 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); | |
385 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); | |
386 } | |
387 | |
388 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) | |
389 { | |
390 VP56RangeCoder *c = &s->c; | |
391 int header_size, hscale, vscale, i, j, k, l, ret; | |
392 int width = s->avctx->width; | |
393 int height = s->avctx->height; | |
394 | |
395 s->keyframe = !(buf[0] & 1); | |
396 s->profile = (buf[0]>>1) & 7; | |
397 s->invisible = !(buf[0] & 0x10); | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
398 header_size = AV_RL24(buf) >> 5; |
11921 | 399 buf += 3; |
400 buf_size -= 3; | |
401 | |
11974 | 402 if (s->profile > 3) |
403 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); | |
404 | |
405 if (!s->profile) | |
406 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); | |
407 else // profile 1-3 use bilinear, 4+ aren't defined so whatever | |
408 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); | |
11921 | 409 |
410 if (header_size > buf_size - 7*s->keyframe) { | |
411 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); | |
412 return AVERROR_INVALIDDATA; | |
413 } | |
414 | |
415 if (s->keyframe) { | |
12247
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
416 if (AV_RL24(buf) != 0x2a019d) { |
50a96623366b
VP8: use AV_RL24 instead of defining a new RL24.
darkshikari
parents:
12246
diff
changeset
|
417 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf)); |
11921 | 418 return AVERROR_INVALIDDATA; |
419 } | |
420 width = AV_RL16(buf+3) & 0x3fff; | |
421 height = AV_RL16(buf+5) & 0x3fff; | |
422 hscale = buf[4] >> 6; | |
423 vscale = buf[6] >> 6; | |
424 buf += 7; | |
425 buf_size -= 7; | |
426 | |
11970
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
427 if (hscale || vscale) |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
428 av_log_missing_feature(s->avctx, "Upscaling", 1); |
c7953ee47af4
vp8: warn and request sample if upscaling specified in header
mru
parents:
11950
diff
changeset
|
429 |
11921 | 430 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; |
431 memcpy(s->prob->token , vp8_token_default_probs , sizeof(s->prob->token)); | |
432 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); | |
433 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); | |
434 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); | |
435 memset(&s->segmentation, 0, sizeof(s->segmentation)); | |
436 } | |
437 | |
438 if (!s->macroblocks_base || /* first frame */ | |
439 width != s->avctx->width || height != s->avctx->height) { | |
440 if ((ret = update_dimensions(s, width, height) < 0)) | |
441 return ret; | |
442 } | |
443 | |
444 vp56_init_range_decoder(c, buf, header_size); | |
445 buf += header_size; | |
446 buf_size -= header_size; | |
447 | |
448 if (s->keyframe) { | |
449 if (vp8_rac_get(c)) | |
450 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); | |
451 vp8_rac_get(c); // whether we can skip clamping in dsp functions | |
452 } | |
453 | |
454 if ((s->segmentation.enabled = vp8_rac_get(c))) | |
455 parse_segment_info(s); | |
456 else | |
457 s->segmentation.update_map = 0; // FIXME: move this to some init function? | |
458 | |
459 s->filter.simple = vp8_rac_get(c); | |
460 s->filter.level = vp8_rac_get_uint(c, 6); | |
461 s->filter.sharpness = vp8_rac_get_uint(c, 3); | |
462 | |
463 if ((s->lf_delta.enabled = vp8_rac_get(c))) | |
464 if (vp8_rac_get(c)) | |
465 update_lf_deltas(s); | |
466 | |
467 if (setup_partitions(s, buf, buf_size)) { | |
468 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); | |
469 return AVERROR_INVALIDDATA; | |
470 } | |
471 | |
472 get_quants(s); | |
473 | |
474 if (!s->keyframe) { | |
475 update_refs(s); | |
476 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); | |
477 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); | |
478 } | |
479 | |
480 // if we aren't saving this frame's probabilities for future frames, | |
481 // make a copy of the current probabilities | |
482 if (!(s->update_probabilities = vp8_rac_get(c))) | |
483 s->prob[1] = s->prob[0]; | |
484 | |
485 s->update_last = s->keyframe || vp8_rac_get(c); | |
486 | |
487 for (i = 0; i < 4; i++) | |
488 for (j = 0; j < 8; j++) | |
489 for (k = 0; k < 3; k++) | |
490 for (l = 0; l < NUM_DCT_TOKENS-1; l++) | |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
491 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) |
11921 | 492 s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8); |
493 | |
494 if ((s->mbskip_enabled = vp8_rac_get(c))) | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
495 s->prob->mbskip = vp8_rac_get_uint(c, 8); |
11921 | 496 |
497 if (!s->keyframe) { | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
498 s->prob->intra = vp8_rac_get_uint(c, 8); |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
499 s->prob->last = vp8_rac_get_uint(c, 8); |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
500 s->prob->golden = vp8_rac_get_uint(c, 8); |
11921 | 501 |
502 if (vp8_rac_get(c)) | |
503 for (i = 0; i < 4; i++) | |
504 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); | |
505 if (vp8_rac_get(c)) | |
506 for (i = 0; i < 3; i++) | |
507 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); | |
508 | |
509 // 17.2 MV probability update | |
510 for (i = 0; i < 2; i++) | |
511 for (j = 0; j < 19; j++) | |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
512 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) |
11921 | 513 s->prob->mvc[i][j] = vp8_rac_get_nn(c); |
514 } | |
515 | |
516 return 0; | |
517 } | |
518 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
519 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
520 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) |
11921 | 521 { |
522 #define MARGIN (16 << 2) | |
523 dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), | |
524 ((s->mb_width - 1 - mb_x) << 6) + MARGIN); | |
525 dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), | |
526 ((s->mb_height - 1 - mb_y) << 6) + MARGIN); | |
527 } | |
528 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
529 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
530 void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
531 VP56mv near[2], VP56mv *best, uint8_t cnt[4]) |
11921 | 532 { |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
533 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
534 mb - 1 /* left */, |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
535 mb + 1 /* top-left */ }; |
11921 | 536 enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; |
537 VP56mv near_mv[4] = {{ 0 }}; | |
538 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; | |
12217 | 539 int idx = CNT_ZERO; |
11921 | 540 int best_idx = CNT_ZERO; |
12217 | 541 int cur_sign_bias = s->sign_bias[mb->ref_frame]; |
542 int *sign_bias = s->sign_bias; | |
11921 | 543 |
544 /* Process MB on top, left and top-left */ | |
12217 | 545 #define MV_EDGE_CHECK(n)\ |
546 {\ | |
547 VP8Macroblock *edge = mb_edge[n];\ | |
548 int edge_ref = edge->ref_frame;\ | |
549 if (edge_ref != VP56_FRAME_CURRENT) {\ | |
550 uint32_t mv = AV_RN32A(&edge->mv);\ | |
551 if (mv) {\ | |
552 if (cur_sign_bias != sign_bias[edge_ref]) {\ | |
553 /* SWAR negate of the values in mv. */\ | |
12242 | 554 mv = ~mv;\ |
555 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ | |
12217 | 556 }\ |
557 if (!n || mv != AV_RN32A(&near_mv[idx]))\ | |
558 AV_WN32A(&near_mv[++idx], mv);\ | |
559 cnt[idx] += 1 + (n != 2);\ | |
560 } else\ | |
561 cnt[CNT_ZERO] += 1 + (n != 2);\ | |
562 }\ | |
11921 | 563 } |
12217 | 564 MV_EDGE_CHECK(0) |
565 MV_EDGE_CHECK(1) | |
566 MV_EDGE_CHECK(2) | |
11921 | 567 |
12217 | 568 /* If we have three distinct MVs, merge first and last if they're the same */ |
569 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT])) | |
11921 | 570 cnt[CNT_NEAREST] += 1; |
571 | |
572 cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + | |
573 (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + | |
574 (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); | |
575 | |
576 /* Swap near and nearest if necessary */ | |
577 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { | |
12217 | 578 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); |
579 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); | |
11921 | 580 } |
581 | |
582 /* Choose the best mv out of 0,0 and the nearest mv */ | |
583 if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO]) | |
584 best_idx = CNT_NEAREST; | |
585 | |
12246 | 586 mb->mv = near_mv[best_idx]; |
11921 | 587 near[0] = near_mv[CNT_NEAREST]; |
588 near[1] = near_mv[CNT_NEAR]; | |
589 } | |
590 | |
591 /** | |
592 * Motion vector coding, 17.1. | |
593 */ | |
594 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) | |
595 { | |
12255 | 596 int bit, x = 0; |
11921 | 597 |
12254
17c151e1280a
VP8: Use vp56_rac_get_prob_branchy when the bit is only used by an if()
conrad
parents:
12253
diff
changeset
|
598 if (vp56_rac_get_prob_branchy(c, p[0])) { |
11921 | 599 int i; |
600 | |
601 for (i = 0; i < 3; i++) | |
602 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
603 for (i = 9; i > 3; i--) | |
604 x += vp56_rac_get_prob(c, p[9 + i]) << i; | |
605 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) | |
606 x += 8; | |
12255 | 607 } else { |
608 // small_mvtree | |
609 const uint8_t *ps = p+2; | |
610 bit = vp56_rac_get_prob(c, *ps); | |
611 ps += 1 + 3*bit; | |
612 x += 4*bit; | |
613 bit = vp56_rac_get_prob(c, *ps); | |
614 ps += 1 + bit; | |
615 x += 2*bit; | |
616 x += vp56_rac_get_prob(c, *ps); | |
617 } | |
11921 | 618 |
619 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; | |
620 } | |
621 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
622 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
623 const uint8_t *get_submv_prob(uint32_t left, uint32_t top) |
11921 | 624 { |
12219 | 625 if (left == top) |
626 return vp8_submv_prob[4-!!left]; | |
627 if (!top) | |
11921 | 628 return vp8_submv_prob[2]; |
12219 | 629 return vp8_submv_prob[1-!!left]; |
11921 | 630 } |
631 | |
632 /** | |
633 * Split motion vector prediction, 16.4. | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
634 * @returns the number of motion vectors parsed (2, 4 or 16) |
11921 | 635 */ |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
636 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
637 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) |
11921 | 638 { |
639 int part_idx = mb->partitioning = | |
640 vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); | |
641 int n, num = vp8_mbsplit_count[part_idx]; | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
642 VP8Macroblock *top_mb = &mb[2]; |
12219 | 643 VP8Macroblock *left_mb = &mb[-1]; |
644 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], | |
645 *mbsplits_top = vp8_mbsplits[top_mb->partitioning], | |
646 *mbsplits_cur = vp8_mbsplits[part_idx], | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
647 *firstidx = vp8_mbfirstidx[part_idx]; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
648 VP56mv *top_mv = top_mb->bmv; |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
649 VP56mv *left_mv = left_mb->bmv; |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
650 VP56mv *cur_mv = mb->bmv; |
11921 | 651 |
652 for (n = 0; n < num; n++) { | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
653 int k = firstidx[n]; |
12219 | 654 uint32_t left, above; |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
655 const uint8_t *submv_prob; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
656 |
12219 | 657 if (!(k & 3)) |
658 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); | |
659 else | |
660 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); | |
661 if (k <= 3) | |
662 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); | |
663 else | |
664 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
665 |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
666 submv_prob = get_submv_prob(left, above); |
11921 | 667 |
668 switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) { | |
669 case VP8_SUBMVMODE_NEW4X4: | |
12246 | 670 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); |
671 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); | |
11921 | 672 break; |
673 case VP8_SUBMVMODE_ZERO4X4: | |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
674 AV_ZERO32(&mb->bmv[n]); |
11921 | 675 break; |
676 case VP8_SUBMVMODE_LEFT4X4: | |
12219 | 677 AV_WN32A(&mb->bmv[n], left); |
11921 | 678 break; |
679 case VP8_SUBMVMODE_TOP4X4: | |
12219 | 680 AV_WN32A(&mb->bmv[n], above); |
11921 | 681 break; |
682 } | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
683 } |
11921 | 684 |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
685 return num; |
11921 | 686 } |
687 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
688 static av_always_inline |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
689 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
690 int mb_x, int keyframe) |
11921 | 691 { |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
692 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; |
12221 | 693 if (keyframe) { |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
694 int x, y; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
695 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
696 uint8_t* const left = s->intra4x4_pred_mode_left; |
12221 | 697 for (y = 0; y < 4; y++) { |
698 for (x = 0; x < 4; x++) { | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
699 const uint8_t *ctx; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
700 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
701 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
702 left[y] = top[x] = *intra4x4; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
703 intra4x4++; |
11921 | 704 } |
705 } | |
12221 | 706 } else { |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
707 int i; |
12221 | 708 for (i = 0; i < 16; i++) |
709 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); | |
11921 | 710 } |
711 } | |
712 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
713 static av_always_inline |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
714 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment) |
11921 | 715 { |
716 VP56RangeCoder *c = &s->c; | |
717 | |
718 if (s->segmentation.update_map) | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
719 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
720 s->segment = *segment; |
11921 | 721 |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
722 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; |
11921 | 723 |
724 if (s->keyframe) { | |
725 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); | |
726 | |
727 if (mb->mode == MODE_I4x4) { | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
728 decode_intra4x4_modes(s, c, mb_x, 1); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
729 } else { |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
730 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u; |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
731 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
732 AV_WN32A(s->intra4x4_pred_mode_left, modes); |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
733 } |
11921 | 734 |
735 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); | |
736 mb->ref_frame = VP56_FRAME_CURRENT; | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
737 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { |
11921 | 738 VP56mv near[2], best; |
12217 | 739 uint8_t cnt[4] = { 0 }; |
11921 | 740 uint8_t p[4]; |
741 | |
742 // inter MB, 16.2 | |
12290
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
743 if (vp56_rac_get_prob_branchy(c, s->prob->last)) |
2a09b276db12
b0rk3d FATE + black helicopters hissing -> rolling back to r24556 and sleeping
skal
parents:
12289
diff
changeset
|
744 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? |
11921 | 745 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; |
746 else | |
747 mb->ref_frame = VP56_FRAME_PREVIOUS; | |
12231 | 748 s->ref_count[mb->ref_frame-1]++; |
11921 | 749 |
750 // motion vectors, 16.3 | |
751 find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); | |
12217 | 752 p[0] = vp8_mode_contexts[cnt[0]][0]; |
753 p[1] = vp8_mode_contexts[cnt[1]][1]; | |
754 p[2] = vp8_mode_contexts[cnt[2]][2]; | |
755 p[3] = vp8_mode_contexts[cnt[3]][3]; | |
11921 | 756 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); |
757 switch (mb->mode) { | |
758 case VP8_MVMODE_SPLIT: | |
12246 | 759 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); |
760 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; | |
11921 | 761 break; |
762 case VP8_MVMODE_ZERO: | |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
763 AV_ZERO32(&mb->mv); |
11921 | 764 break; |
765 case VP8_MVMODE_NEAREST: | |
766 clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); | |
767 break; | |
768 case VP8_MVMODE_NEAR: | |
769 clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); | |
770 break; | |
771 case VP8_MVMODE_NEW: | |
12246 | 772 clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); |
773 mb->mv.y += + read_mv_component(c, s->prob->mvc[0]); | |
774 mb->mv.x += + read_mv_component(c, s->prob->mvc[1]); | |
11921 | 775 break; |
776 } | |
777 if (mb->mode != VP8_MVMODE_SPLIT) { | |
11990
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
778 mb->partitioning = VP8_SPLITMVMODE_NONE; |
3c51d7ac41c9
Simplify MV parsing, removes laying out 2 or 4 (16x8/8x8/8x16) MVs over all
rbultje
parents:
11989
diff
changeset
|
779 mb->bmv[0] = mb->mv; |
11921 | 780 } |
781 } else { | |
782 // intra MB, 16.1 | |
783 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); | |
784 | |
12220
0f635b1f7861
Avoid useless fill_rectangle in P-frames in VP8
darkshikari
parents:
12219
diff
changeset
|
785 if (mb->mode == MODE_I4x4) |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
786 decode_intra4x4_modes(s, c, mb_x, 0); |
11921 | 787 |
788 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); | |
789 mb->ref_frame = VP56_FRAME_CURRENT; | |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
790 mb->partitioning = VP8_SPLITMVMODE_NONE; |
12245
ca82c3ce90c1
VP8: use AV_ZERO32 instead of AV_WN32A where relevant
darkshikari
parents:
12244
diff
changeset
|
791 AV_ZERO32(&mb->bmv[0]); |
11921 | 792 } |
793 } | |
794 | |
795 /** | |
12115 | 796 * @param c arithmetic bitstream reader context |
797 * @param block destination for block coefficients | |
798 * @param probs probabilities to use when reading trees from the bitstream | |
11921 | 799 * @param i initial coeff index, 0 unless a separate DC block is coded |
800 * @param zero_nhood the initial prediction context for number of surrounding | |
801 * all-zero blocks (only left/top, so 0-2) | |
12062
372f7fed2806
Avoid square brackets in Doxygen comments; Doxygen chokes on them.
diego
parents:
11990
diff
changeset
|
802 * @param qmul array holding the dc/ac dequant factor at position 0/1 |
11921 | 803 * @return 0 if no coeffs were decoded |
804 * otherwise, the index of the last coeff decoded plus one | |
805 */ | |
806 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], | |
807 uint8_t probs[8][3][NUM_DCT_TOKENS-1], | |
808 int i, int zero_nhood, int16_t qmul[2]) | |
809 { | |
12336 | 810 uint8_t *token_prob = probs[vp8_coeff_band[i]][zero_nhood]; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
811 int nonzero = 0; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
812 int coeff; |
11921 | 813 |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
814 do { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
815 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
816 return nonzero; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
817 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
818 skip_eob: |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
819 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 |
12335 | 820 if (++i == 16) |
821 return nonzero; // invalid input; blocks should end with EOB | |
822 token_prob = probs[vp8_coeff_band[i]][0]; | |
823 goto skip_eob; | |
11921 | 824 } |
825 | |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
826 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
827 coeff = 1; |
12336 | 828 token_prob = probs[vp8_coeff_band[i+1]][1]; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
829 } else { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
830 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
831 coeff = vp56_rac_get_prob(c, token_prob[4]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
832 if (coeff) |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
833 coeff += vp56_rac_get_prob(c, token_prob[5]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
834 coeff += 2; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
835 } else { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
836 // DCT_CAT* |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
837 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
838 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
839 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
840 } else { // DCT_CAT2 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
841 coeff = 7; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
842 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
843 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
844 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
845 } else { // DCT_CAT3 and up |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
846 int a = vp56_rac_get_prob(c, token_prob[8]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
847 int b = vp56_rac_get_prob(c, token_prob[9+a]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
848 int cat = (a<<1) + b; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
849 coeff = 3 + (8<<cat); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
850 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
851 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
852 } |
12336 | 853 token_prob = probs[vp8_coeff_band[i+1]][2]; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
854 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
855 |
11921 | 856 // todo: full [16] qmat? load into register? |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
857 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
858 nonzero = ++i; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
859 } while (i < 16); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12249
diff
changeset
|
860 |
11921 | 861 return nonzero; |
862 } | |
863 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
864 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
865 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
866 uint8_t t_nnz[9], uint8_t l_nnz[9]) |
11921 | 867 { |
868 int i, x, y, luma_start = 0, luma_ctx = 3; | |
869 int nnz_pred, nnz, nnz_total = 0; | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
870 int segment = s->segment; |
11921 | 871 |
872 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
873 nnz_pred = t_nnz[8] + l_nnz[8]; | |
874 | |
875 // decode DC values and do hadamard | |
12340
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12339
diff
changeset
|
876 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred, |
11921 | 877 s->qmat[segment].luma_dc_qmul); |
878 l_nnz[8] = t_nnz[8] = !!nnz; | |
879 nnz_total += nnz; | |
12340
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12339
diff
changeset
|
880 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc); |
11921 | 881 luma_start = 1; |
882 luma_ctx = 0; | |
883 } | |
884 | |
885 // luma blocks | |
886 for (y = 0; y < 4; y++) | |
887 for (x = 0; x < 4; x++) { | |
888 nnz_pred = l_nnz[y] + t_nnz[x]; | |
889 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, | |
890 nnz_pred, s->qmat[segment].luma_qmul); | |
891 // nnz+luma_start may be one more than the actual last index, but we don't care | |
892 s->non_zero_count_cache[y][x] = nnz + luma_start; | |
893 t_nnz[x] = l_nnz[y] = !!nnz; | |
894 nnz_total += nnz; | |
895 } | |
896 | |
897 // chroma blocks | |
898 // TODO: what to do about dimensions? 2nd dim for luma is x, | |
899 // but for chroma it's (y<<1)|x | |
900 for (i = 4; i < 6; i++) | |
901 for (y = 0; y < 2; y++) | |
902 for (x = 0; x < 2; x++) { | |
903 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; | |
904 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, | |
905 nnz_pred, s->qmat[segment].chroma_qmul); | |
906 s->non_zero_count_cache[i][(y<<1)+x] = nnz; | |
907 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; | |
908 nnz_total += nnz; | |
909 } | |
910 | |
911 // if there were no coded coeffs despite the macroblock not being marked skip, | |
912 // we MUST not do the inner loop filter and should not do IDCT | |
913 // Since skip isn't used for bitstream prediction, just manually set it. | |
914 if (!nnz_total) | |
915 mb->skip = 1; | |
916 } | |
917 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
918 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
919 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
920 int linesize, int uvlinesize, int simple) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
921 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
922 AV_COPY128(top_border, src_y + 15*linesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
923 if (!simple) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
924 AV_COPY64(top_border+16, src_cb + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
925 AV_COPY64(top_border+24, src_cr + 7*uvlinesize); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
926 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
927 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
928 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
929 static av_always_inline |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
930 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
931 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
932 int simple, int xchg) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
933 { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
934 uint8_t *top_border_m1 = top_border-32; // for TL prediction |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
935 src_y -= linesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
936 src_cb -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
937 src_cr -= uvlinesize; |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
938 |
12202 | 939 #define XCHG(a,b,xchg) do { \ |
940 if (xchg) AV_SWAP64(b,a); \ | |
941 else AV_COPY64(b,a); \ | |
942 } while (0) | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
943 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
944 XCHG(top_border_m1+8, src_y-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
945 XCHG(top_border, src_y, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
946 XCHG(top_border+8, src_y+8, 1); |
12201
c4b53914f286
vp8: add do { } while(0) around XCHG() macro to avoid confusing if/else
mru
parents:
12200
diff
changeset
|
947 if (mb_x < mb_width-1) |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
948 XCHG(top_border+32, src_y+16, 1); |
12201
c4b53914f286
vp8: add do { } while(0) around XCHG() macro to avoid confusing if/else
mru
parents:
12200
diff
changeset
|
949 |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
950 // only copy chroma for normal loop filter |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
951 // or to initialize the top row to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
952 if (!simple || !mb_y) { |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
953 XCHG(top_border_m1+16, src_cb-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
954 XCHG(top_border_m1+24, src_cr-8, xchg); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
955 XCHG(top_border+16, src_cb, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
956 XCHG(top_border+24, src_cr, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
957 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
958 } |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
959 |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
960 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
961 int check_intra_pred_mode(int mode, int mb_x, int mb_y) |
11921 | 962 { |
963 if (mode == DC_PRED8x8) { | |
12243
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
964 if (!mb_x) { |
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
965 mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; |
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
966 } else if (!mb_y) { |
12244 | 967 mode = LEFT_DC_PRED8x8; |
12243
788445bf10c0
VP8: shave a few clocks off check_intra_pred_mode
darkshikari
parents:
12242
diff
changeset
|
968 } |
11921 | 969 } |
970 return mode; | |
971 } | |
972 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
973 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
974 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
975 int mb_x, int mb_y) |
11921 | 976 { |
977 int x, y, mode, nnz, tr; | |
978 | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
979 // for the first row, we need to run xchg_mb_border to init the top edge to 127 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
980 // otherwise, skip it if we aren't going to deblock |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
981 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
982 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
983 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
984 s->filter.simple, 1); |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
985 |
11921 | 986 if (mb->mode < MODE_I4x4) { |
987 mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); | |
988 s->hpc.pred16x16[mode](dst[0], s->linesize); | |
989 } else { | |
990 uint8_t *ptr = dst[0]; | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
991 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; |
11921 | 992 |
993 // all blocks on the right edge of the macroblock use bottom edge | |
994 // the top macroblock for their topright edge | |
995 uint8_t *tr_right = ptr - s->linesize + 16; | |
996 | |
997 // if we're on the right edge of the frame, said edge is extended | |
998 // from the top macroblock | |
999 if (mb_x == s->mb_width-1) { | |
1000 tr = tr_right[-1]*0x01010101; | |
1001 tr_right = (uint8_t *)&tr; | |
1002 } | |
1003 | |
12234
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1004 if (mb->skip) |
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1005 AV_ZERO128(s->non_zero_count_cache); |
bba849c2a113
VP8: avoid a memset for non-i4x4 blocks with no coefficients
darkshikari
parents:
12233
diff
changeset
|
1006 |
11921 | 1007 for (y = 0; y < 4; y++) { |
1008 uint8_t *topright = ptr + 4 - s->linesize; | |
1009 for (x = 0; x < 4; x++) { | |
1010 if (x == 3) | |
1011 topright = tr_right; | |
1012 | |
12221 | 1013 s->hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s->linesize); |
11921 | 1014 |
1015 nnz = s->non_zero_count_cache[y][x]; | |
1016 if (nnz) { | |
1017 if (nnz == 1) | |
1018 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); | |
1019 else | |
1020 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); | |
1021 } | |
1022 topright += 4; | |
1023 } | |
1024 | |
1025 ptr += 4*s->linesize; | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1026 intra4x4 += 4; |
11921 | 1027 } |
1028 } | |
1029 | |
1030 mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); | |
1031 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); | |
1032 s->hpc.pred8x8[mode](dst[2], s->uvlinesize); | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1033 |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1034 if (s->deblock_filter || !mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1035 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1036 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1037 s->filter.simple, 0); |
11921 | 1038 } |
1039 | |
1040 /** | |
1041 * Generic MC function. | |
1042 * | |
1043 * @param s VP8 decoding context | |
1044 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes | |
1045 * @param dst target buffer for block data at block position | |
1046 * @param src reference picture buffer at origin (0, 0) | |
1047 * @param mv motion vector (relative to block position) to get pixel data from | |
1048 * @param x_off horizontal position of block from origin (0, 0) | |
1049 * @param y_off vertical position of block from origin (0, 0) | |
1050 * @param block_w width of block (16, 8 or 4) | |
1051 * @param block_h height of block (always same as block_w) | |
1052 * @param width width of src/dst plane data | |
1053 * @param height height of src/dst plane data | |
1054 * @param linesize size of a single line of plane data, including padding | |
12115 | 1055 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) |
11921 | 1056 */ |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1057 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1058 void vp8_mc(VP8Context *s, int luma, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1059 uint8_t *dst, uint8_t *src, const VP56mv *mv, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1060 int x_off, int y_off, int block_w, int block_h, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1061 int width, int height, int linesize, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1062 vp8_mc_func mc_func[3][3]) |
11921 | 1063 { |
12218 | 1064 if (AV_RN32A(mv)) { |
1065 static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; | |
1066 int mx = (mv->x << luma)&7, mx_idx = idx[mx]; | |
1067 int my = (mv->y << luma)&7, my_idx = idx[my]; | |
11921 | 1068 |
12218 | 1069 x_off += mv->x >> (3 - luma); |
1070 y_off += mv->y >> (3 - luma); | |
11921 | 1071 |
12218 | 1072 // edge emulation |
1073 src += y_off * linesize + x_off; | |
1074 if (x_off < 2 || x_off >= width - block_w - 3 || | |
1075 y_off < 2 || y_off >= height - block_h - 3) { | |
1076 ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize, | |
1077 block_w + 5, block_h + 5, | |
1078 x_off - 2, y_off - 2, width, height); | |
1079 src = s->edge_emu_buffer + 2 + linesize * 2; | |
1080 } | |
1081 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); | |
1082 } else | |
1083 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); | |
11921 | 1084 } |
1085 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1086 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1087 void vp8_mc_part(VP8Context *s, uint8_t *dst[3], |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1088 AVFrame *ref_frame, int x_off, int y_off, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1089 int bx_off, int by_off, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1090 int block_w, int block_h, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1091 int width, int height, VP56mv *mv) |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1092 { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1093 VP56mv uvmv = *mv; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1094 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1095 /* Y */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1096 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1097 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1098 block_w, block_h, width, height, s->linesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1099 s->put_pixels_tab[block_w == 8]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1100 |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1101 /* U/V */ |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1102 if (s->profile == 3) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1103 uvmv.x &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1104 uvmv.y &= ~7; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1105 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1106 x_off >>= 1; y_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1107 bx_off >>= 1; by_off >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1108 width >>= 1; height >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1109 block_w >>= 1; block_h >>= 1; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1110 vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1111 ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1112 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1113 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1114 vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1115 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1116 block_w, block_h, width, height, s->uvlinesize, |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1117 s->put_pixels_tab[1 + (block_w == 4)]); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1118 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1119 |
12215 | 1120 /* Fetch pixels for estimated mv 4 macroblocks ahead. |
1121 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1122 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) |
12215 | 1123 { |
12237 | 1124 /* Don't prefetch refs that haven't been used very often this frame. */ |
1125 if (s->ref_count[ref-1] > (mb_xy >> 5)) { | |
12231 | 1126 int x_off = mb_x << 4, y_off = mb_y << 4; |
1127 int mx = mb->mv.x + x_off + 8; | |
1128 int my = mb->mv.y + y_off; | |
1129 uint8_t **src= s->framep[ref]->data; | |
1130 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; | |
1131 s->dsp.prefetch(src[0]+off, s->linesize, 4); | |
1132 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; | |
1133 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |
1134 } | |
12215 | 1135 } |
1136 | |
11921 | 1137 /** |
1138 * Apply motion vectors to prediction buffer, chapter 18. | |
1139 */ | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1140 static av_always_inline |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1141 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1142 int mb_x, int mb_y) |
11921 | 1143 { |
1144 int x_off = mb_x << 4, y_off = mb_y << 4; | |
1145 int width = 16*s->mb_width, height = 16*s->mb_height; | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1146 AVFrame *ref = s->framep[mb->ref_frame]; |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1147 VP56mv *bmv = mb->bmv; |
11921 | 1148 |
1149 if (mb->mode < VP8_MVMODE_SPLIT) { | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1150 vp8_mc_part(s, dst, ref, x_off, y_off, |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1151 0, 0, 16, 16, width, height, &mb->mv); |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1152 } else switch (mb->partitioning) { |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1153 case VP8_SPLITMVMODE_4x4: { |
11921 | 1154 int x, y; |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1155 VP56mv uvmv; |
11921 | 1156 |
1157 /* Y */ | |
1158 for (y = 0; y < 4; y++) { | |
1159 for (x = 0; x < 4; x++) { | |
1160 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1161 ref->data[0], &bmv[4*y + x], |
11921 | 1162 4*x + x_off, 4*y + y_off, 4, 4, |
1163 width, height, s->linesize, | |
11974 | 1164 s->put_pixels_tab[2]); |
11921 | 1165 } |
1166 } | |
1167 | |
1168 /* U/V */ | |
1169 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; | |
1170 for (y = 0; y < 2; y++) { | |
1171 for (x = 0; x < 2; x++) { | |
1172 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + | |
1173 mb->bmv[ 2*y * 4 + 2*x+1].x + | |
1174 mb->bmv[(2*y+1) * 4 + 2*x ].x + | |
1175 mb->bmv[(2*y+1) * 4 + 2*x+1].x; | |
1176 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + | |
1177 mb->bmv[ 2*y * 4 + 2*x+1].y + | |
1178 mb->bmv[(2*y+1) * 4 + 2*x ].y + | |
1179 mb->bmv[(2*y+1) * 4 + 2*x+1].y; | |
11937
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1180 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2; |
bc617cceacb1
avoid conditional and division in chroma MV calculation
stefang
parents:
11921
diff
changeset
|
1181 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2; |
11921 | 1182 if (s->profile == 3) { |
1183 uvmv.x &= ~7; | |
1184 uvmv.y &= ~7; | |
1185 } | |
1186 vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, | |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1187 ref->data[1], &uvmv, |
11921 | 1188 4*x + x_off, 4*y + y_off, 4, 4, |
1189 width, height, s->uvlinesize, | |
11974 | 1190 s->put_pixels_tab[2]); |
11921 | 1191 vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1192 ref->data[2], &uvmv, |
11921 | 1193 4*x + x_off, 4*y + y_off, 4, 4, |
1194 width, height, s->uvlinesize, | |
11974 | 1195 s->put_pixels_tab[2]); |
11921 | 1196 } |
1197 } | |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1198 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1199 } |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1200 case VP8_SPLITMVMODE_16x8: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1201 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1202 0, 0, 16, 8, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1203 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1204 0, 8, 16, 8, width, height, &bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1205 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1206 case VP8_SPLITMVMODE_8x16: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1207 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1208 0, 0, 8, 16, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1209 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1210 8, 0, 8, 16, width, height, &bmv[1]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1211 break; |
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1212 case VP8_SPLITMVMODE_8x8: |
12228
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1213 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1214 0, 0, 8, 8, width, height, &bmv[0]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1215 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1216 8, 0, 8, 8, width, height, &bmv[1]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1217 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1218 0, 8, 8, 8, width, height, &bmv[2]); |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1219 vp8_mc_part(s, dst, ref, x_off, y_off, |
9c63566f623f
Eliminate some repeated dereferences in VP8 inter_predict
darkshikari
parents:
12225
diff
changeset
|
1220 8, 8, 8, 8, width, height, &bmv[3]); |
11989
176c5deb6756
Optimize split MC, so we don't always do 4x4 blocks of 4x4pixels each, but
rbultje
parents:
11974
diff
changeset
|
1221 break; |
11921 | 1222 } |
1223 } | |
1224 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1225 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) |
11921 | 1226 { |
12240 | 1227 int x, y, ch; |
11921 | 1228 |
12238 | 1229 if (mb->mode != MODE_I4x4) { |
1230 uint8_t *y_dst = dst[0]; | |
11921 | 1231 for (y = 0; y < 4; y++) { |
12240 | 1232 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]); |
1233 if (nnz4) { | |
1234 if (nnz4&~0x01010101) { | |
12238 | 1235 for (x = 0; x < 4; x++) { |
12240 | 1236 int nnz = s->non_zero_count_cache[y][x]; |
12238 | 1237 if (nnz) { |
1238 if (nnz == 1) | |
1239 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1240 else | |
1241 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); | |
1242 } | |
1243 } | |
1244 } else { | |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1245 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); |
11921 | 1246 } |
1247 } | |
1248 y_dst += 4*s->linesize; | |
1249 } | |
12238 | 1250 } |
11921 | 1251 |
12238 | 1252 for (ch = 0; ch < 2; ch++) { |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1253 uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1254 if (nnz4) { |
12238 | 1255 uint8_t *ch_dst = dst[1+ch]; |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1256 if (nnz4&~0x01010101) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1257 for (y = 0; y < 2; y++) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1258 for (x = 0; x < 2; x++) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1259 int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x]; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1260 if (nnz) { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1261 if (nnz == 1) |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1262 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1263 else |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1264 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1265 } |
12238 | 1266 } |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1267 ch_dst += 4*s->uvlinesize; |
12238 | 1268 } |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1269 } else { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12240
diff
changeset
|
1270 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize); |
11921 | 1271 } |
1272 } | |
1273 } | |
1274 } | |
1275 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1276 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) |
11921 | 1277 { |
1278 int interior_limit, filter_level; | |
1279 | |
1280 if (s->segmentation.enabled) { | |
12224
5b7d690b761b
VP8: Don't store segment in macroblock struct anymore.
darkshikari
parents:
12223
diff
changeset
|
1281 filter_level = s->segmentation.filter_level[s->segment]; |
11921 | 1282 if (!s->segmentation.absolute_vals) |
1283 filter_level += s->filter.level; | |
1284 } else | |
1285 filter_level = s->filter.level; | |
1286 | |
1287 if (s->lf_delta.enabled) { | |
1288 filter_level += s->lf_delta.ref[mb->ref_frame]; | |
1289 | |
1290 if (mb->ref_frame == VP56_FRAME_CURRENT) { | |
1291 if (mb->mode == MODE_I4x4) | |
1292 filter_level += s->lf_delta.mode[0]; | |
1293 } else { | |
1294 if (mb->mode == VP8_MVMODE_ZERO) | |
1295 filter_level += s->lf_delta.mode[1]; | |
1296 else if (mb->mode == VP8_MVMODE_SPLIT) | |
1297 filter_level += s->lf_delta.mode[3]; | |
1298 else | |
1299 filter_level += s->lf_delta.mode[2]; | |
1300 } | |
1301 } | |
1302 filter_level = av_clip(filter_level, 0, 63); | |
1303 | |
1304 interior_limit = filter_level; | |
1305 if (s->filter.sharpness) { | |
1306 interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; | |
1307 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); | |
1308 } | |
1309 interior_limit = FFMAX(interior_limit, 1); | |
1310 | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1311 f->filter_level = filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1312 f->inner_limit = interior_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1313 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; |
11921 | 1314 } |
1315 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1316 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) |
11921 | 1317 { |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1318 int mbedge_lim, bedge_lim, hev_thresh; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1319 int filter_level = f->filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1320 int inner_limit = f->inner_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1321 int inner_filter = f->inner_filter; |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1322 int linesize = s->linesize; |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1323 int uvlinesize = s->uvlinesize; |
11921 | 1324 |
1325 if (!filter_level) | |
1326 return; | |
1327 | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1328 mbedge_lim = 2*(filter_level+2) + inner_limit; |
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1329 bedge_lim = 2* filter_level + inner_limit; |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1330 hev_thresh = filter_level >= 15; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1331 |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1332 if (s->keyframe) { |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1333 if (filter_level >= 40) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1334 hev_thresh = 2; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1335 } else { |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1336 if (filter_level >= 40) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1337 hev_thresh = 3; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1338 else if (filter_level >= 20) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1339 hev_thresh = 2; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1340 } |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12062
diff
changeset
|
1341 |
11921 | 1342 if (mb_x) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1343 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1344 mbedge_lim, inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1345 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1346 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1347 } |
1348 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1349 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1350 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1351 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1352 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1353 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1354 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1355 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1356 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1357 uvlinesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1358 inner_limit, hev_thresh); |
11921 | 1359 } |
1360 | |
1361 if (mb_y) { | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1362 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1363 mbedge_lim, inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1364 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1365 mbedge_lim, inner_limit, hev_thresh); |
11921 | 1366 } |
1367 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1368 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1369 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1370 linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1371 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1372 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1373 linesize, bedge_lim, |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12170
diff
changeset
|
1374 inner_limit, hev_thresh); |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1375 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1376 linesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1377 inner_limit, hev_thresh); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1378 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1379 dst[2] + 4 * uvlinesize, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1380 uvlinesize, bedge_lim, |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1381 inner_limit, hev_thresh); |
11921 | 1382 } |
1383 } | |
1384 | |
12248
121272849def
VP8: always_inline some things to force gcc to do the right thing
darkshikari
parents:
12247
diff
changeset
|
1385 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) |
11921 | 1386 { |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1387 int mbedge_lim, bedge_lim; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1388 int filter_level = f->filter_level; |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1389 int inner_limit = f->inner_limit; |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1390 int inner_filter = f->inner_filter; |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1391 int linesize = s->linesize; |
11921 | 1392 |
1393 if (!filter_level) | |
1394 return; | |
1395 | |
1396 mbedge_lim = 2*(filter_level+2) + inner_limit; | |
1397 bedge_lim = 2* filter_level + inner_limit; | |
1398 | |
1399 if (mb_x) | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1400 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1401 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1402 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1403 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1404 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim); |
11921 | 1405 } |
1406 | |
1407 if (mb_y) | |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1408 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1409 if (inner_filter) { |
12233
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1410 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1411 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim); |
10b02cbc3cc2
Get rid of more unnecessary dereferences in VP8 deblocking
darkshikari
parents:
12232
diff
changeset
|
1412 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim); |
11921 | 1413 } |
1414 } | |
1415 | |
1416 static void filter_mb_row(VP8Context *s, int mb_y) | |
1417 { | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1418 VP8FilterStrength *f = s->filter_strength; |
11921 | 1419 uint8_t *dst[3] = { |
1420 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, | |
1421 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, | |
1422 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize | |
1423 }; | |
1424 int mb_x; | |
1425 | |
1426 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1427 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1428 filter_mb(s, dst, f++, mb_x, mb_y); |
11921 | 1429 dst[0] += 16; |
1430 dst[1] += 8; | |
1431 dst[2] += 8; | |
1432 } | |
1433 } | |
1434 | |
1435 static void filter_mb_row_simple(VP8Context *s, int mb_y) | |
1436 { | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1437 VP8FilterStrength *f = s->filter_strength; |
11921 | 1438 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; |
1439 int mb_x; | |
1440 | |
1441 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1442 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1443 filter_mb_simple(s, dst, f++, mb_x, mb_y); |
11921 | 1444 dst += 16; |
1445 } | |
1446 } | |
1447 | |
1448 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
1449 AVPacket *avpkt) | |
1450 { | |
1451 VP8Context *s = avctx->priv_data; | |
1452 int ret, mb_x, mb_y, i, y, referenced; | |
1453 enum AVDiscard skip_thresh; | |
12270
161c205dcdd2
Fix r24445: Instead of needlessly initialising a variable, silence the warning.
cehoyos
parents:
12255
diff
changeset
|
1454 AVFrame *av_uninit(curframe); |
11921 | 1455 |
1456 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) | |
1457 return ret; | |
1458 | |
1459 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT | |
1460 || s->update_altref == VP56_FRAME_CURRENT; | |
1461 | |
1462 skip_thresh = !referenced ? AVDISCARD_NONREF : | |
1463 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; | |
1464 | |
1465 if (avctx->skip_frame >= skip_thresh) { | |
1466 s->invisible = 1; | |
1467 goto skip_decode; | |
1468 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1469 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; |
11921 | 1470 |
1471 for (i = 0; i < 4; i++) | |
1472 if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1473 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1474 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { | |
1475 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; | |
1476 break; | |
1477 } | |
1478 if (curframe->data[0]) | |
1479 avctx->release_buffer(avctx, curframe); | |
1480 | |
1481 curframe->key_frame = s->keyframe; | |
1482 curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE; | |
1483 curframe->reference = referenced ? 3 : 0; | |
1484 if ((ret = avctx->get_buffer(avctx, curframe))) { | |
1485 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); | |
1486 return ret; | |
1487 } | |
1488 | |
1489 // Given that arithmetic probabilities are updated every frame, it's quite likely | |
1490 // that the values we have on a random interframe are complete junk if we didn't | |
1491 // start decode on a keyframe. So just don't display anything rather than junk. | |
1492 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || | |
1493 !s->framep[VP56_FRAME_GOLDEN] || | |
1494 !s->framep[VP56_FRAME_GOLDEN2])) { | |
1495 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); | |
1496 return AVERROR_INVALIDDATA; | |
1497 } | |
1498 | |
1499 s->linesize = curframe->linesize[0]; | |
1500 s->uvlinesize = curframe->linesize[1]; | |
1501 | |
1502 if (!s->edge_emu_buffer) | |
1503 s->edge_emu_buffer = av_malloc(21*s->linesize); | |
1504 | |
1505 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); | |
1506 | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1507 /* Zero macroblock structures for top/left prediction from outside the frame. */ |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1508 memset(s->macroblocks, 0, (s->mb_width + s->mb_height*2)*sizeof(*s->macroblocks)); |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1509 |
11921 | 1510 // top edge of 127 for intra prediction |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1511 memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); |
12231 | 1512 memset(s->ref_count, 0, sizeof(s->ref_count)); |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1513 if (s->keyframe) |
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1514 memset(s->intra4x4_pred_mode_top, DC_PRED, s->b4_stride*4); |
11921 | 1515 |
1516 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { | |
1517 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; | |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1518 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; |
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1519 uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride; |
12237 | 1520 int mb_xy = mb_y * s->mb_stride; |
11921 | 1521 uint8_t *dst[3] = { |
1522 curframe->data[0] + 16*mb_y*s->linesize, | |
1523 curframe->data[1] + 8*mb_y*s->uvlinesize, | |
1524 curframe->data[2] + 8*mb_y*s->uvlinesize | |
1525 }; | |
1526 | |
1527 memset(s->left_nnz, 0, sizeof(s->left_nnz)); | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1528 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); |
11921 | 1529 |
1530 // left edge of 129 for intra prediction | |
1531 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) | |
1532 for (i = 0; i < 3; i++) | |
1533 for (y = 0; y < 16>>!!i; y++) | |
1534 dst[i][y*curframe->linesize[i]-1] = 129; | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1535 if (mb_y) |
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1536 memset(s->top_border, 129, sizeof(*s->top_border)); |
11921 | 1537 |
12237 | 1538 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { |
12223
93e27a5401de
Convert VP8 macroblock structures to a ring buffer.
darkshikari
parents:
12222
diff
changeset
|
1539 uint8_t *segment_mb = segment_map+mb_x; |
12221 | 1540 |
12215 | 1541 /* Prefetch the current frame, 4 MBs ahead */ |
1542 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); | |
1543 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); | |
1544 | |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1545 decode_mb_mode(s, mb, mb_x, mb_y, segment_mb); |
11921 | 1546 |
12237 | 1547 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); |
12231 | 1548 |
11921 | 1549 if (!mb->skip) |
1550 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); | |
1551 | |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
1552 if (mb->mode <= MODE_I4x4) |
12339
57fc7f2d7b28
only store intra prediction modes on the boundary for keyframes, not as a plane.
skal
parents:
12336
diff
changeset
|
1553 intra_predict(s, dst, mb, mb_x, mb_y); |
12225
c3e11b3108d7
Eliminate a pointless memset for intra blocks in P-frames in VP8
darkshikari
parents:
12224
diff
changeset
|
1554 else |
11921 | 1555 inter_predict(s, dst, mb, mb_x, mb_y); |
1556 | |
12237 | 1557 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); |
12231 | 1558 |
11921 | 1559 if (!mb->skip) { |
12238 | 1560 idct_mb(s, dst, mb); |
11921 | 1561 } else { |
1562 AV_ZERO64(s->left_nnz); | |
1563 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned | |
1564 | |
1565 // Reset DC block predictors if they would exist if the mb had coefficients | |
1566 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | |
1567 s->left_nnz[8] = 0; | |
1568 s->top_nnz[mb_x][8] = 0; | |
1569 } | |
1570 } | |
1571 | |
12222
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1572 if (s->deblock_filter) |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1573 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); |
7acdbfd2a222
Calculate deblock strength per-MB instead of per-row
darkshikari
parents:
12221
diff
changeset
|
1574 |
12237 | 1575 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); |
12231 | 1576 |
11921 | 1577 dst[0] += 16; |
1578 dst[1] += 8; | |
1579 dst[2] += 8; | |
1580 } | |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1581 if (s->deblock_filter) { |
11921 | 1582 if (s->filter.simple) |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1583 filter_mb_row_simple(s, mb_y); |
11921 | 1584 else |
12170
6f0db2eeaf70
vp8: Save mb border needed for intra prediction so that loop filter can run
conrad
parents:
12169
diff
changeset
|
1585 filter_mb_row(s, mb_y); |
11921 | 1586 } |
1587 } | |
1588 | |
1589 skip_decode: | |
1590 // if future frames don't use the updated probabilities, | |
1591 // reset them to the values we saved | |
1592 if (!s->update_probabilities) | |
1593 s->prob[0] = s->prob[1]; | |
1594 | |
1595 // check if golden and altref are swapped | |
1596 if (s->update_altref == VP56_FRAME_GOLDEN && | |
1597 s->update_golden == VP56_FRAME_GOLDEN2) | |
1598 FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]); | |
1599 else { | |
1600 if (s->update_altref != VP56_FRAME_NONE) | |
1601 s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; | |
1602 | |
1603 if (s->update_golden != VP56_FRAME_NONE) | |
1604 s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; | |
1605 } | |
1606 | |
1607 if (s->update_last) // move cur->prev | |
1608 s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT]; | |
1609 | |
1610 // release no longer referenced frames | |
1611 for (i = 0; i < 4; i++) | |
1612 if (s->frames[i].data[0] && | |
1613 &s->frames[i] != s->framep[VP56_FRAME_CURRENT] && | |
1614 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && | |
1615 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && | |
1616 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) | |
1617 avctx->release_buffer(avctx, &s->frames[i]); | |
1618 | |
1619 if (!s->invisible) { | |
1620 *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT]; | |
1621 *data_size = sizeof(AVFrame); | |
1622 } | |
1623 | |
1624 return avpkt->size; | |
1625 } | |
1626 | |
1627 static av_cold int vp8_decode_init(AVCodecContext *avctx) | |
1628 { | |
1629 VP8Context *s = avctx->priv_data; | |
1630 | |
1631 s->avctx = avctx; | |
1632 avctx->pix_fmt = PIX_FMT_YUV420P; | |
1633 | |
1634 dsputil_init(&s->dsp, avctx); | |
1635 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8); | |
1636 ff_vp8dsp_init(&s->vp8dsp); | |
1637 | |
1638 // intra pred needs edge emulation among other things | |
1639 if (avctx->flags&CODEC_FLAG_EMU_EDGE) { | |
11947 | 1640 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n"); |
11921 | 1641 return AVERROR_PATCHWELCOME; |
1642 } | |
1643 | |
1644 return 0; | |
1645 } | |
1646 | |
1647 static av_cold int vp8_decode_free(AVCodecContext *avctx) | |
1648 { | |
1649 vp8_decode_flush(avctx); | |
1650 return 0; | |
1651 } | |
1652 | |
1653 AVCodec vp8_decoder = { | |
1654 "vp8", | |
1655 AVMEDIA_TYPE_VIDEO, | |
1656 CODEC_ID_VP8, | |
1657 sizeof(VP8Context), | |
1658 vp8_decode_init, | |
1659 NULL, | |
1660 vp8_decode_free, | |
1661 vp8_decode_frame, | |
1662 CODEC_CAP_DR1, | |
1663 .flush = vp8_decode_flush, | |
1664 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), | |
1665 }; |