Mercurial > libavcodec.hg
annotate vp6.c @ 3990:746a60ba3177 libavcodec
enable CMOV_IS_FAST, as it is faster or of equal speed on every CPU (Duron, Athlon, PM, P3) for which I have seen benchmarks; it might be slower on P4, but no one has posted benchmarks ...
author | michael |
---|---|
date | Wed, 11 Oct 2006 12:23:40 +0000 |
parents | c8c591fe26f8 |
children | 34fdffe98bd0 |
rev | line source |
---|---|
3695 | 1 /** |
2 * @file vp6.c | |
3 * VP6 compatible video decoder | |
4 * | |
5 * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> | |
6 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3697
diff
changeset
|
7 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3697
diff
changeset
|
8 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3697
diff
changeset
|
9 * FFmpeg is free software; you can redistribute it and/or |
3695 | 10 * modify it under the terms of the GNU Lesser General Public |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3697
diff
changeset
|
14 * FFmpeg is distributed in the hope that it will be useful, |
3695 | 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3697
diff
changeset
|
20 * License along with FFmpeg; if not, write to the Free Software |
3695 | 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
22 */ | |
23 | |
24 #include <stdlib.h> | |
25 #include <inttypes.h> | |
26 | |
27 #include "avcodec.h" | |
28 #include "dsputil.h" | |
29 #include "bitstream.h" | |
30 #include "mpegvideo.h" | |
31 | |
32 #include "vp56.h" | |
33 #include "vp56data.h" | |
34 #include "vp6data.h" | |
35 | |
36 | |
37 static int vp6_parse_header(vp56_context_t *s, uint8_t *buf, int buf_size, | |
38 int *golden_frame) | |
39 { | |
40 vp56_range_coder_t *c = &s->c; | |
41 int parse_filter_info; | |
42 int rows, cols; | |
43 int res = 1; | |
44 | |
45 if (buf[0] & 1) | |
46 return 0; | |
47 | |
48 s->frames[VP56_FRAME_CURRENT].key_frame = !(buf[0] & 0x80); | |
49 vp56_init_dequant(s, (buf[0] >> 1) & 0x3F); | |
50 | |
51 if (s->frames[VP56_FRAME_CURRENT].key_frame) { | |
52 if ((buf[1] & 0xFE) != 0x46) /* would be 0x36 for VP61 */ | |
53 return 0; | |
54 if (buf[1] & 1) { | |
55 av_log(s->avctx, AV_LOG_ERROR, "interlacing not supported\n"); | |
56 return 0; | |
57 } | |
58 | |
59 rows = buf[2]; /* number of stored macroblock rows */ | |
60 cols = buf[3]; /* number of stored macroblock cols */ | |
61 /* buf[4] is number of displayed macroblock rows */ | |
62 /* buf[5] is number of displayed macroblock cols */ | |
63 | |
64 if (16*cols != s->avctx->coded_width || | |
65 16*rows != s->avctx->coded_height) { | |
66 avcodec_set_dimensions(s->avctx, 16*cols, 16*rows); | |
67 res = 2; | |
68 } | |
69 | |
70 vp56_init_range_decoder(c, buf+6, buf_size-6); | |
71 vp56_rac_gets(c, 2); | |
72 | |
73 parse_filter_info = 1; | |
74 } else { | |
75 vp56_init_range_decoder(c, buf+1, buf_size-1); | |
76 | |
77 *golden_frame = vp56_rac_get(c); | |
78 s->deblock_filtering = vp56_rac_get(c); | |
79 if (s->deblock_filtering) | |
80 vp56_rac_get(c); | |
81 parse_filter_info = vp56_rac_get(c); | |
82 } | |
83 | |
84 if (parse_filter_info) { | |
85 if (vp56_rac_get(c)) { | |
86 s->filter_mode = 2; | |
87 s->sample_variance_threshold = vp56_rac_gets(c, 5); | |
88 s->max_vector_length = 2 << vp56_rac_gets(c, 3); | |
89 } else if (vp56_rac_get(c)) { | |
90 s->filter_mode = 1; | |
91 } else { | |
92 s->filter_mode = 0; | |
93 } | |
94 s->filter_selection = vp56_rac_gets(c, 4); | |
95 } | |
96 | |
97 vp56_rac_get(c); | |
98 return res; | |
99 } | |
100 | |
101 static void vp6_coeff_order_table_init(vp56_context_t *s) | |
102 { | |
103 int i, pos, idx = 1; | |
104 | |
105 s->coeff_index_to_pos[0] = 0; | |
106 for (i=0; i<16; i++) | |
107 for (pos=1; pos<64; pos++) | |
108 if (s->coeff_reorder[pos] == i) | |
109 s->coeff_index_to_pos[idx++] = pos; | |
110 } | |
111 | |
112 static void vp6_default_models_init(vp56_context_t *s) | |
113 { | |
114 s->vector_model_dct[0] = 0xA2; | |
115 s->vector_model_dct[1] = 0xA4; | |
116 s->vector_model_sig[0] = 0x80; | |
117 s->vector_model_sig[1] = 0x80; | |
118 | |
119 memcpy(s->mb_types_stats, vp56_def_mb_types_stats, sizeof(s->mb_types_stats)); | |
120 memcpy(s->vector_model_fdv, vp6_def_fdv_vector_model, sizeof(s->vector_model_fdv)); | |
121 memcpy(s->vector_model_pdv, vp6_def_pdv_vector_model, sizeof(s->vector_model_pdv)); | |
122 memcpy(s->coeff_model_runv, vp6_def_runv_coeff_model, sizeof(s->coeff_model_runv)); | |
123 memcpy(s->coeff_reorder, vp6_def_coeff_reorder, sizeof(s->coeff_reorder)); | |
124 | |
125 vp6_coeff_order_table_init(s); | |
126 } | |
127 | |
128 static void vp6_parse_vector_models(vp56_context_t *s) | |
129 { | |
130 vp56_range_coder_t *c = &s->c; | |
131 int comp, node; | |
132 | |
133 for (comp=0; comp<2; comp++) { | |
134 if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][0])) | |
135 s->vector_model_dct[comp] = vp56_rac_gets_nn(c, 7); | |
136 if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][1])) | |
137 s->vector_model_sig[comp] = vp56_rac_gets_nn(c, 7); | |
138 } | |
139 | |
140 for (comp=0; comp<2; comp++) | |
141 for (node=0; node<7; node++) | |
142 if (vp56_rac_get_prob(c, vp6_pdv_pct[comp][node])) | |
143 s->vector_model_pdv[comp][node] = vp56_rac_gets_nn(c, 7); | |
144 | |
145 for (comp=0; comp<2; comp++) | |
146 for (node=0; node<8; node++) | |
147 if (vp56_rac_get_prob(c, vp6_fdv_pct[comp][node])) | |
148 s->vector_model_fdv[comp][node] = vp56_rac_gets_nn(c, 7); | |
149 } | |
150 | |
151 static void vp6_parse_coeff_models(vp56_context_t *s) | |
152 { | |
153 vp56_range_coder_t *c = &s->c; | |
154 int def_prob[11]; | |
155 int node, cg, ctx, pos; | |
156 int ct; /* code type */ | |
157 int pt; /* plane type (0 for Y, 1 for U or V) */ | |
158 | |
159 memset(def_prob, 0x80, sizeof(def_prob)); | |
160 | |
161 for (pt=0; pt<2; pt++) | |
162 for (node=0; node<11; node++) | |
163 if (vp56_rac_get_prob(c, vp6_dccv_pct[pt][node])) { | |
164 def_prob[node] = vp56_rac_gets_nn(c, 7); | |
165 s->coeff_model_dccv[pt][node] = def_prob[node]; | |
166 } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { | |
167 s->coeff_model_dccv[pt][node] = def_prob[node]; | |
168 } | |
169 | |
170 if (vp56_rac_get(c)) { | |
171 for (pos=1; pos<64; pos++) | |
172 if (vp56_rac_get_prob(c, vp6_coeff_reorder_pct[pos])) | |
173 s->coeff_reorder[pos] = vp56_rac_gets(c, 4); | |
174 vp6_coeff_order_table_init(s); | |
175 } | |
176 | |
177 for (cg=0; cg<2; cg++) | |
178 for (node=0; node<14; node++) | |
179 if (vp56_rac_get_prob(c, vp6_runv_pct[cg][node])) | |
180 s->coeff_model_runv[cg][node] = vp56_rac_gets_nn(c, 7); | |
181 | |
182 for (ct=0; ct<3; ct++) | |
183 for (pt=0; pt<2; pt++) | |
184 for (cg=0; cg<6; cg++) | |
185 for (node=0; node<11; node++) | |
186 if (vp56_rac_get_prob(c, vp6_ract_pct[ct][pt][cg][node])) { | |
187 def_prob[node] = vp56_rac_gets_nn(c, 7); | |
188 s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; | |
189 } else if (s->frames[VP56_FRAME_CURRENT].key_frame) { | |
190 s->coeff_model_ract[pt][ct][cg][node] = def_prob[node]; | |
191 } | |
192 | |
193 /* coeff_model_dcct is a linear combination of coeff_model_dccv */ | |
194 for (pt=0; pt<2; pt++) | |
195 for (ctx=0; ctx<3; ctx++) | |
196 for (node=0; node<5; node++) | |
197 s->coeff_model_dcct[pt][ctx][node] = clip(((s->coeff_model_dccv[pt][node] * vp6_dccv_lc[ctx][node][0] + 128) >> 8) + vp6_dccv_lc[ctx][node][1], 1, 255); | |
198 } | |
199 | |
3697 | 200 static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect) |
3695 | 201 { |
202 vp56_range_coder_t *c = &s->c; | |
203 int comp; | |
204 | |
3697 | 205 *vect = (vp56_mv_t) {0,0}; |
3695 | 206 if (s->vector_candidate_pos < 2) |
3697 | 207 *vect = s->vector_candidate[0]; |
3695 | 208 |
209 for (comp=0; comp<2; comp++) { | |
210 int i, delta = 0; | |
211 | |
212 if (vp56_rac_get_prob(c, s->vector_model_dct[comp])) { | |
213 static const uint8_t prob_order[] = {0, 1, 2, 7, 6, 5, 4}; | |
214 for (i=0; i<sizeof(prob_order); i++) { | |
215 int j = prob_order[i]; | |
216 delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][j])<<j; | |
217 } | |
218 if (delta & 0xF0) | |
219 delta |= vp56_rac_get_prob(c, s->vector_model_fdv[comp][3])<<3; | |
220 else | |
221 delta |= 8; | |
222 } else { | |
223 delta = vp56_rac_get_tree(c, vp56_pva_tree, | |
224 s->vector_model_pdv[comp]); | |
225 } | |
226 | |
227 if (delta && vp56_rac_get_prob(c, s->vector_model_sig[comp])) | |
228 delta = -delta; | |
229 | |
230 if (!comp) | |
3697 | 231 vect->x += delta; |
3695 | 232 else |
3697 | 233 vect->y += delta; |
3695 | 234 } |
235 } | |
236 | |
237 static void vp6_parse_coeff(vp56_context_t *s) | |
238 { | |
239 vp56_range_coder_t *c = &s->c; | |
240 uint8_t *permute = s->scantable.permutated; | |
241 uint8_t *model, *model2, *model3; | |
242 int coeff, sign, coeff_idx; | |
243 int b, i, cg, idx, ctx; | |
244 int pt = 0; /* plane type (0 for Y, 1 for U or V) */ | |
245 | |
246 for (b=0; b<6; b++) { | |
247 int ct = 1; /* code type */ | |
248 int run = 1; | |
249 | |
250 if (b > 3) pt = 1; | |
251 | |
252 ctx = s->left_block[vp56_b6to4[b]].not_null_dc | |
253 + s->above_blocks[s->above_block_idx[b]].not_null_dc; | |
254 model = s->coeff_model_dccv[pt]; | |
255 model2 = s->coeff_model_dcct[pt][ctx]; | |
256 | |
257 for (coeff_idx=0; coeff_idx<64; ) { | |
258 if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) { | |
259 /* parse a coeff */ | |
260 if (coeff_idx == 0) { | |
261 s->left_block[vp56_b6to4[b]].not_null_dc = 1; | |
262 s->above_blocks[s->above_block_idx[b]].not_null_dc = 1; | |
263 } | |
264 | |
265 if (vp56_rac_get_prob(c, model2[2])) { | |
266 if (vp56_rac_get_prob(c, model2[3])) { | |
267 idx = vp56_rac_get_tree(c, vp56_pc_tree, model); | |
268 coeff = vp56_coeff_bias[idx]; | |
269 for (i=vp56_coeff_bit_length[idx]; i>=0; i--) | |
270 coeff += vp56_rac_get_prob(c, vp56_coeff_parse_table[idx][i]) << i; | |
271 } else { | |
272 if (vp56_rac_get_prob(c, model2[4])) | |
273 coeff = 3 + vp56_rac_get_prob(c, model[5]); | |
274 else | |
275 coeff = 2; | |
276 } | |
277 ct = 2; | |
278 } else { | |
279 ct = 1; | |
280 coeff = 1; | |
281 } | |
282 sign = vp56_rac_get(c); | |
283 coeff = (coeff ^ -sign) + sign; | |
284 if (coeff_idx) | |
285 coeff *= s->dequant_ac; | |
286 idx = s->coeff_index_to_pos[coeff_idx]; | |
287 s->block_coeff[b][permute[idx]] = coeff; | |
288 run = 1; | |
289 } else { | |
290 /* parse a run */ | |
291 ct = 0; | |
292 if (coeff_idx == 0) { | |
293 s->left_block[vp56_b6to4[b]].not_null_dc = 0; | |
294 s->above_blocks[s->above_block_idx[b]].not_null_dc = 0; | |
295 } else { | |
296 if (!vp56_rac_get_prob(c, model2[1])) | |
297 break; | |
298 | |
299 model3 = s->coeff_model_runv[coeff_idx >= 6]; | |
300 run = vp56_rac_get_tree(c, vp6_pcr_tree, model3); | |
301 if (!run) | |
302 for (run=9, i=0; i<6; i++) | |
303 run += vp56_rac_get_prob(c, model3[i+8]) << i; | |
304 } | |
305 } | |
306 | |
307 cg = vp6_coeff_groups[coeff_idx+=run]; | |
308 model = model2 = s->coeff_model_ract[pt][ct][cg]; | |
309 } | |
310 } | |
311 } | |
312 | |
/**
 * Fold a value back towards zero around the threshold t.
 *
 * With m = |v|: when t < m < 2*t the result has magnitude 2*t - m and the
 * sign of v; otherwise v is returned unchanged.
 *
 * The sign handling relies on v >> 31 yielding 0 for non-negative and -1
 * for negative v.
 */
static int vp6_adjust(int v, int t)
{
    int sign = v >> 31;             /* 0 or -1 */
    int mag  = (v ^ sign) - sign;   /* |v| */

    /* one unsigned compare covers both mag <= t (wraps to a huge value)
     * and mag >= 2*t */
    if (mag - t - 1 >= (unsigned)(t - 1)) {
        return v;
    }

    mag = 2 * t - mag;
    return (mag + sign) ^ sign;     /* re-apply the sign of v */
}
325 | |
/**
 * Variance estimate of an 8x8 block, computed from the 16 samples at even
 * rows and even columns.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two consecutive rows
 * @return (16 * sum of squares - square of sum) / 256
 */
static int vp6_block_variance(uint8_t *src, int stride)
{
    int total = 0, total_sq = 0;
    int row, col;

    for (row = 0; row < 4; row++) {
        /* rows 0, 2, 4 and 6 of the block */
        const uint8_t *line = src + 2 * row * stride;

        for (col = 0; col < 8; col += 2) {
            int px = line[col];

            total    += px;
            total_sq += px * px;
        }
    }

    return (16 * total_sq - total * total) / (16 * 16);
}
340 | |
341 static void vp6_filter_hv2(vp56_context_t *s, uint8_t *dst, uint8_t *src, | |
342 int stride, int delta, int16_t weight) | |
343 { | |
344 s->dsp.put_pixels_tab[1][0](dst, src, stride, 8); | |
345 s->dsp.biweight_h264_pixels_tab[3](dst, src+delta, stride, 2, | |
346 8-weight, weight, 0); | |
347 } | |
348 | |
/**
 * 4-tap filter of an 8x8 block along one direction.
 *
 * Each output pixel is the weighted sum of the source pixels at offsets
 * -delta, 0, +delta and +2*delta, rounded (+64), shifted right by 7 and
 * clipped with clip_uint8().
 *
 * @param delta   distance between taps: 1 for horizontal, stride for
 *                vertical filtering (as used by vp6_filter)
 * @param weights the 4 tap weights
 */
static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride,
                           int delta, const int16_t *weights)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            int acc = src[col - delta]     * weights[0]
                    + src[col]             * weights[1]
                    + src[col + delta]     * weights[2]
                    + src[col + 2 * delta] * weights[3]
                    + 64;

            dst[col] = clip_uint8(acc >> 7);
        }
        src += stride;
        dst += stride;
    }
}
365 | |
366 static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src, | |
367 int stride, int h_weight, int v_weight) | |
368 { | |
369 uint8_t *tmp = s->edge_emu_buffer+16; | |
370 int x, xmax; | |
371 | |
372 s->dsp.put_pixels_tab[1][0](tmp, src, stride, 8); | |
373 s->dsp.biweight_h264_pixels_tab[3](tmp, src+1, stride, 2, | |
374 8-h_weight, h_weight, 0); | |
375 /* we need a 8x9 block to do vertical filter, so compute one more line */ | |
376 for (x=8*stride, xmax=x+8; x<xmax; x++) | |
377 tmp[x] = (src[x]*(8-h_weight) + src[x+1]*h_weight + 4) >> 3; | |
378 | |
379 s->dsp.put_pixels_tab[1][0](dst, tmp, stride, 8); | |
380 s->dsp.biweight_h264_pixels_tab[3](dst, tmp+stride, stride, 2, | |
381 8-v_weight, v_weight, 0); | |
382 } | |
383 | |
/**
 * 4-tap filter of an 8x8 block in both directions.
 *
 * A horizontal pass produces 11 intermediate rows, starting one row above
 * src; the vertical pass then combines 4 consecutive intermediate rows
 * for each of the 8 output rows. Both passes round with +64, shift right
 * by 7 and clip with clip_uint8().
 *
 * @param h_weights the 4 horizontal tap weights (offsets -1, 0, +1, +2)
 * @param v_weights the 4 vertical tap weights (rows -1, 0, +1, +2)
 */
static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride,
                             const int16_t *h_weights,const int16_t *v_weights)
{
    int mid[11][8];
    const uint8_t *line = src - stride;
    int row, col;

    /* horizontal pass: mid[r] holds the filtered source row r-1 */
    for (row = 0; row < 11; row++) {
        for (col = 0; col < 8; col++) {
            int acc = line[col - 1] * h_weights[0]
                    + line[col]     * h_weights[1]
                    + line[col + 1] * h_weights[2]
                    + line[col + 2] * h_weights[3]
                    + 64;

            mid[row][col] = clip_uint8(acc >> 7);
        }
        line += stride;
    }

    /* vertical pass: output row r uses mid[r] .. mid[r+3] */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            int acc = mid[row][col]     * v_weights[0]
                    + mid[row + 1][col] * v_weights[1]
                    + mid[row + 2][col] * v_weights[2]
                    + mid[row + 3][col] * v_weights[3]
                    + 64;

            dst[col] = clip_uint8(acc >> 7);
        }
        dst += stride;
    }
}
416 | |
417 static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src, | |
418 int offset1, int offset2, int stride, | |
419 vp56_mv_t mv, int mask, int select, int luma) | |
420 { | |
421 int filter4 = 0; | |
422 int x8 = mv.x & mask; | |
423 int y8 = mv.y & mask; | |
424 | |
425 if (luma) { | |
426 x8 *= 2; | |
427 y8 *= 2; | |
428 filter4 = s->filter_mode; | |
429 if (filter4 == 2) { | |
430 if (s->max_vector_length && | |
431 (ABS(mv.x) > s->max_vector_length || | |
432 ABS(mv.y) > s->max_vector_length)) { | |
433 filter4 = 0; | |
434 } else if (!s->sample_variance_threshold | |
435 || (vp6_block_variance(src+offset1, stride) | |
436 < s->sample_variance_threshold)) { | |
437 filter4 = 0; | |
438 } | |
439 } | |
440 } | |
441 | |
442 if ((y8 && (offset2-offset1)*s->flip<0) || (!y8 && offset1 > offset2)) { | |
443 offset1 = offset2; | |
444 } | |
445 | |
446 if (filter4) { | |
447 if (!y8) { /* left or right combine */ | |
448 vp6_filter_hv4(dst, src+offset1, stride, 1, | |
449 vp6_block_copy_filter[select][x8]); | |
450 } else if (!x8) { /* above or below combine */ | |
451 vp6_filter_hv4(dst, src+offset1, stride, stride, | |
452 vp6_block_copy_filter[select][y8]); | |
453 } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */ | |
454 vp6_filter_diag4(dst, src+offset1-1, stride, | |
455 vp6_block_copy_filter[select][x8], | |
456 vp6_block_copy_filter[select][y8]); | |
457 } else { /* lower-right or upper-left combine */ | |
458 vp6_filter_diag4(dst, src+offset1, stride, | |
459 vp6_block_copy_filter[select][x8], | |
460 vp6_block_copy_filter[select][y8]); | |
461 } | |
462 } else { | |
463 if (!y8) { /* left or right combine */ | |
464 vp6_filter_hv2(s, dst, src+offset1, stride, 1, x8); | |
465 } else if (!x8) { /* above or below combine */ | |
466 vp6_filter_hv2(s, dst, src+offset1, stride, stride, y8); | |
467 } else if ((mv.x^mv.y) >> 31) { /* lower-left or upper-right combine */ | |
468 vp6_filter_diag2(s, dst, src+offset1-1, stride, x8, y8); | |
469 } else { /* lower-right or upper-left combine */ | |
470 vp6_filter_diag2(s, dst, src+offset1, stride, x8, y8); | |
471 } | |
472 } | |
473 } | |
474 | |
475 static int vp6_decode_init(AVCodecContext *avctx) | |
476 { | |
477 vp56_context_t *s = avctx->priv_data; | |
478 | |
479 vp56_init(s, avctx, avctx->codec->id == CODEC_ID_VP6); | |
480 s->vp56_coord_div = vp6_coord_div; | |
481 s->parse_vector_adjustment = vp6_parse_vector_adjustment; | |
482 s->adjust = vp6_adjust; | |
483 s->filter = vp6_filter; | |
484 s->parse_coeff = vp6_parse_coeff; | |
485 s->default_models_init = vp6_default_models_init; | |
486 s->parse_vector_models = vp6_parse_vector_models; | |
487 s->parse_coeff_models = vp6_parse_coeff_models; | |
488 s->parse_header = vp6_parse_header; | |
489 | |
490 return 0; | |
491 } | |
492 | |
493 AVCodec vp6_decoder = { | |
494 "vp6", | |
495 CODEC_TYPE_VIDEO, | |
496 CODEC_ID_VP6, | |
497 sizeof(vp56_context_t), | |
498 vp6_decode_init, | |
499 NULL, | |
500 vp56_free, | |
501 vp56_decode_frame, | |
502 }; | |
503 | |
504 /* flash version, not flipped upside-down */ | |
505 AVCodec vp6f_decoder = { | |
506 "vp6f", | |
507 CODEC_TYPE_VIDEO, | |
508 CODEC_ID_VP6F, | |
509 sizeof(vp56_context_t), | |
510 vp6_decode_init, | |
511 NULL, | |
512 vp56_free, | |
513 vp56_decode_frame, | |
514 }; |