Mercurial > libavcodec.hg
annotate vp56.h @ 12400:4f13b2ded34d libavcodec
Fix segfaults in VP8 SIMD code on Win64 (and FATE/win64 failures).
author | rbultje |
---|---|
date | Mon, 23 Aug 2010 02:41:22 +0000 |
parents | d0b25641338b |
children |
rev | line source |
---|---|
3695 | 1 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11369
diff
changeset
|
2 * @file |
3695 | 3 * VP5 and VP6 compatible video decoder (common features) |
4 * | |
5 * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> | |
6 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3759
diff
changeset
|
7 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3759
diff
changeset
|
8 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3759
diff
changeset
|
9 * FFmpeg is free software; you can redistribute it and/or |
3695 | 10 * modify it under the terms of the GNU Lesser General Public |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3759
diff
changeset
|
14 * FFmpeg is distributed in the hope that it will be useful, |
3695 | 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3759
diff
changeset
|
20 * License along with FFmpeg; if not, write to the Free Software |
5215 | 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
3695 | 22 */ |
23 | |
7760 | 24 #ifndef AVCODEC_VP56_H |
25 #define AVCODEC_VP56_H | |
3695 | 26 |
27 #include "vp56data.h" | |
28 #include "dsputil.h" | |
9428 | 29 #include "get_bits.h" |
5089 | 30 #include "bytestream.h" |
12029 | 31 #include "cabac.h" |
11665 | 32 #include "vp56dsp.h" |
3695 | 33 |
/* Forward declaration: lets the hook typedefs and prototypes in this header
 * name VP56Context before struct vp56_context itself is defined. */
8299 | 34 typedef struct vp56_context VP56Context; |
12217 | 35 |
/* Motion vector with 16-bit x and y components. The typedef name is
 * introduced through DECLARE_ALIGNED with an alignment argument of 4. */
36 typedef struct { | |
37 int16_t x; | |
38 int16_t y; | |
39 } DECLARE_ALIGNED(4, , VP56mv); | |
3695 | 40 |
/* Function-pointer types for the hooks stored as members of
 * struct vp56_context (parse_vector_adjustment, filter, parse_coeff,
 * default_models_init, parse_vector_models, parse_coeff_models and
 * parse_header). Every hook receives the VP56Context as its first argument;
 * only VP56ParseHeader returns a value (int). */
8299 | 41 typedef void (*VP56ParseVectorAdjustment)(VP56Context *s, |
8300 | 42 VP56mv *vect); |
8299 | 43 typedef void (*VP56Filter)(VP56Context *s, uint8_t *dst, uint8_t *src, |
8300 | 44 int offset1, int offset2, int stride, |
45 VP56mv mv, int mask, int select, int luma); | |
8299 | 46 typedef void (*VP56ParseCoeff)(VP56Context *s); |
47 typedef void (*VP56DefaultModelsInit)(VP56Context *s); | |
48 typedef void (*VP56ParseVectorModels)(VP56Context *s); | |
49 typedef void (*VP56ParseCoeffModels)(VP56Context *s); | |
8300 | 50 typedef int (*VP56ParseHeader)(VP56Context *s, const uint8_t *buf, |
51 int buf_size, int *golden_frame); | |
3695 | 52 |
/* State of the range decoder used by the vp56_rac_* / vp8_rac_* helpers in
 * this header:
 *   high      - current range; indexes ff_vp56_norm_shift during
 *               renormalization and is narrowed after each decoded bit
 *   bits      - see the field comment (kept negated)
 *   buffer    - next unread input position
 *   end       - input limit checked before refilling
 *   code_word - value compared against the split point when decoding a bit */
53 typedef struct { | |
54 int high; | |
12038 | 55 int bits; /* stored negated (i.e. negative "bits" is a positive number of |
56 bits left) in order to eliminate a negate in cache refilling */ | |
6297 | 57 const uint8_t *buffer; |
9919
c7c1c6b35a73
vp56dec: ensure range coder won't read past the end of input buffer
aurel
parents:
9428
diff
changeset
|
58 const uint8_t *end; |
12250
0d81ba00151a
vp56's arith decoder's code_word is only 16 bits, no need for unsigned long
conrad
parents:
12217
diff
changeset
|
59 unsigned int code_word; |
8299 | 60 } VP56RangeCoder; |
3695 | 61 |
/* DC predictor entry: a "not null" flag, the reference frame and the DC
 * coefficient. struct vp56_context keeps these in above_blocks and
 * left_block under its "DC predictors management" section. */
62 typedef struct { | |
63 uint8_t not_null_dc; | |
8299 | 64 VP56Frame ref_frame; |
3695 | 65 DCTELEM dc_coeff; |
8299 | 66 } VP56RefDc; |
3695 | 67 |
/* Per-macroblock record: a type byte and one motion vector.
 * struct vp56_context refers to these through its macroblocks pointer. */
68 typedef struct { | |
69 uint8_t type; | |
8299 | 70 VP56mv mv; |
71 } VP56Macroblock; | |
3695 | 72 |
/* Model tables for vector, coefficient and macroblock-type decoding.
 * struct vp56_context holds two instances (models[2]) plus a modelp pointer.
 * Several members are marked in their comments as vp5-only or vp6-only. */
5711 | 73 typedef struct { |
74 uint8_t coeff_reorder[64]; /* used in vp6 only */ | |
75 uint8_t coeff_index_to_pos[64]; /* used in vp6 only */ | |
76 uint8_t vector_sig[2]; /* delta sign */ | |
77 uint8_t vector_dct[2]; /* delta coding types */ | |
78 uint8_t vector_pdi[2][2]; /* predefined delta init */ | |
79 uint8_t vector_pdv[2][7]; /* predefined delta values */ | |
80 uint8_t vector_fdv[2][8]; /* 8 bit delta value definition */ | |
81 uint8_t coeff_dccv[2][11]; /* DC coeff value */ | |
82 uint8_t coeff_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */ | |
83 uint8_t coeff_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */ | |
84 uint8_t coeff_dcct[2][36][5]; /* DC coeff coding type */ | |
85 uint8_t coeff_runv[2][14]; /* run value (vp6 only) */ | |
86 uint8_t mb_type[3][10][10]; /* model for decoding MB type */ | |
87 uint8_t mb_types_stats[3][10][2];/* contextual, next MB type stats */ | |
8304 | 88 } VP56Model; |
5711 | 89 |
/* Decoder context (typedef'd as VP56Context) shared by the VP5 and VP6 code:
 * frames, range coders, per-frame geometry, dequantization values, DC
 * predictors, macroblock/motion-vector state, filtering hints, the
 * codec-specific hook pointers and the decoding models. Members that only
 * one codec uses are marked in their own comments. */
3695 | 90 struct vp56_context { |
91 AVCodecContext *avctx; | |
92 DSPContext dsp; | |
11665 | 93 VP56DSPContext vp56dsp; |
3695 | 94 ScanTable scantable; |
5714
314be1cfdcb0
add a new vp6a codec (add alpha plan support to vp6)
aurel
parents:
5711
diff
changeset
|
95 AVFrame frames[4]; |
314be1cfdcb0
add a new vp6a codec (add alpha plan support to vp6)
aurel
parents:
5711
diff
changeset
|
96 AVFrame *framep[6]; |
3695 | 97 uint8_t *edge_emu_buffer_alloc; |
98 uint8_t *edge_emu_buffer; | |
8299 | 99 VP56RangeCoder c; |
100 VP56RangeCoder cc; | |
101 VP56RangeCoder *ccp; | |
4308 | 102 int sub_version; |
3695 | 103 |
104 /* frame info */ | |
5714
314be1cfdcb0
add a new vp6a codec (add alpha plan support to vp6)
aurel
parents:
5711
diff
changeset
|
105 int plane_width[4]; |
314be1cfdcb0
add a new vp6a codec (add alpha plan support to vp6)
aurel
parents:
5711
diff
changeset
|
106 int plane_height[4]; |
3695 | 107 int mb_width; /* number of horizontal MB */ |
108 int mb_height; /* number of vertical MB */ | |
109 int block_offset[6]; | |
110 | |
111 int quantizer; | |
112 uint16_t dequant_dc; | |
113 uint16_t dequant_ac; | |
11053
c57e72227d7d
Make VP5 and VP6 decoders output a qscale table to allow for more automatic
reimar
parents:
10961
diff
changeset
|
114 int8_t *qscale_table; |
3695 | 115 |
116 /* DC predictors management */ | |
8299 | 117 VP56RefDc *above_blocks; |
118 VP56RefDc left_block[4]; | |
3695 | 119 int above_block_idx[6]; |
120 DCTELEM prev_dc[3][3]; /* [plane][ref_frame] */ | |
121 | |
122 /* blocks / macroblock */ | |
8299 | 123 VP56mb mb_type; |
124 VP56Macroblock *macroblocks; | |
11369 | 125 DECLARE_ALIGNED(16, DCTELEM, block_coeff)[6][64]; |
3695 | 126 |
127 /* motion vectors */ | |
8299 | 128 VP56mv mv[6]; /* vectors for each block in MB */ |
129 VP56mv vector_candidate[2]; | |
3695 | 130 int vector_candidate_pos; |
131 | |
132 /* filtering hints */ | |
4348 | 133 int filter_header; /* used in vp6 only */ |
3695 | 134 int deblock_filtering; |
135 int filter_selection; | |
136 int filter_mode; | |
137 int max_vector_length; | |
138 int sample_variance_threshold; | |
139 | |
140 uint8_t coeff_ctx[4][64]; /* used in vp5 only */ | |
141 uint8_t coeff_ctx_last[4]; /* used in vp5 only */ | |
142 | |
5714
314be1cfdcb0
add a new vp6a codec (add alpha plan support to vp6)
aurel
parents:
5711
diff
changeset
|
143 int has_alpha; |
314be1cfdcb0
add a new vp6a codec (add alpha plan support to vp6)
aurel
parents:
5711
diff
changeset
|
144 |
3695 | 145 /* upside-down flipping hints */ |
146 int flip; /* are we flipping ? */ | |
147 int frbi; /* first row block index in MB */ | |
148 int srbi; /* second row block index in MB */ | |
5714
314be1cfdcb0
add a new vp6a codec (add alpha plan support to vp6)
aurel
parents:
5711
diff
changeset
|
149 int stride[4]; /* stride for each plane */ |
3695 | 150 |
151 const uint8_t *vp56_coord_div; | |
8299 | 152 VP56ParseVectorAdjustment parse_vector_adjustment; |
153 VP56Filter filter; | |
154 VP56ParseCoeff parse_coeff; | |
155 VP56DefaultModelsInit default_models_init; | |
156 VP56ParseVectorModels parse_vector_models; | |
157 VP56ParseCoeffModels parse_coeff_models; | |
158 VP56ParseHeader parse_header; | |
5711 | 159 |
8304 | 160 VP56Model *modelp; |
161 VP56Model models[2]; | |
5821 | 162 |
163 /* huffman decoding */ | |
164 int use_huffman; | |
165 GetBitContext gb; | |
166 VLC dccv_vlc[2]; | |
167 VLC runv_vlc[2]; | |
168 VLC ract_vlc[2][3][6]; | |
169 unsigned int nb_null[2][2]; /* number of consecutive NULL DC/AC */ | |
3695 | 170 }; |
171 | |
172 | |
/* Decoder entry points that are declared here and defined outside this
 * header. ff_vp56_init takes flip/has_alpha arguments whose names match the
 * flip and has_alpha members of struct vp56_context.
 * NOTE(review): the meaning of the int return values of ff_vp56_free and
 * ff_vp56_decode_frame is not visible from this header - check the
 * definitions. */
12292 | 173 void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha); |
174 int ff_vp56_free(AVCodecContext *avctx); | |
175 void ff_vp56_init_dequant(VP56Context *s, int quantizer); | |
176 int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
12293 | 177 AVPacket *avpkt); |
3695 | 178 |
179 | |
180 /** | |
181 * vp56 specific range coder implementation | |
182 */ | |
183 | |
/* ff_vp56_norm_shift: 256-entry table, defined elsewhere, that
 * vp56_rac_renorm() indexes with the coder's `high` to obtain the left-shift
 * applied during renormalization.
 * ff_vp56_init_range_decoder: defined elsewhere; presumably prepares `c` to
 * read from buf[0..buf_size) - confirm against the definition. */
12365 | 184 extern const uint8_t ff_vp56_norm_shift[256]; |
185 void ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size); | |
3695 | 186 |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
187 static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c) |
3695 | 188 { |
12365 | 189 int shift = ff_vp56_norm_shift[c->high]; |
12033
5de2b84a1fc3
Eliminate another redundant instruction in vp56/8 arithcoder
darkshikari
parents:
12032
diff
changeset
|
190 int bits = c->bits; |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
191 unsigned int code_word = c->code_word; |
3695 | 192 |
12033
5de2b84a1fc3
Eliminate another redundant instruction in vp56/8 arithcoder
darkshikari
parents:
12032
diff
changeset
|
193 c->high <<= shift; |
5de2b84a1fc3
Eliminate another redundant instruction in vp56/8 arithcoder
darkshikari
parents:
12032
diff
changeset
|
194 code_word <<= shift; |
5de2b84a1fc3
Eliminate another redundant instruction in vp56/8 arithcoder
darkshikari
parents:
12032
diff
changeset
|
195 bits += shift; |
5de2b84a1fc3
Eliminate another redundant instruction in vp56/8 arithcoder
darkshikari
parents:
12032
diff
changeset
|
196 if(bits >= 0 && c->buffer < c->end) { |
12385 | 197 code_word |= bytestream_get_be16(&c->buffer) << bits; |
198 bits -= 16; | |
3695 | 199 } |
12033
5de2b84a1fc3
Eliminate another redundant instruction in vp56/8 arithcoder
darkshikari
parents:
12032
diff
changeset
|
200 c->bits = bits; |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
201 return code_word; |
12251
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
202 } |
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
203 |
12256 | 204 #if ARCH_X86 |
205 #include "x86/vp56_arith.h" | |
206 #endif | |
207 | |
208 #ifndef vp56_rac_get_prob | |
209 #define vp56_rac_get_prob vp56_rac_get_prob | |
12349 | 210 static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) |
12251
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
211 { |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
212 unsigned int code_word = vp56_rac_renorm(c); |
12251
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
213 unsigned int low = 1 + (((c->high - 1) * prob) >> 8); |
12385 | 214 unsigned int low_shift = low << 16; |
12251
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
215 int bit = code_word >= low_shift; |
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
216 |
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
217 c->high = bit ? c->high - low : low; |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
218 c->code_word = bit ? code_word - low_shift : code_word; |
12251
bbe8e7233c5d
Split renorm of vp56 arith decoder to its own function
conrad
parents:
12250
diff
changeset
|
219 |
3695 | 220 return bit; |
221 } | |
12256 | 222 #endif |
3695 | 223 |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
224 // branchy variant, to be used where there's a branch based on the bit decoded |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
225 static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
226 { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
227 unsigned long code_word = vp56_rac_renorm(c); |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
228 unsigned low = 1 + (((c->high - 1) * prob) >> 8); |
12385 | 229 unsigned low_shift = low << 16; |
12253
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
230 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
231 if (code_word >= low_shift) { |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
232 c->high -= low; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
233 c->code_word = code_word - low_shift; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
234 return 1; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
235 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
236 |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
237 c->high = low; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
238 c->code_word = code_word; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
239 return 0; |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
240 } |
112b3a0db187
Decode DCT tokens by branching to a different code path for each branch
conrad
parents:
12252
diff
changeset
|
241 |
12349 | 242 static av_always_inline int vp56_rac_get(VP56RangeCoder *c) |
3695 | 243 { |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
244 unsigned int code_word = vp56_rac_renorm(c); |
3695 | 245 /* equiprobable */ |
246 int low = (c->high + 1) >> 1; | |
12385 | 247 unsigned int low_shift = low << 16; |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
248 int bit = code_word >= low_shift; |
3695 | 249 if (bit) { |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
250 c->high -= low; |
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
251 code_word -= low_shift; |
3695 | 252 } else { |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
253 c->high = low; |
3695 | 254 } |
255 | |
12252
b8211cda076d
Move renormalization of the VP56 arith decoder to before decoding a bit
conrad
parents:
12251
diff
changeset
|
256 c->code_word = code_word; |
3695 | 257 return bit; |
258 } | |
259 | |
11921 | 260 // rounding is different than vp56_rac_get, is vp56_rac_get wrong? |
12349 | 261 static av_always_inline int vp8_rac_get(VP56RangeCoder *c) |
11921 | 262 { |
263 return vp56_rac_get_prob(c, 128); | |
264 } | |
265 | |
12355 | 266 static av_unused int vp56_rac_gets(VP56RangeCoder *c, int bits) |
3695 | 267 { |
268 int value = 0; | |
269 | |
270 while (bits--) { | |
271 value = (value << 1) | vp56_rac_get(c); | |
272 } | |
273 | |
274 return value; | |
275 } | |
276 | |
12355 | 277 static av_unused int vp8_rac_get_uint(VP56RangeCoder *c, int bits) |
11921 | 278 { |
279 int value = 0; | |
280 | |
281 while (bits--) { | |
282 value = (value << 1) | vp8_rac_get(c); | |
283 } | |
284 | |
285 return value; | |
286 } | |
287 | |
288 // fixme: add 1 bit to all the calls to this? | |
12355 | 289 static av_unused int vp8_rac_get_sint(VP56RangeCoder *c, int bits) |
11921 | 290 { |
291 int v; | |
292 | |
293 if (!vp8_rac_get(c)) | |
294 return 0; | |
295 | |
296 v = vp8_rac_get_uint(c, bits); | |
297 | |
298 if (vp8_rac_get(c)) | |
299 v = -v; | |
300 | |
301 return v; | |
302 } | |
303 | |
304 // P(7) | |
12355 | 305 static av_unused int vp56_rac_gets_nn(VP56RangeCoder *c, int bits) |
3695 | 306 { |
307 int v = vp56_rac_gets(c, 7) << 1; | |
308 return v + !v; | |
309 } | |
310 | |
12355 | 311 static av_unused int vp8_rac_get_nn(VP56RangeCoder *c) |
11921 | 312 { |
313 int v = vp8_rac_get_uint(c, 7) << 1; | |
314 return v + !v; | |
315 } | |
316 | |
12349 | 317 static av_always_inline |
318 int vp56_rac_get_tree(VP56RangeCoder *c, | |
319 const VP56Tree *tree, | |
320 const uint8_t *probs) | |
3695 | 321 { |
322 while (tree->val > 0) { | |
323 if (vp56_rac_get_prob(c, probs[tree->prob_idx])) | |
324 tree += tree->val; | |
325 else | |
326 tree++; | |
327 } | |
328 return -tree->val; | |
329 } | |
330 | |
11921 | 331 /** |
332 * This is identical to vp8_rac_get_tree except for the possibility of starting | |
333 * on a node other than the root node, needed for coeff decode where this is | |
334 * used to save a bit after a 0 token (by disallowing EOB to immediately follow.) | |
335 */ | |
12349 | 336 static av_always_inline |
337 int vp8_rac_get_tree_with_offset(VP56RangeCoder *c, const int8_t (*tree)[2], | |
338 const uint8_t *probs, int i) | |
11921 | 339 { |
340 do { | |
341 i = tree[i][vp56_rac_get_prob(c, probs[i])]; | |
342 } while (i > 0); | |
343 | |
344 return -i; | |
345 } | |
346 | |
347 // how probabilities are associated with decisions is different I think | |
348 // well, the new scheme fits in the old but this way has one fewer branches per decision | |
12349 | 349 static av_always_inline |
350 int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2], | |
351 const uint8_t *probs) | |
11921 | 352 { |
353 return vp8_rac_get_tree_with_offset(c, tree, probs, 0); | |
354 } | |
355 | |
356 // DCTextra | |
12349 | 357 static av_always_inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob) |
11921 | 358 { |
359 int v = 0; | |
360 | |
361 do { | |
362 v = (v<<1) + vp56_rac_get_prob(c, *prob++); | |
363 } while (*prob); | |
364 | |
365 return v; | |
366 } | |
367 | |
7760 | 368 #endif /* AVCODEC_VP56_H */ |