Mercurial > libavcodec.hg
annotate h264_cavlc.c @ 11108:0f845e20982a libavcodec
ARMv6 optimised put_pixels functions except xy2 variants
author | mru |
---|---|
date | Tue, 09 Feb 2010 16:13:21 +0000 |
parents | 7cfb7bb6e554 |
children | d1a855cb0a0c |
rev | line source |
---|---|
10866 | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding | |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
23 * @file libavcodec/h264_cavlc.c | |
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding. | |
25 * @author Michael Niedermayer <michaelni@gmx.at> | |
26 */ | |
27 | |
10893
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10868
diff
changeset
|
28 #define CABAC 0 |
2aafcafbe1f0
Replace cabac checks in inline functions from h264.h with constants.
michael
parents:
10868
diff
changeset
|
29 |
10866 | 30 #include "internal.h" |
31 #include "avcodec.h" | |
32 #include "mpegvideo.h" | |
33 #include "h264.h" | |
34 #include "h264data.h" // FIXME FIXME FIXME | |
35 #include "h264_mvpred.h" | |
36 #include "golomb.h" | |
37 | |
38 //#undef NDEBUG | |
39 #include <assert.h> | |
40 | |
/*
 * Maps the code number read for coded_block_pattern to a cbp value for
 * non-intra4x4 macroblocks; the macroblock decoder selects it when CHROMA
 * is false.
 * NOTE(review): the table has 16 entries while the visible caller only
 * rejects code numbers above 47 before indexing -- confirm the bound.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

/*
 * Same mapping as above for intra 4x4 macroblocks when CHROMA is false.
 * NOTE(review): 16 entries, same index-bound remark as the inter table.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};

/*
 * Code lengths of the chroma DC coeff_token VLC (4*5 codes), handed to
 * init_vlc() in ff_h264_decode_init_vlc(). decode_residual() splits the
 * decoded symbol as total_coeff = symbol>>2 and trailing_ones = symbol&3,
 * so each row below is one total_coeff value (0..4) and each column one
 * trailing_ones value (0..3).
 * Entries of length 0 are presumably unused combinations -- confirm against
 * init_vlc().
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

/* Code words matching chroma_dc_coeff_token_len, same layout. */
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};

/*
 * Code lengths of the four luma/chroma-AC coeff_token VLCs (4*17 codes each).
 * decode_residual() picks one of the four tables from the predicted
 * non-zero count and splits the decoded symbol as total_coeff = symbol>>2
 * (0..16) and trailing_ones = symbol&3.
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

/* Code words matching coeff_token_len, same layout. */
static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};

/*
 * Code lengths of the total_zeros VLCs. Row r is used by decode_residual()
 * when total_coeff == r+1; only the first 15 rows are initialised and only
 * 15 VLCs are built from them, although the array is declared with 16 rows.
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

/* Code words matching total_zeros_len, same layout. */
static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};

/*
 * Code lengths of the chroma DC total_zeros VLCs; row r is used when
 * total_coeff == r+1.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

/* Code words matching chroma_dc_total_zeros_len, same layout. */
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};

/*
 * Code lengths of the run_before VLCs. Rows 0..5 feed run_vlc[0..5]
 * (7 codes each, selected by zeros_left-1 while zeros_left < 7); row 6 feeds
 * run7_vlc (16 codes), used for zeros_left >= 7.
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

/* Code words matching run_len, same layout. */
static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
194 | |
/*
 * VLC decoder objects and their statically allocated lookup storage.
 * ff_h264_decode_init_vlc() points each VLC's .table at the matching array
 * below and records the *_size value as .table_allocated before calling
 * init_vlc() with INIT_VLC_USE_NEW_STATIC.
 */

/* The four coeff_token VLCs share one packed array; the per-table sizes
 * must sum to the first dimension of coeff_token_vlc_tables (checked by an
 * assert at init time). */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

/* coeff_token VLC used for the chroma DC block. */
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* total_zeros VLCs, one per total_coeff value 1..15. */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* total_zeros VLCs for the chroma DC block, one per total_coeff value 1..3. */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* run_before VLCs for zeros_left 1..6. */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run_before VLC for zeros_left >= 7. */
static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/*
 * Direct lookup for coefficient levels, filled by init_cavlc_level_tab().
 * Indexed by [suffix_length][next LEVEL_TAB_BITS bits of the stream];
 * element [0] is either the decoded level or 100+prefix as an escape marker,
 * element [1] is the number of bits to skip.
 */
#define LEVEL_TAB_BITS 8
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
221 | |
222 | |
223 /** | |
224 * gets the predicted number of non-zero coefficients. | |
225 * @param n block index | |
226 */ | |
227 static inline int pred_non_zero_count(H264Context *h, int n){ | |
228 const int index8= scan8[n]; | |
229 const int left= h->non_zero_count_cache[index8 - 1]; | |
230 const int top = h->non_zero_count_cache[index8 - 8]; | |
231 int i= left + top; | |
232 | |
233 if(i<64) i= (i+1)>>1; | |
234 | |
235 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31); | |
236 | |
237 return i&31; | |
238 } | |
239 | |
240 static av_cold void init_cavlc_level_tab(void){ | |
241 int suffix_length, mask; | |
242 unsigned int i; | |
243 | |
244 for(suffix_length=0; suffix_length<7; suffix_length++){ | |
245 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){ | |
246 int prefix= LEVEL_TAB_BITS - av_log2(2*i); | |
247 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length); | |
248 | |
249 mask= -(level_code&1); | |
250 level_code= (((2+level_code)>>1) ^ mask) - mask; | |
251 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){ | |
252 cavlc_level_tab[suffix_length][i][0]= level_code; | |
253 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length; | |
254 }else if(prefix + 1 <= LEVEL_TAB_BITS){ | |
255 cavlc_level_tab[suffix_length][i][0]= prefix+100; | |
256 cavlc_level_tab[suffix_length][i][1]= prefix + 1; | |
257 }else{ | |
258 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100; | |
259 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS; | |
260 } | |
261 } | |
262 } | |
263 } | |
264 | |
265 av_cold void ff_h264_decode_init_vlc(void){ | |
266 static int done = 0; | |
267 | |
268 if (!done) { | |
269 int i; | |
270 int offset; | |
271 done = 1; | |
272 | |
273 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table; | |
274 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size; | |
275 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, | |
276 &chroma_dc_coeff_token_len [0], 1, 1, | |
277 &chroma_dc_coeff_token_bits[0], 1, 1, | |
278 INIT_VLC_USE_NEW_STATIC); | |
279 | |
280 offset = 0; | |
281 for(i=0; i<4; i++){ | |
282 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; | |
283 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i]; | |
284 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, | |
285 &coeff_token_len [i][0], 1, 1, | |
286 &coeff_token_bits[i][0], 1, 1, | |
287 INIT_VLC_USE_NEW_STATIC); | |
288 offset += coeff_token_vlc_tables_size[i]; | |
289 } | |
290 /* | |
291 * This is a one time safety check to make sure that | |
292 * the packed static coeff_token_vlc table sizes | |
293 * were initialized correctly. | |
294 */ | |
295 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables)); | |
296 | |
297 for(i=0; i<3; i++){ | |
298 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i]; | |
299 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size; | |
300 init_vlc(&chroma_dc_total_zeros_vlc[i], | |
301 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, | |
302 &chroma_dc_total_zeros_len [i][0], 1, 1, | |
303 &chroma_dc_total_zeros_bits[i][0], 1, 1, | |
304 INIT_VLC_USE_NEW_STATIC); | |
305 } | |
306 for(i=0; i<15; i++){ | |
307 total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; | |
308 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; | |
309 init_vlc(&total_zeros_vlc[i], | |
310 TOTAL_ZEROS_VLC_BITS, 16, | |
311 &total_zeros_len [i][0], 1, 1, | |
312 &total_zeros_bits[i][0], 1, 1, | |
313 INIT_VLC_USE_NEW_STATIC); | |
314 } | |
315 | |
316 for(i=0; i<6; i++){ | |
317 run_vlc[i].table = run_vlc_tables[i]; | |
318 run_vlc[i].table_allocated = run_vlc_tables_size; | |
319 init_vlc(&run_vlc[i], | |
320 RUN_VLC_BITS, 7, | |
321 &run_len [i][0], 1, 1, | |
322 &run_bits[i][0], 1, 1, | |
323 INIT_VLC_USE_NEW_STATIC); | |
324 } | |
325 run7_vlc.table = run7_vlc_table, | |
326 run7_vlc.table_allocated = run7_vlc_table_size; | |
327 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, | |
328 &run_len [6][0], 1, 1, | |
329 &run_bits[6][0], 1, 1, | |
330 INIT_VLC_USE_NEW_STATIC); | |
331 | |
332 init_cavlc_level_tab(); | |
333 } | |
334 } | |
335 | |
336 /** | |
337 * | |
338 */ | |
339 static inline int get_level_prefix(GetBitContext *gb){ | |
340 unsigned int buf; | |
341 int log; | |
342 | |
343 OPEN_READER(re, gb); | |
344 UPDATE_CACHE(re, gb); | |
345 buf=GET_CACHE(re, gb); | |
346 | |
347 log= 32 - av_log2(buf); | |
348 #ifdef TRACE | |
349 print_bin(buf>>(32-log), log); | |
350 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); | |
351 #endif | |
352 | |
353 LAST_SKIP_BITS(re, gb, log); | |
354 CLOSE_READER(re, gb); | |
355 | |
356 return log-1; | |
357 } | |
358 | |
359 /** | |
360 * decodes a residual block. | |
361 * @param n block index | |
362 * @param scantable scantable | |
363 * @param max_coeff number of coefficients in the block | |
364 * @return <0 if an error occurred | |
365 */ | |
366 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){ | |
367 MpegEncContext * const s = &h->s; | |
368 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; | |
369 int level[16]; | |
370 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before; | |
371 | |
372 //FIXME put trailing_onex into the context | |
373 | |
374 if(n == CHROMA_DC_BLOCK_INDEX){ | |
375 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); | |
376 total_coeff= coeff_token>>2; | |
377 }else{ | |
378 if(n == LUMA_DC_BLOCK_INDEX){ | |
379 total_coeff= pred_non_zero_count(h, 0); | |
380 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); | |
381 total_coeff= coeff_token>>2; | |
382 }else{ | |
383 total_coeff= pred_non_zero_count(h, n); | |
384 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); | |
385 total_coeff= coeff_token>>2; | |
386 h->non_zero_count_cache[ scan8[n] ]= total_coeff; | |
387 } | |
388 } | |
389 | |
390 //FIXME set last_non_zero? | |
391 | |
392 if(total_coeff==0) | |
393 return 0; | |
394 if(total_coeff > (unsigned)max_coeff) { | |
395 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff); | |
396 return -1; | |
397 } | |
398 | |
399 trailing_ones= coeff_token&3; | |
400 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff); | |
401 assert(total_coeff<=16); | |
402 | |
403 i = show_bits(gb, 3); | |
404 skip_bits(gb, trailing_ones); | |
405 level[0] = 1-((i&4)>>1); | |
406 level[1] = 1-((i&2) ); | |
407 level[2] = 1-((i&1)<<1); | |
408 | |
409 if(trailing_ones<total_coeff) { | |
410 int mask, prefix; | |
10991 | 411 int suffix_length = total_coeff > 10 & trailing_ones < 3; |
10866 | 412 int bitsi= show_bits(gb, LEVEL_TAB_BITS); |
413 int level_code= cavlc_level_tab[suffix_length][bitsi][0]; | |
414 | |
415 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); | |
416 if(level_code >= 100){ | |
417 prefix= level_code - 100; | |
418 if(prefix == LEVEL_TAB_BITS) | |
419 prefix += get_level_prefix(gb); | |
420 | |
421 //first coefficient has suffix_length equal to 0 or 1 | |
422 if(prefix<14){ //FIXME try to build a large unified VLC table for all this | |
423 if(suffix_length) | |
424 level_code= (prefix<<1) + get_bits1(gb); //part | |
425 else | |
426 level_code= prefix; //part | |
427 }else if(prefix==14){ | |
428 if(suffix_length) | |
429 level_code= (prefix<<1) + get_bits1(gb); //part | |
430 else | |
431 level_code= prefix + get_bits(gb, 4); //part | |
432 }else{ | |
433 level_code= 30 + get_bits(gb, prefix-3); //part | |
434 if(prefix>=16) | |
435 level_code += (1<<(prefix-3))-4096; | |
436 } | |
437 | |
438 if(trailing_ones < 3) level_code += 2; | |
439 | |
440 suffix_length = 2; | |
441 mask= -(level_code&1); | |
442 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask; | |
443 }else{ | |
10993
3932fd1a1c25
Optimize level_code computation, 6cpu cycles speedup.
michael
parents:
10991
diff
changeset
|
444 level_code += ((level_code>>31)|1) & -(trailing_ones < 3); |
10866 | 445 |
10994 | 446 suffix_length = 1 + (level_code + 3U > 6U); |
10866 | 447 level[trailing_ones]= level_code; |
448 } | |
449 | |
450 //remaining coefficients have suffix_length > 0 | |
451 for(i=trailing_ones+1;i<total_coeff;i++) { | |
452 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX }; | |
453 int bitsi= show_bits(gb, LEVEL_TAB_BITS); | |
454 level_code= cavlc_level_tab[suffix_length][bitsi][0]; | |
455 | |
456 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); | |
457 if(level_code >= 100){ | |
458 prefix= level_code - 100; | |
459 if(prefix == LEVEL_TAB_BITS){ | |
460 prefix += get_level_prefix(gb); | |
461 } | |
462 if(prefix<15){ | |
463 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length); | |
464 }else{ | |
465 level_code = (15<<suffix_length) + get_bits(gb, prefix-3); | |
466 if(prefix>=16) | |
467 level_code += (1<<(prefix-3))-4096; | |
468 } | |
469 mask= -(level_code&1); | |
470 level_code= (((2+level_code)>>1) ^ mask) - mask; | |
471 } | |
472 level[i]= level_code; | |
10995
5352afcd6748
Optimize suffix_length computation, 1 cpu cycle speedup.
michael
parents:
10994
diff
changeset
|
473 suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length]; |
10866 | 474 } |
475 } | |
476 | |
477 if(total_coeff == max_coeff) | |
478 zeros_left=0; | |
479 else{ | |
480 if(n == CHROMA_DC_BLOCK_INDEX) | |
10996 | 481 zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); |
10866 | 482 else |
10996 | 483 zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); |
10866 | 484 } |
485 | |
486 coeff_num = zeros_left + total_coeff - 1; | |
487 j = scantable[coeff_num]; | |
488 if(n > 24){ | |
489 block[j] = level[0]; | |
490 for(i=1;i<total_coeff;i++) { | |
491 if(zeros_left <= 0) | |
492 run_before = 0; | |
493 else if(zeros_left < 7){ | |
10996 | 494 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); |
10866 | 495 }else{ |
496 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); | |
497 } | |
498 zeros_left -= run_before; | |
499 coeff_num -= 1 + run_before; | |
500 j= scantable[ coeff_num ]; | |
501 | |
502 block[j]= level[i]; | |
503 } | |
504 }else{ | |
505 block[j] = (level[0] * qmul[j] + 32)>>6; | |
506 for(i=1;i<total_coeff;i++) { | |
507 if(zeros_left <= 0) | |
508 run_before = 0; | |
509 else if(zeros_left < 7){ | |
10996 | 510 run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); |
10866 | 511 }else{ |
512 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); | |
513 } | |
514 zeros_left -= run_before; | |
515 coeff_num -= 1 + run_before; | |
516 j= scantable[ coeff_num ]; | |
517 | |
518 block[j]= (level[i] * qmul[j] + 32)>>6; | |
519 } | |
520 } | |
521 | |
522 if(zeros_left<0){ | |
523 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y); | |
524 return -1; | |
525 } | |
526 | |
527 return 0; | |
528 } | |
529 | |
530 int ff_h264_decode_mb_cavlc(H264Context *h){ | |
531 MpegEncContext * const s = &h->s; | |
532 int mb_xy; | |
533 int partition_count; | |
534 unsigned int mb_type, cbp; | |
535 int dct8x8_allowed= h->pps.transform_8x8_mode; | |
536 | |
537 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; | |
538 | |
539 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); | |
540 cbp = 0; /* avoid warning. FIXME: find a solution without slowing | |
541 down the code */ | |
542 if(h->slice_type_nos != FF_I_TYPE){ | |
543 if(s->mb_skip_run==-1) | |
544 s->mb_skip_run= get_ue_golomb(&s->gb); | |
545 | |
546 if (s->mb_skip_run--) { | |
547 if(FRAME_MBAFF && (s->mb_y&1) == 0){ | |
548 if(s->mb_skip_run==0) | |
549 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb); | |
550 else | |
551 predict_field_decoding_flag(h); | |
552 } | |
553 decode_mb_skip(h); | |
554 return 0; | |
555 } | |
556 } | |
557 if(FRAME_MBAFF){ | |
558 if( (s->mb_y&1) == 0 ) | |
559 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb); | |
560 } | |
561 | |
562 h->prev_mb_skipped= 0; | |
563 | |
564 mb_type= get_ue_golomb(&s->gb); | |
565 if(h->slice_type_nos == FF_B_TYPE){ | |
566 if(mb_type < 23){ | |
567 partition_count= b_mb_type_info[mb_type].partition_count; | |
568 mb_type= b_mb_type_info[mb_type].type; | |
569 }else{ | |
570 mb_type -= 23; | |
571 goto decode_intra_mb; | |
572 } | |
573 }else if(h->slice_type_nos == FF_P_TYPE){ | |
574 if(mb_type < 5){ | |
575 partition_count= p_mb_type_info[mb_type].partition_count; | |
576 mb_type= p_mb_type_info[mb_type].type; | |
577 }else{ | |
578 mb_type -= 5; | |
579 goto decode_intra_mb; | |
580 } | |
581 }else{ | |
582 assert(h->slice_type_nos == FF_I_TYPE); | |
583 if(h->slice_type == FF_SI_TYPE && mb_type) | |
584 mb_type--; | |
585 decode_intra_mb: | |
586 if(mb_type > 25){ | |
587 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y); | |
588 return -1; | |
589 } | |
590 partition_count=0; | |
591 cbp= i_mb_type_info[mb_type].cbp; | |
592 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; | |
593 mb_type= i_mb_type_info[mb_type].type; | |
594 } | |
595 | |
596 if(MB_FIELD) | |
597 mb_type |= MB_TYPE_INTERLACED; | |
598 | |
599 h->slice_table[ mb_xy ]= h->slice_num; | |
600 | |
601 if(IS_INTRA_PCM(mb_type)){ | |
602 unsigned int x; | |
603 | |
604 // We assume these blocks are very rare so we do not optimize it. | |
605 align_get_bits(&s->gb); | |
606 | |
607 // The pixels are stored in the same order as levels in h->mb array. | |
608 for(x=0; x < (CHROMA ? 384 : 256); x++){ | |
609 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8); | |
610 } | |
611 | |
612 // In deblocking, the quantizer is 0 | |
613 s->current_picture.qscale_table[mb_xy]= 0; | |
614 // All coeffs are present | |
10906 | 615 memset(h->non_zero_count[mb_xy], 16, 32); |
10866 | 616 |
617 s->current_picture.mb_type[mb_xy]= mb_type; | |
618 return 0; | |
619 } | |
620 | |
621 if(MB_MBAFF){ | |
622 h->ref_count[0] <<= 1; | |
623 h->ref_count[1] <<= 1; | |
624 } | |
625 | |
10907 | 626 fill_decode_caches(h, mb_type); |
10866 | 627 |
628 //mb_pred | |
629 if(IS_INTRA(mb_type)){ | |
630 int pred_mode; | |
631 // init_top_left_availability(h); | |
632 if(IS_INTRA4x4(mb_type)){ | |
633 int i; | |
634 int di = 1; | |
635 if(dct8x8_allowed && get_bits1(&s->gb)){ | |
636 mb_type |= MB_TYPE_8x8DCT; | |
637 di = 4; | |
638 } | |
639 | |
640 // fill_intra4x4_pred_table(h); | |
641 for(i=0; i<16; i+=di){ | |
642 int mode= pred_intra_mode(h, i); | |
643 | |
644 if(!get_bits1(&s->gb)){ | |
645 const int rem_mode= get_bits(&s->gb, 3); | |
646 mode = rem_mode + (rem_mode >= mode); | |
647 } | |
648 | |
649 if(di==4) | |
650 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); | |
651 else | |
652 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode; | |
653 } | |
654 ff_h264_write_back_intra_pred_mode(h); | |
655 if( ff_h264_check_intra4x4_pred_mode(h) < 0) | |
656 return -1; | |
657 }else{ | |
658 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode); | |
659 if(h->intra16x16_pred_mode < 0) | |
660 return -1; | |
661 } | |
662 if(CHROMA){ | |
663 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb)); | |
664 if(pred_mode < 0) | |
665 return -1; | |
666 h->chroma_pred_mode= pred_mode; | |
667 } | |
668 }else if(partition_count==4){ | |
669 int i, j, sub_partition_count[4], list, ref[2][4]; | |
670 | |
671 if(h->slice_type_nos == FF_B_TYPE){ | |
672 for(i=0; i<4; i++){ | |
673 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb); | |
674 if(h->sub_mb_type[i] >=13){ | |
675 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); | |
676 return -1; | |
677 } | |
678 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; | |
679 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; | |
680 } | |
10946 | 681 if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) { |
10866 | 682 ff_h264_pred_direct_motion(h, &mb_type); |
683 h->ref_cache[0][scan8[4]] = | |
684 h->ref_cache[1][scan8[4]] = | |
685 h->ref_cache[0][scan8[12]] = | |
686 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; | |
687 } | |
688 }else{ | |
689 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ? | |
690 for(i=0; i<4; i++){ | |
691 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb); | |
692 if(h->sub_mb_type[i] >=4){ | |
693 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); | |
694 return -1; | |
695 } | |
696 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; | |
697 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; | |
698 } | |
699 } | |
700 | |
701 for(list=0; list<h->list_count; list++){ | |
702 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; | |
703 for(i=0; i<4; i++){ | |
704 if(IS_DIRECT(h->sub_mb_type[i])) continue; | |
705 if(IS_DIR(h->sub_mb_type[i], 0, list)){ | |
706 unsigned int tmp; | |
707 if(ref_count == 1){ | |
708 tmp= 0; | |
709 }else if(ref_count == 2){ | |
710 tmp= get_bits1(&s->gb)^1; | |
711 }else{ | |
712 tmp= get_ue_golomb_31(&s->gb); | |
713 if(tmp>=ref_count){ | |
714 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp); | |
715 return -1; | |
716 } | |
717 } | |
718 ref[list][i]= tmp; | |
719 }else{ | |
720 //FIXME | |
721 ref[list][i] = -1; | |
722 } | |
723 } | |
724 } | |
725 | |
726 if(dct8x8_allowed) | |
727 dct8x8_allowed = get_dct8x8_allowed(h); | |
728 | |
729 for(list=0; list<h->list_count; list++){ | |
730 for(i=0; i<4; i++){ | |
731 if(IS_DIRECT(h->sub_mb_type[i])) { | |
732 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ]; | |
733 continue; | |
734 } | |
735 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]= | |
736 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; | |
737 | |
738 if(IS_DIR(h->sub_mb_type[i], 0, list)){ | |
739 const int sub_mb_type= h->sub_mb_type[i]; | |
740 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; | |
741 for(j=0; j<sub_partition_count[i]; j++){ | |
742 int mx, my; | |
743 const int index= 4*i + block_width*j; | |
744 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ]; | |
745 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my); | |
746 mx += get_se_golomb(&s->gb); | |
747 my += get_se_golomb(&s->gb); | |
748 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
749 | |
750 if(IS_SUB_8X8(sub_mb_type)){ | |
751 mv_cache[ 1 ][0]= | |
752 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; | |
753 mv_cache[ 1 ][1]= | |
754 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; | |
755 }else if(IS_SUB_8X4(sub_mb_type)){ | |
756 mv_cache[ 1 ][0]= mx; | |
757 mv_cache[ 1 ][1]= my; | |
758 }else if(IS_SUB_4X8(sub_mb_type)){ | |
759 mv_cache[ 8 ][0]= mx; | |
760 mv_cache[ 8 ][1]= my; | |
761 } | |
762 mv_cache[ 0 ][0]= mx; | |
763 mv_cache[ 0 ][1]= my; | |
764 } | |
765 }else{ | |
766 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0]; | |
767 p[0] = p[1]= | |
768 p[8] = p[9]= 0; | |
769 } | |
770 } | |
771 } | |
772 }else if(IS_DIRECT(mb_type)){ | |
773 ff_h264_pred_direct_motion(h, &mb_type); | |
774 dct8x8_allowed &= h->sps.direct_8x8_inference_flag; | |
775 }else{ | |
776 int list, mx, my, i; | |
777 //FIXME we should set ref_idx_l? to 0 if we use that later ... | |
778 if(IS_16X16(mb_type)){ | |
779 for(list=0; list<h->list_count; list++){ | |
780 unsigned int val; | |
781 if(IS_DIR(mb_type, 0, list)){ | |
782 if(h->ref_count[list]==1){ | |
783 val= 0; | |
784 }else if(h->ref_count[list]==2){ | |
785 val= get_bits1(&s->gb)^1; | |
786 }else{ | |
787 val= get_ue_golomb_31(&s->gb); | |
788 if(val >= h->ref_count[list]){ | |
789 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); | |
790 return -1; | |
791 } | |
792 } | |
793 }else | |
794 val= LIST_NOT_USED&0xFF; | |
795 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1); | |
796 } | |
797 for(list=0; list<h->list_count; list++){ | |
798 unsigned int val; | |
799 if(IS_DIR(mb_type, 0, list)){ | |
800 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my); | |
801 mx += get_se_golomb(&s->gb); | |
802 my += get_se_golomb(&s->gb); | |
803 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
804 | |
805 val= pack16to32(mx,my); | |
806 }else | |
807 val=0; | |
808 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4); | |
809 } | |
810 } | |
811 else if(IS_16X8(mb_type)){ | |
812 for(list=0; list<h->list_count; list++){ | |
813 for(i=0; i<2; i++){ | |
814 unsigned int val; | |
815 if(IS_DIR(mb_type, i, list)){ | |
816 if(h->ref_count[list] == 1){ | |
817 val= 0; | |
818 }else if(h->ref_count[list] == 2){ | |
819 val= get_bits1(&s->gb)^1; | |
820 }else{ | |
821 val= get_ue_golomb_31(&s->gb); | |
822 if(val >= h->ref_count[list]){ | |
823 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); | |
824 return -1; | |
825 } | |
826 } | |
827 }else | |
828 val= LIST_NOT_USED&0xFF; | |
829 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1); | |
830 } | |
831 } | |
832 for(list=0; list<h->list_count; list++){ | |
833 for(i=0; i<2; i++){ | |
834 unsigned int val; | |
835 if(IS_DIR(mb_type, i, list)){ | |
836 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my); | |
837 mx += get_se_golomb(&s->gb); | |
838 my += get_se_golomb(&s->gb); | |
839 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
840 | |
841 val= pack16to32(mx,my); | |
842 }else | |
843 val=0; | |
844 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4); | |
845 } | |
846 } | |
847 }else{ | |
848 assert(IS_8X16(mb_type)); | |
849 for(list=0; list<h->list_count; list++){ | |
850 for(i=0; i<2; i++){ | |
851 unsigned int val; | |
852 if(IS_DIR(mb_type, i, list)){ //FIXME optimize | |
853 if(h->ref_count[list]==1){ | |
854 val= 0; | |
855 }else if(h->ref_count[list]==2){ | |
856 val= get_bits1(&s->gb)^1; | |
857 }else{ | |
858 val= get_ue_golomb_31(&s->gb); | |
859 if(val >= h->ref_count[list]){ | |
860 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); | |
861 return -1; | |
862 } | |
863 } | |
864 }else | |
865 val= LIST_NOT_USED&0xFF; | |
866 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1); | |
867 } | |
868 } | |
869 for(list=0; list<h->list_count; list++){ | |
870 for(i=0; i<2; i++){ | |
871 unsigned int val; | |
872 if(IS_DIR(mb_type, i, list)){ | |
873 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); | |
874 mx += get_se_golomb(&s->gb); | |
875 my += get_se_golomb(&s->gb); | |
876 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
877 | |
878 val= pack16to32(mx,my); | |
879 }else | |
880 val=0; | |
881 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4); | |
882 } | |
883 } | |
884 } | |
885 } | |
886 | |
887 if(IS_INTER(mb_type)) | |
888 write_back_motion(h, mb_type); | |
889 | |
890 if(!IS_INTRA16x16(mb_type)){ | |
891 cbp= get_ue_golomb(&s->gb); | |
892 if(cbp > 47){ | |
893 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y); | |
894 return -1; | |
895 } | |
896 | |
897 if(CHROMA){ | |
898 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp]; | |
899 else cbp= golomb_to_inter_cbp [cbp]; | |
900 }else{ | |
901 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp]; | |
902 else cbp= golomb_to_inter_cbp_gray[cbp]; | |
903 } | |
904 } | |
905 | |
906 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ | |
10999 | 907 mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb); |
10866 | 908 } |
10920 | 909 h->cbp= |
910 h->cbp_table[mb_xy]= cbp; | |
10866 | 911 s->current_picture.mb_type[mb_xy]= mb_type; |
912 | |
913 if(cbp || IS_INTRA16x16(mb_type)){ | |
914 int i8x8, i4x4, chroma_idx; | |
915 int dquant; | |
916 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; | |
917 const uint8_t *scan, *scan8x8, *dc_scan; | |
918 | |
919 if(IS_INTERLACED(mb_type)){ | |
920 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; | |
921 scan= s->qscale ? h->field_scan : h->field_scan_q0; | |
922 dc_scan= luma_dc_field_scan; | |
923 }else{ | |
924 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; | |
925 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; | |
926 dc_scan= luma_dc_zigzag_scan; | |
927 } | |
928 | |
929 dquant= get_se_golomb(&s->gb); | |
930 | |
10959 | 931 s->qscale += dquant; |
10866 | 932 |
933 if(((unsigned)s->qscale) > 51){ | |
934 if(s->qscale<0) s->qscale+= 52; | |
935 else s->qscale-= 52; | |
10959 | 936 if(((unsigned)s->qscale) > 51){ |
937 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y); | |
938 return -1; | |
939 } | |
10866 | 940 } |
941 | |
942 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale); | |
943 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale); | |
944 if(IS_INTRA16x16(mb_type)){ | |
945 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ | |
946 return -1; //FIXME continue if partitioned and other return -1 too | |
947 } | |
948 | |
949 assert((cbp&15) == 0 || (cbp&15) == 15); | |
950 | |
951 if(cbp&15){ | |
952 for(i8x8=0; i8x8<4; i8x8++){ | |
953 for(i4x4=0; i4x4<4; i4x4++){ | |
954 const int index= i4x4 + 4*i8x8; | |
955 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ | |
956 return -1; | |
957 } | |
958 } | |
959 } | |
960 }else{ | |
961 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); | |
962 } | |
963 }else{ | |
964 for(i8x8=0; i8x8<4; i8x8++){ | |
965 if(cbp & (1<<i8x8)){ | |
966 if(IS_8x8DCT(mb_type)){ | |
967 DCTELEM *buf = &h->mb[64*i8x8]; | |
968 uint8_t *nnz; | |
969 for(i4x4=0; i4x4<4; i4x4++){ | |
970 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, | |
971 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) | |
972 return -1; | |
973 } | |
974 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | |
975 nnz[0] += nnz[1] + nnz[8] + nnz[9]; | |
976 }else{ | |
977 for(i4x4=0; i4x4<4; i4x4++){ | |
978 const int index= i4x4 + 4*i8x8; | |
979 | |
980 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ | |
981 return -1; | |
982 } | |
983 } | |
984 } | |
985 }else{ | |
986 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | |
987 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; | |
988 } | |
989 } | |
990 } | |
991 | |
992 if(cbp&0x30){ | |
993 for(chroma_idx=0; chroma_idx<2; chroma_idx++) | |
994 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){ | |
995 return -1; | |
996 } | |
997 } | |
998 | |
999 if(cbp&0x20){ | |
1000 for(chroma_idx=0; chroma_idx<2; chroma_idx++){ | |
1001 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]]; | |
1002 for(i4x4=0; i4x4<4; i4x4++){ | |
1003 const int index= 16 + 4*chroma_idx + i4x4; | |
1004 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){ | |
1005 return -1; | |
1006 } | |
1007 } | |
1008 } | |
1009 }else{ | |
1010 uint8_t * const nnz= &h->non_zero_count_cache[0]; | |
1011 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = | |
1012 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; | |
1013 } | |
1014 }else{ | |
1015 uint8_t * const nnz= &h->non_zero_count_cache[0]; | |
1016 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); | |
1017 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = | |
1018 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; | |
1019 } | |
1020 s->current_picture.qscale_table[mb_xy]= s->qscale; | |
1021 write_back_non_zero_count(h); | |
1022 | |
1023 if(MB_MBAFF){ | |
1024 h->ref_count[0] >>= 1; | |
1025 h->ref_count[1] >>= 1; | |
1026 } | |
1027 | |
1028 return 0; | |
1029 } | |
1030 |