comparison h264_cavlc.c @ 10866:d26e9b4d2ca1 libavcodec

Split cavlc out of h264.c. Seems to speed the code up a little... The placement of many generic functions between h264.c and h264.h is still open. Currently they are placed somewhat randomly between the two.
author michael
date Wed, 13 Jan 2010 01:59:19 +0000
parents
children 13a84faba50d
comparison
equal deleted inserted replaced
10865:bcdc5343a577 10866:d26e9b4d2ca1
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file libavcodec/h264_cavlc.c
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28 #include "internal.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h" // FIXME FIXME FIXME
33 #include "h264_mvpred.h"
34 #include "golomb.h"
35
36 #if ARCH_X86
37 #include "x86/h264_i386.h"
38 #endif
39
40 //#undef NDEBUG
41 #include <assert.h>
42
/*
 * Coded-block-pattern (cbp) lookup tables.
 *
 * ff_h264_decode_mb_cavlc() reads an unsigned Exp-Golomb code and uses it as
 * an index into one of these tables to obtain the cbp value:
 *  - the 48-entry tables when CHROMA is set, the 16-entry "_gray" tables
 *    otherwise;
 *  - the "intra4x4" tables for intra 4x4 macroblocks, the "inter" tables for
 *    all other macroblock types that carry a coded cbp.
 */
43 static const uint8_t golomb_to_intra4x4_cbp[48]={
44 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46,
45 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4,
46 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41
47 };
48
49 static const uint8_t golomb_to_inter_cbp[48]={
50 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13,
51 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
52 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
53 };
54
/* Variants used when CHROMA is not set; note these have only 16 entries. */
55 static const uint8_t golomb_to_inter_cbp_gray[16]={
56 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
57 };
58
59 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
60 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
61 };
62
/*
 * Code lengths and code words handed to init_vlc() to build
 * chroma_dc_coeff_token_vlc (4*5 codes).
 * decode_residual() splits the decoded symbol as
 *   total_coeff = symbol >> 2, trailing_ones = symbol & 3,
 * so an entry's index is presumably 4*total_coeff + trailing_ones
 * (assumes init_vlc() assigns symbols by table index -- TODO confirm).
 * NOTE(review): a length of 0 looks like "no such code" -- confirm against
 * init_vlc().
 */
63 static const uint8_t chroma_dc_coeff_token_len[4*5]={
64 2, 0, 0, 0,
65 6, 1, 0, 0,
66 6, 6, 3, 0,
67 6, 7, 7, 6,
68 6, 8, 8, 7,
69 };
70
71 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
72 1, 0, 0, 0,
73 7, 1, 0, 0,
74 4, 6, 1, 0,
75 3, 3, 2, 5,
76 2, 3, 2, 0,
77 };
78
/*
 * Code lengths and code words for the four coeff_token VLCs (4*17 codes
 * each); row i is handed to init_vlc() for coeff_token_vlc[i].
 * decode_residual() picks the row from the predicted non-zero count via its
 * coeff_token_table_index[] and splits the decoded symbol the same way as
 * for the chroma DC table (symbol >> 2 and symbol & 3).
 */
79 static const uint8_t coeff_token_len[4][4*17]={
80 {
81 1, 0, 0, 0,
82 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
83 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
84 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
85 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
86 },
87 {
88 2, 0, 0, 0,
89 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
90 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
91 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
92 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
93 },
94 {
95 4, 0, 0, 0,
96 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
97 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
98 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
99 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
100 },
101 {
102 6, 0, 0, 0,
103 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
104 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
105 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
106 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
107 }
108 };
109
110 static const uint8_t coeff_token_bits[4][4*17]={
111 {
112 1, 0, 0, 0,
113 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
114 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
115 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
116 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
117 },
118 {
119 3, 0, 0, 0,
120 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
121 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
122 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
123 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
124 },
125 {
126 15, 0, 0, 0,
127 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
128 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
129 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
130 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
131 },
132 {
133 3, 0, 0, 0,
134 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
135 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
136 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
137 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
138 }
139 };
140
/*
 * Code lengths and code words for the total_zeros VLCs.
 * Row i is handed to init_vlc() (16 codes) for total_zeros_vlc[i], i = 0..14;
 * decode_residual() selects the VLC with index total_coeff-1.
 * The arrays are declared with 16 rows but only 15 are initialised and used.
 */
141 static const uint8_t total_zeros_len[16][16]= {
142 {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
143 {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
144 {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
145 {5,3,4,4,3,3,3,4,3,4,5,5,5},
146 {4,4,4,3,3,3,3,3,4,5,4,5},
147 {6,5,3,3,3,3,3,3,4,3,6},
148 {6,5,3,3,3,2,3,4,3,6},
149 {6,4,5,3,2,2,3,3,6},
150 {6,6,4,2,2,3,2,5},
151 {5,5,3,2,2,2,4},
152 {4,4,3,3,1,3},
153 {4,4,2,1,3},
154 {3,3,1,2},
155 {2,2,1},
156 {1,1},
157 };
158
159 static const uint8_t total_zeros_bits[16][16]= {
160 {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
161 {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
162 {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
163 {3,7,5,4,6,5,4,3,3,2,2,1,0},
164 {5,4,3,7,6,5,4,3,2,1,1,0},
165 {1,1,7,6,5,4,3,2,1,1,0},
166 {1,1,5,4,3,3,2,1,1,0},
167 {1,1,1,3,3,2,2,1,0},
168 {1,0,1,3,2,1,1,1},
169 {1,0,1,3,2,1,1},
170 {0,1,1,2,1,3},
171 {0,1,1,1,1},
172 {0,1,1,1},
173 {0,1,1},
174 {0,1},
175 };
176
/*
 * Same as above for chroma DC blocks: row i (4 codes) builds
 * chroma_dc_total_zeros_vlc[i], i = 0..2, again selected by total_coeff-1.
 */
177 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
178 { 1, 2, 3, 3,},
179 { 1, 2, 2, 0,},
180 { 1, 1, 0, 0,},
181 };
182
183 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
184 { 1, 1, 1, 0,},
185 { 1, 1, 0, 0,},
186 { 1, 0, 0, 0,},
187 };
188
/*
 * Code lengths and code words for the run_before VLCs.
 * Rows 0..5 (7 codes each) build run_vlc[0..5], which decode_residual() uses
 * as run_vlc[zeros_left-1] while zeros_left < 7; row 6 (16 codes) builds
 * run7_vlc, used for zeros_left >= 7.
 */
189 static const uint8_t run_len[7][16]={
190 {1,1},
191 {1,2,2},
192 {2,2,2,2},
193 {2,2,2,3,3},
194 {2,2,3,3,3,3},
195 {2,3,3,3,3,3,3},
196 {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
197 };
198
199 static const uint8_t run_bits[7][16]={
200 {1,0},
201 {1,1,0},
202 {3,2,1,0},
203 {3,2,1,1,0},
204 {3,2,3,2,1,0},
205 {3,0,1,3,2,5,4},
206 {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
207 };
208
/*
 * Statically allocated storage for the VLC decoders of this file.
 * ff_h264_decode_init_vlc() points each VLC's .table at the matching array,
 * stores the corresponding *_size value in .table_allocated and then calls
 * init_vlc() with INIT_VLC_USE_NEW_STATIC.
 */

/* The four coeff_token tables are packed back to back into one array; the
 * per-table sizes below must add up to the array's first dimension (this is
 * asserted once in ff_h264_decode_init_vlc()). */
209 static VLC coeff_token_vlc[4];
210 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
211 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
212
213 static VLC chroma_dc_coeff_token_vlc;
214 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
215 static const int chroma_dc_coeff_token_vlc_table_size = 256;
216
217 static VLC total_zeros_vlc[15];
218 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
219 static const int total_zeros_vlc_tables_size = 512;
220
221 static VLC chroma_dc_total_zeros_vlc[3];
222 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
223 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
224
225 static VLC run_vlc[6];
226 static VLC_TYPE run_vlc_tables[6][8][2];
227 static const int run_vlc_tables_size = 8;
228
229 static VLC run7_vlc;
230 static VLC_TYPE run7_vlc_table[96][2];
231 static const int run7_vlc_table_size = 96;
232
/*
 * Level lookup table filled by init_cavlc_level_tab() and indexed as
 * [suffix_length][next LEVEL_TAB_BITS bits of the bitstream]:
 *   [..][0] is either the decoded level or, when >= 100, an escape value
 *           (prefix + 100) telling decode_residual() to finish decoding itself;
 *   [..][1] is the number of bits to skip.
 */
233 #define LEVEL_TAB_BITS 8
234 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
235
236
237 /**
238 * gets the predicted number of non-zero coefficients.
239 * @param n block index
240 */
241 static inline int pred_non_zero_count(H264Context *h, int n){
242 const int index8= scan8[n];
243 const int left= h->non_zero_count_cache[index8 - 1];
244 const int top = h->non_zero_count_cache[index8 - 8];
245 int i= left + top;
246
247 if(i<64) i= (i+1)>>1;
248
249 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
250
251 return i&31;
252 }
253
254 static av_cold void init_cavlc_level_tab(void){
255 int suffix_length, mask;
256 unsigned int i;
257
258 for(suffix_length=0; suffix_length<7; suffix_length++){
259 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
260 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
261 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
262
263 mask= -(level_code&1);
264 level_code= (((2+level_code)>>1) ^ mask) - mask;
265 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
266 cavlc_level_tab[suffix_length][i][0]= level_code;
267 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
268 }else if(prefix + 1 <= LEVEL_TAB_BITS){
269 cavlc_level_tab[suffix_length][i][0]= prefix+100;
270 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
271 }else{
272 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
273 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
274 }
275 }
276 }
277 }
278
279 av_cold void ff_h264_decode_init_vlc(void){
280 static int done = 0;
281
282 if (!done) {
283 int i;
284 int offset;
285 done = 1;
286
287 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
288 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
289 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
290 &chroma_dc_coeff_token_len [0], 1, 1,
291 &chroma_dc_coeff_token_bits[0], 1, 1,
292 INIT_VLC_USE_NEW_STATIC);
293
294 offset = 0;
295 for(i=0; i<4; i++){
296 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
297 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
298 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
299 &coeff_token_len [i][0], 1, 1,
300 &coeff_token_bits[i][0], 1, 1,
301 INIT_VLC_USE_NEW_STATIC);
302 offset += coeff_token_vlc_tables_size[i];
303 }
304 /*
305 * This is a one time safety check to make sure that
306 * the packed static coeff_token_vlc table sizes
307 * were initialized correctly.
308 */
309 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
310
311 for(i=0; i<3; i++){
312 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
313 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
314 init_vlc(&chroma_dc_total_zeros_vlc[i],
315 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
316 &chroma_dc_total_zeros_len [i][0], 1, 1,
317 &chroma_dc_total_zeros_bits[i][0], 1, 1,
318 INIT_VLC_USE_NEW_STATIC);
319 }
320 for(i=0; i<15; i++){
321 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
322 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
323 init_vlc(&total_zeros_vlc[i],
324 TOTAL_ZEROS_VLC_BITS, 16,
325 &total_zeros_len [i][0], 1, 1,
326 &total_zeros_bits[i][0], 1, 1,
327 INIT_VLC_USE_NEW_STATIC);
328 }
329
330 for(i=0; i<6; i++){
331 run_vlc[i].table = run_vlc_tables[i];
332 run_vlc[i].table_allocated = run_vlc_tables_size;
333 init_vlc(&run_vlc[i],
334 RUN_VLC_BITS, 7,
335 &run_len [i][0], 1, 1,
336 &run_bits[i][0], 1, 1,
337 INIT_VLC_USE_NEW_STATIC);
338 }
339 run7_vlc.table = run7_vlc_table,
340 run7_vlc.table_allocated = run7_vlc_table_size;
341 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
342 &run_len [6][0], 1, 1,
343 &run_bits[6][0], 1, 1,
344 INIT_VLC_USE_NEW_STATIC);
345
346 init_cavlc_level_tab();
347 }
348 }
349
350 /**
351 *
352 */
353 static inline int get_level_prefix(GetBitContext *gb){
354 unsigned int buf;
355 int log;
356
357 OPEN_READER(re, gb);
358 UPDATE_CACHE(re, gb);
359 buf=GET_CACHE(re, gb);
360
361 log= 32 - av_log2(buf);
362 #ifdef TRACE
363 print_bin(buf>>(32-log), log);
364 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
365 #endif
366
367 LAST_SKIP_BITS(re, gb, log);
368 CLOSE_READER(re, gb);
369
370 return log-1;
371 }
372
373 /**
374 * decodes a residual block.
375 * @param n block index
376 * @param scantable scantable
377 * @param max_coeff number of coefficients in the block
378 * @return <0 if an error occurred
379 */
380 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
381 MpegEncContext * const s = &h->s;
382 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
383 int level[16];
384 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
385
386 //FIXME put trailing_onex into the context
387
388 if(n == CHROMA_DC_BLOCK_INDEX){
389 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
390 total_coeff= coeff_token>>2;
391 }else{
392 if(n == LUMA_DC_BLOCK_INDEX){
393 total_coeff= pred_non_zero_count(h, 0);
394 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
395 total_coeff= coeff_token>>2;
396 }else{
397 total_coeff= pred_non_zero_count(h, n);
398 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
399 total_coeff= coeff_token>>2;
400 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
401 }
402 }
403
404 //FIXME set last_non_zero?
405
406 if(total_coeff==0)
407 return 0;
408 if(total_coeff > (unsigned)max_coeff) {
409 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
410 return -1;
411 }
412
413 trailing_ones= coeff_token&3;
414 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
415 assert(total_coeff<=16);
416
417 i = show_bits(gb, 3);
418 skip_bits(gb, trailing_ones);
419 level[0] = 1-((i&4)>>1);
420 level[1] = 1-((i&2) );
421 level[2] = 1-((i&1)<<1);
422
423 if(trailing_ones<total_coeff) {
424 int mask, prefix;
425 int suffix_length = total_coeff > 10 && trailing_ones < 3;
426 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
427 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
428
429 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
430 if(level_code >= 100){
431 prefix= level_code - 100;
432 if(prefix == LEVEL_TAB_BITS)
433 prefix += get_level_prefix(gb);
434
435 //first coefficient has suffix_length equal to 0 or 1
436 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
437 if(suffix_length)
438 level_code= (prefix<<1) + get_bits1(gb); //part
439 else
440 level_code= prefix; //part
441 }else if(prefix==14){
442 if(suffix_length)
443 level_code= (prefix<<1) + get_bits1(gb); //part
444 else
445 level_code= prefix + get_bits(gb, 4); //part
446 }else{
447 level_code= 30 + get_bits(gb, prefix-3); //part
448 if(prefix>=16)
449 level_code += (1<<(prefix-3))-4096;
450 }
451
452 if(trailing_ones < 3) level_code += 2;
453
454 suffix_length = 2;
455 mask= -(level_code&1);
456 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
457 }else{
458 if(trailing_ones < 3) level_code += (level_code>>31)|1;
459
460 suffix_length = 1;
461 if(level_code + 3U > 6U)
462 suffix_length++;
463 level[trailing_ones]= level_code;
464 }
465
466 //remaining coefficients have suffix_length > 0
467 for(i=trailing_ones+1;i<total_coeff;i++) {
468 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
469 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
470 level_code= cavlc_level_tab[suffix_length][bitsi][0];
471
472 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
473 if(level_code >= 100){
474 prefix= level_code - 100;
475 if(prefix == LEVEL_TAB_BITS){
476 prefix += get_level_prefix(gb);
477 }
478 if(prefix<15){
479 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
480 }else{
481 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
482 if(prefix>=16)
483 level_code += (1<<(prefix-3))-4096;
484 }
485 mask= -(level_code&1);
486 level_code= (((2+level_code)>>1) ^ mask) - mask;
487 }
488 level[i]= level_code;
489
490 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
491 suffix_length++;
492 }
493 }
494
495 if(total_coeff == max_coeff)
496 zeros_left=0;
497 else{
498 if(n == CHROMA_DC_BLOCK_INDEX)
499 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
500 else
501 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
502 }
503
504 coeff_num = zeros_left + total_coeff - 1;
505 j = scantable[coeff_num];
506 if(n > 24){
507 block[j] = level[0];
508 for(i=1;i<total_coeff;i++) {
509 if(zeros_left <= 0)
510 run_before = 0;
511 else if(zeros_left < 7){
512 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
513 }else{
514 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
515 }
516 zeros_left -= run_before;
517 coeff_num -= 1 + run_before;
518 j= scantable[ coeff_num ];
519
520 block[j]= level[i];
521 }
522 }else{
523 block[j] = (level[0] * qmul[j] + 32)>>6;
524 for(i=1;i<total_coeff;i++) {
525 if(zeros_left <= 0)
526 run_before = 0;
527 else if(zeros_left < 7){
528 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
529 }else{
530 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
531 }
532 zeros_left -= run_before;
533 coeff_num -= 1 + run_before;
534 j= scantable[ coeff_num ];
535
536 block[j]= (level[i] * qmul[j] + 32)>>6;
537 }
538 }
539
540 if(zeros_left<0){
541 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
542 return -1;
543 }
544
545 return 0;
546 }
547
548 int ff_h264_decode_mb_cavlc(H264Context *h){
549 MpegEncContext * const s = &h->s;
550 int mb_xy;
551 int partition_count;
552 unsigned int mb_type, cbp;
553 int dct8x8_allowed= h->pps.transform_8x8_mode;
554
555 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
556
557 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
558 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
559 down the code */
560 if(h->slice_type_nos != FF_I_TYPE){
561 if(s->mb_skip_run==-1)
562 s->mb_skip_run= get_ue_golomb(&s->gb);
563
564 if (s->mb_skip_run--) {
565 if(FRAME_MBAFF && (s->mb_y&1) == 0){
566 if(s->mb_skip_run==0)
567 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
568 else
569 predict_field_decoding_flag(h);
570 }
571 decode_mb_skip(h);
572 return 0;
573 }
574 }
575 if(FRAME_MBAFF){
576 if( (s->mb_y&1) == 0 )
577 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
578 }
579
580 h->prev_mb_skipped= 0;
581
582 mb_type= get_ue_golomb(&s->gb);
583 if(h->slice_type_nos == FF_B_TYPE){
584 if(mb_type < 23){
585 partition_count= b_mb_type_info[mb_type].partition_count;
586 mb_type= b_mb_type_info[mb_type].type;
587 }else{
588 mb_type -= 23;
589 goto decode_intra_mb;
590 }
591 }else if(h->slice_type_nos == FF_P_TYPE){
592 if(mb_type < 5){
593 partition_count= p_mb_type_info[mb_type].partition_count;
594 mb_type= p_mb_type_info[mb_type].type;
595 }else{
596 mb_type -= 5;
597 goto decode_intra_mb;
598 }
599 }else{
600 assert(h->slice_type_nos == FF_I_TYPE);
601 if(h->slice_type == FF_SI_TYPE && mb_type)
602 mb_type--;
603 decode_intra_mb:
604 if(mb_type > 25){
605 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
606 return -1;
607 }
608 partition_count=0;
609 cbp= i_mb_type_info[mb_type].cbp;
610 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
611 mb_type= i_mb_type_info[mb_type].type;
612 }
613
614 if(MB_FIELD)
615 mb_type |= MB_TYPE_INTERLACED;
616
617 h->slice_table[ mb_xy ]= h->slice_num;
618
619 if(IS_INTRA_PCM(mb_type)){
620 unsigned int x;
621
622 // We assume these blocks are very rare so we do not optimize it.
623 align_get_bits(&s->gb);
624
625 // The pixels are stored in the same order as levels in h->mb array.
626 for(x=0; x < (CHROMA ? 384 : 256); x++){
627 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
628 }
629
630 // In deblocking, the quantizer is 0
631 s->current_picture.qscale_table[mb_xy]= 0;
632 // All coeffs are present
633 memset(h->non_zero_count[mb_xy], 16, 16);
634
635 s->current_picture.mb_type[mb_xy]= mb_type;
636 return 0;
637 }
638
639 if(MB_MBAFF){
640 h->ref_count[0] <<= 1;
641 h->ref_count[1] <<= 1;
642 }
643
644 fill_caches(h, mb_type, 0);
645
646 //mb_pred
647 if(IS_INTRA(mb_type)){
648 int pred_mode;
649 // init_top_left_availability(h);
650 if(IS_INTRA4x4(mb_type)){
651 int i;
652 int di = 1;
653 if(dct8x8_allowed && get_bits1(&s->gb)){
654 mb_type |= MB_TYPE_8x8DCT;
655 di = 4;
656 }
657
658 // fill_intra4x4_pred_table(h);
659 for(i=0; i<16; i+=di){
660 int mode= pred_intra_mode(h, i);
661
662 if(!get_bits1(&s->gb)){
663 const int rem_mode= get_bits(&s->gb, 3);
664 mode = rem_mode + (rem_mode >= mode);
665 }
666
667 if(di==4)
668 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
669 else
670 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
671 }
672 ff_h264_write_back_intra_pred_mode(h);
673 if( ff_h264_check_intra4x4_pred_mode(h) < 0)
674 return -1;
675 }else{
676 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
677 if(h->intra16x16_pred_mode < 0)
678 return -1;
679 }
680 if(CHROMA){
681 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
682 if(pred_mode < 0)
683 return -1;
684 h->chroma_pred_mode= pred_mode;
685 }
686 }else if(partition_count==4){
687 int i, j, sub_partition_count[4], list, ref[2][4];
688
689 if(h->slice_type_nos == FF_B_TYPE){
690 for(i=0; i<4; i++){
691 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
692 if(h->sub_mb_type[i] >=13){
693 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
694 return -1;
695 }
696 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
697 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
698 }
699 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
700 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
701 ff_h264_pred_direct_motion(h, &mb_type);
702 h->ref_cache[0][scan8[4]] =
703 h->ref_cache[1][scan8[4]] =
704 h->ref_cache[0][scan8[12]] =
705 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
706 }
707 }else{
708 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
709 for(i=0; i<4; i++){
710 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
711 if(h->sub_mb_type[i] >=4){
712 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
713 return -1;
714 }
715 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
716 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
717 }
718 }
719
720 for(list=0; list<h->list_count; list++){
721 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
722 for(i=0; i<4; i++){
723 if(IS_DIRECT(h->sub_mb_type[i])) continue;
724 if(IS_DIR(h->sub_mb_type[i], 0, list)){
725 unsigned int tmp;
726 if(ref_count == 1){
727 tmp= 0;
728 }else if(ref_count == 2){
729 tmp= get_bits1(&s->gb)^1;
730 }else{
731 tmp= get_ue_golomb_31(&s->gb);
732 if(tmp>=ref_count){
733 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
734 return -1;
735 }
736 }
737 ref[list][i]= tmp;
738 }else{
739 //FIXME
740 ref[list][i] = -1;
741 }
742 }
743 }
744
745 if(dct8x8_allowed)
746 dct8x8_allowed = get_dct8x8_allowed(h);
747
748 for(list=0; list<h->list_count; list++){
749 for(i=0; i<4; i++){
750 if(IS_DIRECT(h->sub_mb_type[i])) {
751 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
752 continue;
753 }
754 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
755 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
756
757 if(IS_DIR(h->sub_mb_type[i], 0, list)){
758 const int sub_mb_type= h->sub_mb_type[i];
759 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
760 for(j=0; j<sub_partition_count[i]; j++){
761 int mx, my;
762 const int index= 4*i + block_width*j;
763 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
764 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
765 mx += get_se_golomb(&s->gb);
766 my += get_se_golomb(&s->gb);
767 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
768
769 if(IS_SUB_8X8(sub_mb_type)){
770 mv_cache[ 1 ][0]=
771 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
772 mv_cache[ 1 ][1]=
773 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
774 }else if(IS_SUB_8X4(sub_mb_type)){
775 mv_cache[ 1 ][0]= mx;
776 mv_cache[ 1 ][1]= my;
777 }else if(IS_SUB_4X8(sub_mb_type)){
778 mv_cache[ 8 ][0]= mx;
779 mv_cache[ 8 ][1]= my;
780 }
781 mv_cache[ 0 ][0]= mx;
782 mv_cache[ 0 ][1]= my;
783 }
784 }else{
785 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
786 p[0] = p[1]=
787 p[8] = p[9]= 0;
788 }
789 }
790 }
791 }else if(IS_DIRECT(mb_type)){
792 ff_h264_pred_direct_motion(h, &mb_type);
793 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
794 }else{
795 int list, mx, my, i;
796 //FIXME we should set ref_idx_l? to 0 if we use that later ...
797 if(IS_16X16(mb_type)){
798 for(list=0; list<h->list_count; list++){
799 unsigned int val;
800 if(IS_DIR(mb_type, 0, list)){
801 if(h->ref_count[list]==1){
802 val= 0;
803 }else if(h->ref_count[list]==2){
804 val= get_bits1(&s->gb)^1;
805 }else{
806 val= get_ue_golomb_31(&s->gb);
807 if(val >= h->ref_count[list]){
808 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
809 return -1;
810 }
811 }
812 }else
813 val= LIST_NOT_USED&0xFF;
814 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
815 }
816 for(list=0; list<h->list_count; list++){
817 unsigned int val;
818 if(IS_DIR(mb_type, 0, list)){
819 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
820 mx += get_se_golomb(&s->gb);
821 my += get_se_golomb(&s->gb);
822 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
823
824 val= pack16to32(mx,my);
825 }else
826 val=0;
827 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
828 }
829 }
830 else if(IS_16X8(mb_type)){
831 for(list=0; list<h->list_count; list++){
832 for(i=0; i<2; i++){
833 unsigned int val;
834 if(IS_DIR(mb_type, i, list)){
835 if(h->ref_count[list] == 1){
836 val= 0;
837 }else if(h->ref_count[list] == 2){
838 val= get_bits1(&s->gb)^1;
839 }else{
840 val= get_ue_golomb_31(&s->gb);
841 if(val >= h->ref_count[list]){
842 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
843 return -1;
844 }
845 }
846 }else
847 val= LIST_NOT_USED&0xFF;
848 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
849 }
850 }
851 for(list=0; list<h->list_count; list++){
852 for(i=0; i<2; i++){
853 unsigned int val;
854 if(IS_DIR(mb_type, i, list)){
855 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
856 mx += get_se_golomb(&s->gb);
857 my += get_se_golomb(&s->gb);
858 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
859
860 val= pack16to32(mx,my);
861 }else
862 val=0;
863 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
864 }
865 }
866 }else{
867 assert(IS_8X16(mb_type));
868 for(list=0; list<h->list_count; list++){
869 for(i=0; i<2; i++){
870 unsigned int val;
871 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
872 if(h->ref_count[list]==1){
873 val= 0;
874 }else if(h->ref_count[list]==2){
875 val= get_bits1(&s->gb)^1;
876 }else{
877 val= get_ue_golomb_31(&s->gb);
878 if(val >= h->ref_count[list]){
879 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
880 return -1;
881 }
882 }
883 }else
884 val= LIST_NOT_USED&0xFF;
885 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
886 }
887 }
888 for(list=0; list<h->list_count; list++){
889 for(i=0; i<2; i++){
890 unsigned int val;
891 if(IS_DIR(mb_type, i, list)){
892 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
893 mx += get_se_golomb(&s->gb);
894 my += get_se_golomb(&s->gb);
895 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
896
897 val= pack16to32(mx,my);
898 }else
899 val=0;
900 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
901 }
902 }
903 }
904 }
905
906 if(IS_INTER(mb_type))
907 write_back_motion(h, mb_type);
908
909 if(!IS_INTRA16x16(mb_type)){
910 cbp= get_ue_golomb(&s->gb);
911 if(cbp > 47){
912 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
913 return -1;
914 }
915
916 if(CHROMA){
917 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
918 else cbp= golomb_to_inter_cbp [cbp];
919 }else{
920 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
921 else cbp= golomb_to_inter_cbp_gray[cbp];
922 }
923 }
924 h->cbp = cbp;
925
926 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
927 if(get_bits1(&s->gb)){
928 mb_type |= MB_TYPE_8x8DCT;
929 h->cbp_table[mb_xy]= cbp;
930 }
931 }
932 s->current_picture.mb_type[mb_xy]= mb_type;
933
934 if(cbp || IS_INTRA16x16(mb_type)){
935 int i8x8, i4x4, chroma_idx;
936 int dquant;
937 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
938 const uint8_t *scan, *scan8x8, *dc_scan;
939
940 // fill_non_zero_count_cache(h);
941
942 if(IS_INTERLACED(mb_type)){
943 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
944 scan= s->qscale ? h->field_scan : h->field_scan_q0;
945 dc_scan= luma_dc_field_scan;
946 }else{
947 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
948 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
949 dc_scan= luma_dc_zigzag_scan;
950 }
951
952 dquant= get_se_golomb(&s->gb);
953
954 if( dquant > 25 || dquant < -26 ){
955 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
956 return -1;
957 }
958
959 s->qscale += dquant;
960 if(((unsigned)s->qscale) > 51){
961 if(s->qscale<0) s->qscale+= 52;
962 else s->qscale-= 52;
963 }
964
965 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
966 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
967 if(IS_INTRA16x16(mb_type)){
968 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
969 return -1; //FIXME continue if partitioned and other return -1 too
970 }
971
972 assert((cbp&15) == 0 || (cbp&15) == 15);
973
974 if(cbp&15){
975 for(i8x8=0; i8x8<4; i8x8++){
976 for(i4x4=0; i4x4<4; i4x4++){
977 const int index= i4x4 + 4*i8x8;
978 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
979 return -1;
980 }
981 }
982 }
983 }else{
984 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
985 }
986 }else{
987 for(i8x8=0; i8x8<4; i8x8++){
988 if(cbp & (1<<i8x8)){
989 if(IS_8x8DCT(mb_type)){
990 DCTELEM *buf = &h->mb[64*i8x8];
991 uint8_t *nnz;
992 for(i4x4=0; i4x4<4; i4x4++){
993 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
994 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
995 return -1;
996 }
997 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
998 nnz[0] += nnz[1] + nnz[8] + nnz[9];
999 }else{
1000 for(i4x4=0; i4x4<4; i4x4++){
1001 const int index= i4x4 + 4*i8x8;
1002
1003 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
1004 return -1;
1005 }
1006 }
1007 }
1008 }else{
1009 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
1010 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
1011 }
1012 }
1013 }
1014
1015 if(cbp&0x30){
1016 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1017 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
1018 return -1;
1019 }
1020 }
1021
1022 if(cbp&0x20){
1023 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1024 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1025 for(i4x4=0; i4x4<4; i4x4++){
1026 const int index= 16 + 4*chroma_idx + i4x4;
1027 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1028 return -1;
1029 }
1030 }
1031 }
1032 }else{
1033 uint8_t * const nnz= &h->non_zero_count_cache[0];
1034 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1035 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1036 }
1037 }else{
1038 uint8_t * const nnz= &h->non_zero_count_cache[0];
1039 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1040 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1041 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1042 }
1043 s->current_picture.qscale_table[mb_xy]= s->qscale;
1044 write_back_non_zero_count(h);
1045
1046 if(MB_MBAFF){
1047 h->ref_count[0] >>= 1;
1048 h->ref_count[1] >>= 1;
1049 }
1050
1051 return 0;
1052 }
1053