comparison h264.c @ 10866:d26e9b4d2ca1 libavcodec

Split cavlc out of h264.c. Seems to speed the code up a little. The placement of many generic functions between h264.c and h264.h is still open; currently they are a little randomly placed between them.
author michael
date Wed, 13 Jan 2010 01:59:19 +0000
parents e3f5eb016712
children 7101061bfa0f
comparison
equal deleted inserted replaced
10865:bcdc5343a577 10866:d26e9b4d2ca1
44 #endif 44 #endif
45 45
46 //#undef NDEBUG 46 //#undef NDEBUG
47 #include <assert.h> 47 #include <assert.h>
48 48
49 static VLC coeff_token_vlc[4];
50 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
51 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
52
53 static VLC chroma_dc_coeff_token_vlc;
54 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
55 static const int chroma_dc_coeff_token_vlc_table_size = 256;
56
57 static VLC total_zeros_vlc[15];
58 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
59 static const int total_zeros_vlc_tables_size = 512;
60
61 static VLC chroma_dc_total_zeros_vlc[3];
62 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
63 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
64
65 static VLC run_vlc[6];
66 static VLC_TYPE run_vlc_tables[6][8][2];
67 static const int run_vlc_tables_size = 8;
68
69 static VLC run7_vlc;
70 static VLC_TYPE run7_vlc_table[96][2];
71 static const int run7_vlc_table_size = 96;
72
73 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); 49 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
74 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); 50 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
75 51
76 static const uint8_t rem6[52]={ 52 static const uint8_t rem6[52]={
77 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 53 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
78 }; 54 };
79 55
80 static const uint8_t div6[52]={ 56 static const uint8_t div6[52]={
81 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 57 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
82 }; 58 };
83
84 static const uint8_t left_block_options[4][8]={
85 {0,1,2,3,7,10,8,11},
86 {2,2,3,3,8,11,8,11},
87 {0,0,1,1,7,10,7,10},
88 {0,2,0,2,7,10,7,10}
89 };
90
91 #define LEVEL_TAB_BITS 8
92 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
93
94 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
95 MpegEncContext * const s = &h->s;
96 const int mb_xy= h->mb_xy;
97 int topleft_xy, top_xy, topright_xy, left_xy[2];
98 int topleft_type, top_type, topright_type, left_type[2];
99 const uint8_t * left_block;
100 int topleft_partition= -1;
101 int i;
102
103 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
104
105 //FIXME deblocking could skip the intra and nnz parts.
106 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
107 return;
108
109 /* Wow, what a mess, why didn't they simplify the interlacing & intra
110 * stuff, I can't imagine that these complex rules are worth it. */
111
112 topleft_xy = top_xy - 1;
113 topright_xy= top_xy + 1;
114 left_xy[1] = left_xy[0] = mb_xy-1;
115 left_block = left_block_options[0];
116 if(FRAME_MBAFF){
117 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
118 const int top_pair_xy = pair_xy - s->mb_stride;
119 const int topleft_pair_xy = top_pair_xy - 1;
120 const int topright_pair_xy = top_pair_xy + 1;
121 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
122 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
123 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
124 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
125 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
126 const int bottom = (s->mb_y & 1);
127 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
128
129 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
130 top_xy -= s->mb_stride;
131 }
132 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
135 topleft_xy += s->mb_stride;
136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
137 topleft_partition = 0;
138 }
139 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
140 topright_xy -= s->mb_stride;
141 }
142 if (left_mb_field_flag != curr_mb_field_flag) {
143 left_xy[1] = left_xy[0] = pair_xy - 1;
144 if (curr_mb_field_flag) {
145 left_xy[1] += s->mb_stride;
146 left_block = left_block_options[3];
147 } else {
148 left_block= left_block_options[2 - bottom];
149 }
150 }
151 }
152
153 h->top_mb_xy = top_xy;
154 h->left_mb_xy[0] = left_xy[0];
155 h->left_mb_xy[1] = left_xy[1];
156 if(for_deblock){
157 topleft_type = 0;
158 topright_type = 0;
159 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
160 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
161 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
162
163 if(MB_MBAFF && !IS_INTRA(mb_type)){
164 int list;
165 for(list=0; list<h->list_count; list++){
166 //These values where changed for ease of performing MC, we need to change them back
167 //FIXME maybe we can make MC and loop filter use the same values or prevent
168 //the MC code from changing ref_cache and rather use a temporary array.
169 if(USES_LIST(mb_type,list)){
170 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
171 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
172 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
173 ref += h->b8_stride;
174 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
175 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
176 }
177 }
178 }
179 }else{
180 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
181 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
182 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
183 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
184 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
185
186 if(IS_INTRA(mb_type)){
187 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
188 h->topleft_samples_available=
189 h->top_samples_available=
190 h->left_samples_available= 0xFFFF;
191 h->topright_samples_available= 0xEEEA;
192
193 if(!(top_type & type_mask)){
194 h->topleft_samples_available= 0xB3FF;
195 h->top_samples_available= 0x33FF;
196 h->topright_samples_available= 0x26EA;
197 }
198 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
199 if(IS_INTERLACED(mb_type)){
200 if(!(left_type[0] & type_mask)){
201 h->topleft_samples_available&= 0xDFFF;
202 h->left_samples_available&= 0x5FFF;
203 }
204 if(!(left_type[1] & type_mask)){
205 h->topleft_samples_available&= 0xFF5F;
206 h->left_samples_available&= 0xFF5F;
207 }
208 }else{
209 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
210 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
211 assert(left_xy[0] == left_xy[1]);
212 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
213 h->topleft_samples_available&= 0xDF5F;
214 h->left_samples_available&= 0x5F5F;
215 }
216 }
217 }else{
218 if(!(left_type[0] & type_mask)){
219 h->topleft_samples_available&= 0xDF5F;
220 h->left_samples_available&= 0x5F5F;
221 }
222 }
223
224 if(!(topleft_type & type_mask))
225 h->topleft_samples_available&= 0x7FFF;
226
227 if(!(topright_type & type_mask))
228 h->topright_samples_available&= 0xFBFF;
229
230 if(IS_INTRA4x4(mb_type)){
231 if(IS_INTRA4x4(top_type)){
232 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
233 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
234 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
235 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
236 }else{
237 int pred;
238 if(!(top_type & type_mask))
239 pred= -1;
240 else{
241 pred= 2;
242 }
243 h->intra4x4_pred_mode_cache[4+8*0]=
244 h->intra4x4_pred_mode_cache[5+8*0]=
245 h->intra4x4_pred_mode_cache[6+8*0]=
246 h->intra4x4_pred_mode_cache[7+8*0]= pred;
247 }
248 for(i=0; i<2; i++){
249 if(IS_INTRA4x4(left_type[i])){
250 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
251 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
252 }else{
253 int pred;
254 if(!(left_type[i] & type_mask))
255 pred= -1;
256 else{
257 pred= 2;
258 }
259 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
260 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
261 }
262 }
263 }
264 }
265 }
266
267
268 /*
269 0 . T T. T T T T
270 1 L . .L . . . .
271 2 L . .L . . . .
272 3 . T TL . . . .
273 4 L . .L . . . .
274 5 L . .. . . . .
275 */
276 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
277 if(top_type){
278 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
279 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
280 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
281 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
282
283 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
284 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
285
286 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
287 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
288
289 }else{
290 h->non_zero_count_cache[4+8*0]=
291 h->non_zero_count_cache[5+8*0]=
292 h->non_zero_count_cache[6+8*0]=
293 h->non_zero_count_cache[7+8*0]=
294
295 h->non_zero_count_cache[1+8*0]=
296 h->non_zero_count_cache[2+8*0]=
297
298 h->non_zero_count_cache[1+8*3]=
299 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
300
301 }
302
303 for (i=0; i<2; i++) {
304 if(left_type[i]){
305 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
306 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
307 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
308 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
309 }else{
310 h->non_zero_count_cache[3+8*1 + 2*8*i]=
311 h->non_zero_count_cache[3+8*2 + 2*8*i]=
312 h->non_zero_count_cache[0+8*1 + 8*i]=
313 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
314 }
315 }
316
317 if( h->pps.cabac ) {
318 // top_cbp
319 if(top_type) {
320 h->top_cbp = h->cbp_table[top_xy];
321 } else if(IS_INTRA(mb_type)) {
322 h->top_cbp = 0x1C0;
323 } else {
324 h->top_cbp = 0;
325 }
326 // left_cbp
327 if (left_type[0]) {
328 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
329 } else if(IS_INTRA(mb_type)) {
330 h->left_cbp = 0x1C0;
331 } else {
332 h->left_cbp = 0;
333 }
334 if (left_type[0]) {
335 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
336 }
337 if (left_type[1]) {
338 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
339 }
340 }
341
342 #if 1
343 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
344 int list;
345 for(list=0; list<h->list_count; list++){
346 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
347 /*if(!h->mv_cache_clean[list]){
348 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
349 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
350 h->mv_cache_clean[list]= 1;
351 }*/
352 continue;
353 }
354 h->mv_cache_clean[list]= 0;
355
356 if(USES_LIST(top_type, list)){
357 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
358 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
359 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
360 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
361 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
362 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
363 h->ref_cache[list][scan8[0] + 0 - 1*8]=
364 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
365 h->ref_cache[list][scan8[0] + 2 - 1*8]=
366 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
367 }else{
368 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
369 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
370 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
371 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
372 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
373 }
374
375 for(i=0; i<2; i++){
376 int cache_idx = scan8[0] - 1 + i*2*8;
377 if(USES_LIST(left_type[i], list)){
378 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
379 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
380 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
381 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
382 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
383 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
384 }else{
385 *(uint32_t*)h->mv_cache [list][cache_idx ]=
386 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
387 h->ref_cache[list][cache_idx ]=
388 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
389 }
390 }
391
392 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
393 continue;
394
395 if(USES_LIST(topleft_type, list)){
396 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
397 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
398 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
399 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
400 }else{
401 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
402 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
403 }
404
405 if(USES_LIST(topright_type, list)){
406 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
407 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
408 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
409 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
410 }else{
411 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
412 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
413 }
414
415 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
416 continue;
417
418 h->ref_cache[list][scan8[5 ]+1] =
419 h->ref_cache[list][scan8[7 ]+1] =
420 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
421 h->ref_cache[list][scan8[4 ]] =
422 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
423 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
424 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
425 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
426 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
427 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
428
429 if( h->pps.cabac ) {
430 /* XXX beurk, Load mvd */
431 if(USES_LIST(top_type, list)){
432 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
433 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
434 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
435 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
436 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
437 }else{
438 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
439 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
440 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
441 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
442 }
443 if(USES_LIST(left_type[0], list)){
444 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
445 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
446 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
447 }else{
448 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
449 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
450 }
451 if(USES_LIST(left_type[1], list)){
452 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
453 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
455 }else{
456 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
458 }
459 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
460 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
461 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
462 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
463 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
464
465 if(h->slice_type_nos == FF_B_TYPE){
466 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
467
468 if(IS_DIRECT(top_type)){
469 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
470 }else if(IS_8X8(top_type)){
471 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
472 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
473 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
474 }else{
475 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
476 }
477
478 if(IS_DIRECT(left_type[0]))
479 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
480 else if(IS_8X8(left_type[0]))
481 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
482 else
483 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
484
485 if(IS_DIRECT(left_type[1]))
486 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
487 else if(IS_8X8(left_type[1]))
488 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
489 else
490 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
491 }
492 }
493
494 if(FRAME_MBAFF){
495 #define MAP_MVS\
496 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
497 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
498 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
499 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
500 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
501 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
502 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
503 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
504 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
505 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
506 if(MB_FIELD){
507 #define MAP_F2F(idx, mb_type)\
508 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
509 h->ref_cache[list][idx] <<= 1;\
510 h->mv_cache[list][idx][1] /= 2;\
511 h->mvd_cache[list][idx][1] /= 2;\
512 }
513 MAP_MVS
514 #undef MAP_F2F
515 }else{
516 #define MAP_F2F(idx, mb_type)\
517 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
518 h->ref_cache[list][idx] >>= 1;\
519 h->mv_cache[list][idx][1] <<= 1;\
520 h->mvd_cache[list][idx][1] <<= 1;\
521 }
522 MAP_MVS
523 #undef MAP_F2F
524 }
525 }
526 }
527 }
528 #endif
529
530 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
531 }
532 59
533 void ff_h264_write_back_intra_pred_mode(H264Context *h){ 60 void ff_h264_write_back_intra_pred_mode(H264Context *h){
534 const int mb_xy= h->mb_xy; 61 const int mb_xy= h->mb_xy;
535 62
536 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1]; 63 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
612 return -1; 139 return -1;
613 } 140 }
614 } 141 }
615 142
616 return mode; 143 return mode;
617 }
618
619 /**
620 * gets the predicted intra4x4 prediction mode.
621 */
622 static inline int pred_intra_mode(H264Context *h, int n){
623 const int index8= scan8[n];
624 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
625 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
626 const int min= FFMIN(left, top);
627
628 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
629
630 if(min<0) return DC_PRED;
631 else return min;
632 }
633
634 static inline void write_back_non_zero_count(H264Context *h){
635 const int mb_xy= h->mb_xy;
636
637 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
638 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
639 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
640 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
641 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
642 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
643 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
644
645 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
646 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
647 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
648
649 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
650 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
651 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
652 }
653
654 /**
655 * gets the predicted number of non-zero coefficients.
656 * @param n block index
657 */
658 static inline int pred_non_zero_count(H264Context *h, int n){
659 const int index8= scan8[n];
660 const int left= h->non_zero_count_cache[index8 - 1];
661 const int top = h->non_zero_count_cache[index8 - 8];
662 int i= left + top;
663
664 if(i<64) i= (i+1)>>1;
665
666 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
667
668 return i&31;
669 }
670
671 static inline void write_back_motion(H264Context *h, int mb_type){
672 MpegEncContext * const s = &h->s;
673 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
674 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
675 int list;
676
677 if(!USES_LIST(mb_type, 0))
678 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
679
680 for(list=0; list<h->list_count; list++){
681 int y;
682 if(!USES_LIST(mb_type, list))
683 continue;
684
685 for(y=0; y<4; y++){
686 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
687 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
688 }
689 if( h->pps.cabac ) {
690 if(IS_SKIP(mb_type))
691 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
692 else
693 for(y=0; y<4; y++){
694 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
695 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
696 }
697 }
698
699 {
700 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
701 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
702 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
703 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
704 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
705 }
706 }
707
708 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
709 if(IS_8X8(mb_type)){
710 uint8_t *direct_table = &h->direct_table[b8_xy];
711 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
712 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
713 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
714 }
715 }
716 } 144 }
717 145
718 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ 146 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
719 int i, si, di; 147 int i, si, di;
720 uint8_t *dst; 148 uint8_t *dst;
1223 } 651 }
1224 652
1225 prefetch_motion(h, 1); 653 prefetch_motion(h, 1);
1226 } 654 }
1227 655
1228 static av_cold void init_cavlc_level_tab(void){
1229 int suffix_length, mask;
1230 unsigned int i;
1231
1232 for(suffix_length=0; suffix_length<7; suffix_length++){
1233 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1234 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1235 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1236
1237 mask= -(level_code&1);
1238 level_code= (((2+level_code)>>1) ^ mask) - mask;
1239 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1240 cavlc_level_tab[suffix_length][i][0]= level_code;
1241 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1242 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1243 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1244 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1245 }else{
1246 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1247 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1248 }
1249 }
1250 }
1251 }
1252
1253 static av_cold void decode_init_vlc(void){
1254 static int done = 0;
1255
1256 if (!done) {
1257 int i;
1258 int offset;
1259 done = 1;
1260
1261 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1262 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1263 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1264 &chroma_dc_coeff_token_len [0], 1, 1,
1265 &chroma_dc_coeff_token_bits[0], 1, 1,
1266 INIT_VLC_USE_NEW_STATIC);
1267
1268 offset = 0;
1269 for(i=0; i<4; i++){
1270 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1271 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1272 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1273 &coeff_token_len [i][0], 1, 1,
1274 &coeff_token_bits[i][0], 1, 1,
1275 INIT_VLC_USE_NEW_STATIC);
1276 offset += coeff_token_vlc_tables_size[i];
1277 }
1278 /*
1279 * This is a one time safety check to make sure that
1280 * the packed static coeff_token_vlc table sizes
1281 * were initialized correctly.
1282 */
1283 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1284
1285 for(i=0; i<3; i++){
1286 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1287 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1288 init_vlc(&chroma_dc_total_zeros_vlc[i],
1289 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1290 &chroma_dc_total_zeros_len [i][0], 1, 1,
1291 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1292 INIT_VLC_USE_NEW_STATIC);
1293 }
1294 for(i=0; i<15; i++){
1295 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1296 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1297 init_vlc(&total_zeros_vlc[i],
1298 TOTAL_ZEROS_VLC_BITS, 16,
1299 &total_zeros_len [i][0], 1, 1,
1300 &total_zeros_bits[i][0], 1, 1,
1301 INIT_VLC_USE_NEW_STATIC);
1302 }
1303
1304 for(i=0; i<6; i++){
1305 run_vlc[i].table = run_vlc_tables[i];
1306 run_vlc[i].table_allocated = run_vlc_tables_size;
1307 init_vlc(&run_vlc[i],
1308 RUN_VLC_BITS, 7,
1309 &run_len [i][0], 1, 1,
1310 &run_bits[i][0], 1, 1,
1311 INIT_VLC_USE_NEW_STATIC);
1312 }
1313 run7_vlc.table = run7_vlc_table,
1314 run7_vlc.table_allocated = run7_vlc_table_size;
1315 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1316 &run_len [6][0], 1, 1,
1317 &run_bits[6][0], 1, 1,
1318 INIT_VLC_USE_NEW_STATIC);
1319
1320 init_cavlc_level_tab();
1321 }
1322 }
1323 656
1324 static void free_tables(H264Context *h){ 657 static void free_tables(H264Context *h){
1325 int i; 658 int i;
1326 H264Context *hx; 659 H264Context *hx;
1327 av_freep(&h->intra4x4_pred_mode); 660 av_freep(&h->intra4x4_pred_mode);
1528 if(!avctx->has_b_frames) 861 if(!avctx->has_b_frames)
1529 s->low_delay= 1; 862 s->low_delay= 1;
1530 863
1531 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; 864 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
1532 865
1533 decode_init_vlc(); 866 ff_h264_decode_init_vlc();
1534 867
1535 if(avctx->extradata_size > 0 && avctx->extradata && 868 if(avctx->extradata_size > 0 && avctx->extradata &&
1536 *(char *)avctx->extradata == 1){ 869 *(char *)avctx->extradata == 1){
1537 h->is_avc = 1; 870 h->is_avc = 1;
1538 h->got_avcC = 0; 871 h->got_avcC = 0;
2838 case FF_SI_TYPE: return 4; 2171 case FF_SI_TYPE: return 4;
2839 default: return -1; 2172 default: return -1;
2840 } 2173 }
2841 } 2174 }
2842 2175
2843 /**
2844 *
2845 */
2846 static inline int get_level_prefix(GetBitContext *gb){
2847 unsigned int buf;
2848 int log;
2849
2850 OPEN_READER(re, gb);
2851 UPDATE_CACHE(re, gb);
2852 buf=GET_CACHE(re, gb);
2853
2854 log= 32 - av_log2(buf);
2855 #ifdef TRACE
2856 print_bin(buf>>(32-log), log);
2857 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
2858 #endif
2859
2860 LAST_SKIP_BITS(re, gb, log);
2861 CLOSE_READER(re, gb);
2862
2863 return log-1;
2864 }
2865
2866 static inline int get_dct8x8_allowed(H264Context *h){
2867 if(h->sps.direct_8x8_inference_flag)
2868 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
2869 else
2870 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
2871 }
2872
/**
 * decodes a residual block (CAVLC entropy coding).
 * @param n block index; CHROMA_DC_BLOCK_INDEX / LUMA_DC_BLOCK_INDEX select
 *          the dedicated DC VLC tables, n > 24 selects the no-dequant path
 * @param scantable scantable
 * @param qmul dequantization multipliers for the current QP (unused when n > 24)
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred
 */
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
    MpegEncContext * const s = &h->s;
    // maps the predicted nnz (0..16) to one of the 4 coeff_token VLC tables
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;

    //FIXME put trailing_onex into the context

    // coeff_token encodes total_coeff (bits 2+) and trailing_ones (low 2 bits)
    if(n == CHROMA_DC_BLOCK_INDEX){
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        total_coeff= coeff_token>>2;
    }else{
        if(n == LUMA_DC_BLOCK_INDEX){
            total_coeff= pred_non_zero_count(h, 0);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
            total_coeff= pred_non_zero_count(h, n);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
        }
    }

    //FIXME set last_non_zero?

    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
        return -1;
    }

    trailing_ones= coeff_token&3;
    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
    assert(total_coeff<=16);

    // peek 3 sign bits but consume only trailing_ones of them;
    // each bit maps 0 -> +1, 1 -> -1 for the trailing one coefficients
    i = show_bits(gb, 3);
    skip_bits(gb, trailing_ones);
    level[0] = 1-((i&4)>>1);
    level[1] = 1-((i&2)   );
    level[2] = 1-((i&1)<<1);

    if(trailing_ones<total_coeff) {
        int mask, prefix;
        int suffix_length = total_coeff > 10 && trailing_ones < 3;
        // try the precomputed level LUT first; level_code >= 100 means
        // the LUT entry is only a partial prefix and must be finished here
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];

        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
        if(level_code >= 100){
            prefix= level_code - 100;
            if(prefix == LEVEL_TAB_BITS)
                prefix += get_level_prefix(gb);

            //first coefficient has suffix_length equal to 0 or 1
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix; //part
            }else if(prefix==14){
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix + get_bits(gb, 4); //part
            }else{
                level_code= 30 + get_bits(gb, prefix-3); //part
                if(prefix>=16)
                    level_code += (1<<(prefix-3))-4096;
            }

            if(trailing_ones < 3) level_code += 2;

            suffix_length = 2;
            mask= -(level_code&1);
            // unzigzag: even codes -> positive levels, odd codes -> negative
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
        }else{
            if(trailing_ones < 3) level_code += (level_code>>31)|1;

            suffix_length = 1;
            if(level_code + 3U > 6U)
                suffix_length++;
            level[trailing_ones]= level_code;
        }

        //remaining coefficients have suffix_length > 0
        for(i=trailing_ones+1;i<total_coeff;i++) {
            // magnitude thresholds above which suffix_length grows
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
            level_code= cavlc_level_tab[suffix_length][bitsi][0];

            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
            if(level_code >= 100){
                prefix= level_code - 100;
                if(prefix == LEVEL_TAB_BITS){
                    prefix += get_level_prefix(gb);
                }
                if(prefix<15){
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                }else{
                    level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
                    if(prefix>=16)
                        level_code += (1<<(prefix-3))-4096;
                }
                mask= -(level_code&1);
                level_code= (((2+level_code)>>1) ^ mask) - mask;
            }
            level[i]= level_code;

            if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
                suffix_length++;
        }
    }

    // total_zeros: number of zero coefficients before the last nonzero one
    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
        if(n == CHROMA_DC_BLOCK_INDEX)
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
        else
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
    }

    // place levels back-to-front, separated by decoded run_before gaps;
    // n > 24 (DC blocks) stores raw levels, otherwise dequantize via qmul
    coeff_num = zeros_left + total_coeff - 1;
    j = scantable[coeff_num];
    if(n > 24){
        block[j] = level[0];
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= level[i];
        }
    }else{
        block[j] = (level[0] * qmul[j] + 32)>>6;
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= (level[i] * qmul[j] + 32)>>6;
        }
    }

    if(zeros_left<0){
        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    return 0;
}
3047
3048 static void predict_field_decoding_flag(H264Context *h){
3049 MpegEncContext * const s = &h->s;
3050 const int mb_xy= h->mb_xy;
3051 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
3052 ? s->current_picture.mb_type[mb_xy-1]
3053 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
3054 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
3055 : 0;
3056 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
3057 }
3058
/**
 * decodes a P_SKIP or B_SKIP macroblock: no residual or motion data is
 * present in the bitstream, so motion is fully predicted from neighbours
 * (direct prediction for B, P-skip median prediction for P).
 */
static void decode_mb_skip(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int mb_type=0;

    // skipped MBs have no coded coefficients
    memset(h->non_zero_count[mb_xy], 0, 16);
    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui

    if(MB_FIELD)
        mb_type|= MB_TYPE_INTERLACED;

    if( h->slice_type_nos == FF_B_TYPE )
    {
        // just for fill_caches. pred_direct_motion will set the real mb_type
        mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;

        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        ff_h264_pred_direct_motion(h, &mb_type);
        // pred_direct_motion may rewrite mb_type; re-assert the skip flag
        mb_type|= MB_TYPE_SKIP;
    }
    else
    {
        int mx, my;
        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;

        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        // P_SKIP: reference index 0, predicted motion vector for all 16 4x4 blocks
        pred_pskip_motion(h, &mx, &my);
        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
        fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
    }

    write_back_motion(h, mb_type);
    s->current_picture.mb_type[mb_xy]= mb_type;
    s->current_picture.qscale_table[mb_xy]= s->qscale;
    h->slice_table[ mb_xy ]= h->slice_num;
    h->prev_mb_skipped= 1;
}
3099
/**
 * decodes a macroblock coded with CAVLC entropy coding: skip handling,
 * mb_type, prediction data (intra modes or references + motion vectors),
 * coded block pattern, and the residual blocks.
 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 */
static int decode_mb_cavlc(H264Context *h){
    MpegEncContext * const s = &h->s;
    int mb_xy;
    int partition_count;
    unsigned int mb_type, cbp;
    int dct8x8_allowed= h->pps.transform_8x8_mode;

    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;

    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                down the code */
    /* mb_skip_run handling: a nonzero remaining run means this MB is skipped */
    if(h->slice_type_nos != FF_I_TYPE){
        if(s->mb_skip_run==-1)
            s->mb_skip_run= get_ue_golomb(&s->gb);

        if (s->mb_skip_run--) {
            if(FRAME_MBAFF && (s->mb_y&1) == 0){
                // top MB of an MBAFF pair: the field flag is either coded
                // (last skipped MB) or predicted from neighbours
                if(s->mb_skip_run==0)
                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
                else
                    predict_field_decoding_flag(h);
            }
            decode_mb_skip(h);
            return 0;
        }
    }
    if(FRAME_MBAFF){
        if( (s->mb_y&1) == 0 )
            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
    }

    h->prev_mb_skipped= 0;

    /* mb_type: mapped through the per-slice-type info tables; values past
       the inter range fall through to the intra table */
    mb_type= get_ue_golomb(&s->gb);
    if(h->slice_type_nos == FF_B_TYPE){
        if(mb_type < 23){
            partition_count= b_mb_type_info[mb_type].partition_count;
            mb_type= b_mb_type_info[mb_type].type;
        }else{
            mb_type -= 23;
            goto decode_intra_mb;
        }
    }else if(h->slice_type_nos == FF_P_TYPE){
        if(mb_type < 5){
            partition_count= p_mb_type_info[mb_type].partition_count;
            mb_type= p_mb_type_info[mb_type].type;
        }else{
            mb_type -= 5;
            goto decode_intra_mb;
        }
    }else{
        assert(h->slice_type_nos == FF_I_TYPE);
        if(h->slice_type == FF_SI_TYPE && mb_type)
            mb_type--;
    decode_intra_mb:
        if(mb_type > 25){
            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
            return -1;
        }
        partition_count=0;
        cbp= i_mb_type_info[mb_type].cbp;
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
        mb_type= i_mb_type_info[mb_type].type;
    }

    if(MB_FIELD)
        mb_type |= MB_TYPE_INTERLACED;

    h->slice_table[ mb_xy ]= h->slice_num;

    /* I_PCM: raw samples follow, byte-aligned, no prediction or residual */
    if(IS_INTRA_PCM(mb_type)){
        unsigned int x;

        // We assume these blocks are very rare so we do not optimize it.
        align_get_bits(&s->gb);

        // The pixels are stored in the same order as levels in h->mb array.
        for(x=0; x < (CHROMA ? 384 : 256); x++){
            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
        }

        // In deblocking, the quantizer is 0
        s->current_picture.qscale_table[mb_xy]= 0;
        // All coeffs are present
        memset(h->non_zero_count[mb_xy], 16, 16);

        s->current_picture.mb_type[mb_xy]= mb_type;
        return 0;
    }

    // MBAFF pair MBs address references per field, i.e. twice as many
    if(MB_MBAFF){
        h->ref_count[0] <<= 1;
        h->ref_count[1] <<= 1;
    }

    fill_caches(h, mb_type, 0);

    //mb_pred
    if(IS_INTRA(mb_type)){
        int pred_mode;
//            init_top_left_availability(h);
        if(IS_INTRA4x4(mb_type)){
            int i;
            int di = 1;
            if(dct8x8_allowed && get_bits1(&s->gb)){
                mb_type |= MB_TYPE_8x8DCT;
                di = 4; // with 8x8 DCT one pred mode covers four 4x4 blocks
            }

//                fill_intra4x4_pred_table(h);
            for(i=0; i<16; i+=di){
                int mode= pred_intra_mode(h, i);

                // prev_intra4x4_pred_mode_flag==0: explicit 3-bit mode,
                // bumped by one if it is >= the predicted mode
                if(!get_bits1(&s->gb)){
                    const int rem_mode= get_bits(&s->gb, 3);
                    mode = rem_mode + (rem_mode >= mode);
                }

                if(di==4)
                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
                else
                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
            }
            ff_h264_write_back_intra_pred_mode(h);
            if( ff_h264_check_intra4x4_pred_mode(h) < 0)
                return -1;
        }else{
            h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
            if(h->intra16x16_pred_mode < 0)
                return -1;
        }
        if(CHROMA){
            pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
            if(pred_mode < 0)
                return -1;
            h->chroma_pred_mode= pred_mode;
        }
    }else if(partition_count==4){
        /* 8x8 partitioning: four sub_mb_types, then per-list references
           and per-sub-partition motion vectors */
        int i, j, sub_partition_count[4], list, ref[2][4];

        if(h->slice_type_nos == FF_B_TYPE){
            for(i=0; i<4; i++){
                h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
                if(h->sub_mb_type[i] >=13){
                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                    return -1;
                }
                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
            }
            if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
                ff_h264_pred_direct_motion(h, &mb_type);
                h->ref_cache[0][scan8[4]] =
                h->ref_cache[1][scan8[4]] =
                h->ref_cache[0][scan8[12]] =
                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
            }
        }else{
            assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
            for(i=0; i<4; i++){
                h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
                if(h->sub_mb_type[i] >=4){
                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                    return -1;
                }
                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
            }
        }

        /* reference indices: implicit 0 with one ref, inverted single bit
           with two refs, ue(v) otherwise */
        for(list=0; list<h->list_count; list++){
            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
            for(i=0; i<4; i++){
                if(IS_DIRECT(h->sub_mb_type[i])) continue;
                if(IS_DIR(h->sub_mb_type[i], 0, list)){
                    unsigned int tmp;
                    if(ref_count == 1){
                        tmp= 0;
                    }else if(ref_count == 2){
                        tmp= get_bits1(&s->gb)^1;
                    }else{
                        tmp= get_ue_golomb_31(&s->gb);
                        if(tmp>=ref_count){
                            av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
                            return -1;
                        }
                    }
                    ref[list][i]= tmp;
                }else{
                    //FIXME
                    ref[list][i] = -1;
                }
            }
        }

        if(dct8x8_allowed)
            dct8x8_allowed = get_dct8x8_allowed(h);

        for(list=0; list<h->list_count; list++){
            for(i=0; i<4; i++){
                if(IS_DIRECT(h->sub_mb_type[i])) {
                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
                    continue;
                }
                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];

                if(IS_DIR(h->sub_mb_type[i], 0, list)){
                    const int sub_mb_type= h->sub_mb_type[i];
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                    for(j=0; j<sub_partition_count[i]; j++){
                        int mx, my;
                        const int index= 4*i + block_width*j;
                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
                        // mvd is coded relative to the predicted vector
                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
                        mx += get_se_golomb(&s->gb);
                        my += get_se_golomb(&s->gb);
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        // replicate the vector over all 4x4 cells of the sub-partition
                        if(IS_SUB_8X8(sub_mb_type)){
                            mv_cache[ 1 ][0]=
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
                            mv_cache[ 1 ][1]=
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
                        }else if(IS_SUB_8X4(sub_mb_type)){
                            mv_cache[ 1 ][0]= mx;
                            mv_cache[ 1 ][1]= my;
                        }else if(IS_SUB_4X8(sub_mb_type)){
                            mv_cache[ 8 ][0]= mx;
                            mv_cache[ 8 ][1]= my;
                        }
                        mv_cache[ 0 ][0]= mx;
                        mv_cache[ 0 ][1]= my;
                    }
                }else{
                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
                    p[0] = p[1]=
                    p[8] = p[9]= 0;
                }
            }
        }
    }else if(IS_DIRECT(mb_type)){
        ff_h264_pred_direct_motion(h, &mb_type);
        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    }else{
        /* 16x16, 16x8 or 8x16 inter prediction */
        int list, mx, my, i;
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
        if(IS_16X16(mb_type)){
            for(list=0; list<h->list_count; list++){
                unsigned int val;
                if(IS_DIR(mb_type, 0, list)){
                    if(h->ref_count[list]==1){
                        val= 0;
                    }else if(h->ref_count[list]==2){
                        val= get_bits1(&s->gb)^1;
                    }else{
                        val= get_ue_golomb_31(&s->gb);
                        if(val >= h->ref_count[list]){
                            av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                            return -1;
                        }
                    }
                }else
                    val= LIST_NOT_USED&0xFF;
                fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
            }
            for(list=0; list<h->list_count; list++){
                unsigned int val;
                if(IS_DIR(mb_type, 0, list)){
                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
                    mx += get_se_golomb(&s->gb);
                    my += get_se_golomb(&s->gb);
                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                    val= pack16to32(mx,my);
                }else
                    val=0;
                fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
            }
        }
        else if(IS_16X8(mb_type)){
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        if(h->ref_count[list] == 1){
                            val= 0;
                        }else if(h->ref_count[list] == 2){
                            val= get_bits1(&s->gb)^1;
                        }else{
                            val= get_ue_golomb_31(&s->gb);
                            if(val >= h->ref_count[list]){
                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                return -1;
                            }
                        }
                    }else
                        val= LIST_NOT_USED&0xFF;
                    fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
                }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
                        mx += get_se_golomb(&s->gb);
                        my += get_se_golomb(&s->gb);
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        val= pack16to32(mx,my);
                    }else
                        val=0;
                    fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
                }
            }
        }else{
            assert(IS_8X16(mb_type));
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                        if(h->ref_count[list]==1){
                            val= 0;
                        }else if(h->ref_count[list]==2){
                            val= get_bits1(&s->gb)^1;
                        }else{
                            val= get_ue_golomb_31(&s->gb);
                            if(val >= h->ref_count[list]){
                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                return -1;
                            }
                        }
                    }else
                        val= LIST_NOT_USED&0xFF;
                    fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
                }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
                        mx += get_se_golomb(&s->gb);
                        my += get_se_golomb(&s->gb);
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        val= pack16to32(mx,my);
                    }else
                        val=0;
                    fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
                }
            }
        }
    }

    if(IS_INTER(mb_type))
        write_back_motion(h, mb_type);

    /* coded_block_pattern (not present for intra16x16, where it comes
       from the mb_type table); the Golomb code is remapped per mode */
    if(!IS_INTRA16x16(mb_type)){
        cbp= get_ue_golomb(&s->gb);
        if(cbp > 47){
            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
            return -1;
        }

        if(CHROMA){
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
            else                     cbp= golomb_to_inter_cbp   [cbp];
        }else{
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
            else                     cbp= golomb_to_inter_cbp_gray[cbp];
        }
    }
    h->cbp = cbp;

    // transform_size_8x8_flag for inter MBs with coded luma blocks
    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
        if(get_bits1(&s->gb)){
            mb_type |= MB_TYPE_8x8DCT;
            h->cbp_table[mb_xy]= cbp;
        }
    }
    s->current_picture.mb_type[mb_xy]= mb_type;

    /* residual decoding */
    if(cbp || IS_INTRA16x16(mb_type)){
        int i8x8, i4x4, chroma_idx;
        int dquant;
        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
        const uint8_t *scan, *scan8x8, *dc_scan;

//        fill_non_zero_count_cache(h);

        if(IS_INTERLACED(mb_type)){
            scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
            scan= s->qscale ? h->field_scan : h->field_scan_q0;
            dc_scan= luma_dc_field_scan;
        }else{
            scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
            dc_scan= luma_dc_zigzag_scan;
        }

        dquant= get_se_golomb(&s->gb);

        if( dquant > 25 || dquant < -26 ){
            av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
            return -1;
        }

        // QP is updated modulo 52
        s->qscale += dquant;
        if(((unsigned)s->qscale) > 51){
            if(s->qscale<0) s->qscale+= 52;
            else            s->qscale-= 52;
        }

        h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
        h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
        if(IS_INTRA16x16(mb_type)){
            // luma DC block, then (if coded) the 16 AC blocks without DC coeff
            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
                return -1; //FIXME continue if partitioned and other return -1 too
            }

            assert((cbp&15) == 0 || (cbp&15) == 15);

            if(cbp&15){
                for(i8x8=0; i8x8<4; i8x8++){
                    for(i4x4=0; i4x4<4; i4x4++){
                        const int index= i4x4 + 4*i8x8;
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
                            return -1;
                        }
                    }
                }
            }else{
                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
            }
        }else{
            // non-intra16x16: one cbp bit per 8x8 luma block
            for(i8x8=0; i8x8<4; i8x8++){
                if(cbp & (1<<i8x8)){
                    if(IS_8x8DCT(mb_type)){
                        DCTELEM *buf = &h->mb[64*i8x8];
                        uint8_t *nnz;
                        for(i4x4=0; i4x4<4; i4x4++){
                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
                                return -1;
                        }
                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
                    }else{
                        for(i4x4=0; i4x4<4; i4x4++){
                            const int index= i4x4 + 4*i8x8;

                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
                                return -1;
                            }
                        }
                    }
                }else{
                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
                }
            }
        }

        // chroma DC blocks (cbp bit 4 or 5 set)
        if(cbp&0x30){
            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
                    return -1;
                }
        }

        // chroma AC blocks (cbp bit 5 set), otherwise clear their nnz cache
        if(cbp&0x20){
            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                for(i4x4=0; i4x4<4; i4x4++){
                    const int index= 16 + 4*chroma_idx + i4x4;
                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
                        return -1;
                    }
                }
            }
        }else{
            uint8_t * const nnz= &h->non_zero_count_cache[0];
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
        }
    }else{
        // nothing coded at all: clear the whole nnz cache
        uint8_t * const nnz= &h->non_zero_count_cache[0];
        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
    }
    s->current_picture.qscale_table[mb_xy]= s->qscale;
    write_back_non_zero_count(h);

    // undo the MBAFF ref_count doubling from above
    if(MB_MBAFF){
        h->ref_count[0] >>= 1;
        h->ref_count[1] >>= 1;
    }

    return 0;
}
3609 2176
3610 static int decode_cabac_field_decoding_flag(H264Context *h) { 2177 static int decode_cabac_field_decoding_flag(H264Context *h) {
3611 MpegEncContext * const s = &h->s; 2178 MpegEncContext * const s = &h->s;
3612 const int mb_x = s->mb_x; 2179 const int mb_x = s->mb_x;
3613 const int mb_y = s->mb_y & ~1; 2180 const int mb_y = s->mb_y & ~1;
4764 } 3331 }
4765 } 3332 }
4766 3333
4767 } else { 3334 } else {
4768 for(;;){ 3335 for(;;){
4769 int ret = decode_mb_cavlc(h); 3336 int ret = ff_h264_decode_mb_cavlc(h);
4770 3337
4771 if(ret>=0) ff_h264_hl_decode_mb(h); 3338 if(ret>=0) ff_h264_hl_decode_mb(h);
4772 3339
4773 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? 3340 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
4774 s->mb_y++; 3341 s->mb_y++;
4775 ret = decode_mb_cavlc(h); 3342 ret = ff_h264_decode_mb_cavlc(h);
4776 3343
4777 if(ret>=0) ff_h264_hl_decode_mb(h); 3344 if(ret>=0) ff_h264_hl_decode_mb(h);
4778 s->mb_y--; 3345 s->mb_y--;
4779 } 3346 }
4780 3347