Mercurial > libavcodec.hg
comparison h264.c @ 10866:d26e9b4d2ca1 libavcodec
Split cavlc out of h264.c.
Seems to speed the code up a little...
The placement of many generic functions between h264.c and h264.h is still an open question.
Currently they are split somewhat arbitrarily between the two files.
author | michael |
---|---|
date | Wed, 13 Jan 2010 01:59:19 +0000 |
parents | e3f5eb016712 |
children | 7101061bfa0f |
comparison
equal
deleted
inserted
replaced
10865:bcdc5343a577 | 10866:d26e9b4d2ca1 |
---|---|
44 #endif | 44 #endif |
45 | 45 |
46 //#undef NDEBUG | 46 //#undef NDEBUG |
47 #include <assert.h> | 47 #include <assert.h> |
48 | 48 |
49 static VLC coeff_token_vlc[4]; | |
50 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2]; | |
51 static const int coeff_token_vlc_tables_size[4]={520,332,280,256}; | |
52 | |
53 static VLC chroma_dc_coeff_token_vlc; | |
54 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; | |
55 static const int chroma_dc_coeff_token_vlc_table_size = 256; | |
56 | |
57 static VLC total_zeros_vlc[15]; | |
58 static VLC_TYPE total_zeros_vlc_tables[15][512][2]; | |
59 static const int total_zeros_vlc_tables_size = 512; | |
60 | |
61 static VLC chroma_dc_total_zeros_vlc[3]; | |
62 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; | |
63 static const int chroma_dc_total_zeros_vlc_tables_size = 8; | |
64 | |
65 static VLC run_vlc[6]; | |
66 static VLC_TYPE run_vlc_tables[6][8][2]; | |
67 static const int run_vlc_tables_size = 8; | |
68 | |
69 static VLC run7_vlc; | |
70 static VLC_TYPE run7_vlc_table[96][2]; | |
71 static const int run7_vlc_table_size = 96; | |
72 | |
73 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); | 49 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); |
74 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); | 50 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); |
75 | 51 |
76 static const uint8_t rem6[52]={ | 52 static const uint8_t rem6[52]={ |
77 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, | 53 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, |
78 }; | 54 }; |
79 | 55 |
80 static const uint8_t div6[52]={ | 56 static const uint8_t div6[52]={ |
81 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, | 57 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, |
82 }; | 58 }; |
83 | |
84 static const uint8_t left_block_options[4][8]={ | |
85 {0,1,2,3,7,10,8,11}, | |
86 {2,2,3,3,8,11,8,11}, | |
87 {0,0,1,1,7,10,7,10}, | |
88 {0,2,0,2,7,10,7,10} | |
89 }; | |
90 | |
91 #define LEVEL_TAB_BITS 8 | |
92 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; | |
93 | |
94 static void fill_caches(H264Context *h, int mb_type, int for_deblock){ | |
95 MpegEncContext * const s = &h->s; | |
96 const int mb_xy= h->mb_xy; | |
97 int topleft_xy, top_xy, topright_xy, left_xy[2]; | |
98 int topleft_type, top_type, topright_type, left_type[2]; | |
99 const uint8_t * left_block; | |
100 int topleft_partition= -1; | |
101 int i; | |
102 | |
103 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE); | |
104 | |
105 //FIXME deblocking could skip the intra and nnz parts. | |
106 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF) | |
107 return; | |
108 | |
109 /* Wow, what a mess, why didn't they simplify the interlacing & intra | |
110 * stuff, I can't imagine that these complex rules are worth it. */ | |
111 | |
112 topleft_xy = top_xy - 1; | |
113 topright_xy= top_xy + 1; | |
114 left_xy[1] = left_xy[0] = mb_xy-1; | |
115 left_block = left_block_options[0]; | |
116 if(FRAME_MBAFF){ | |
117 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; | |
118 const int top_pair_xy = pair_xy - s->mb_stride; | |
119 const int topleft_pair_xy = top_pair_xy - 1; | |
120 const int topright_pair_xy = top_pair_xy + 1; | |
121 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]); | |
122 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); | |
123 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]); | |
124 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); | |
125 const int curr_mb_field_flag = IS_INTERLACED(mb_type); | |
126 const int bottom = (s->mb_y & 1); | |
127 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag); | |
128 | |
129 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){ | |
130 top_xy -= s->mb_stride; | |
131 } | |
132 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){ | |
133 topleft_xy -= s->mb_stride; | |
134 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) { | |
135 topleft_xy += s->mb_stride; | |
136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition | |
137 topleft_partition = 0; | |
138 } | |
139 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){ | |
140 topright_xy -= s->mb_stride; | |
141 } | |
142 if (left_mb_field_flag != curr_mb_field_flag) { | |
143 left_xy[1] = left_xy[0] = pair_xy - 1; | |
144 if (curr_mb_field_flag) { | |
145 left_xy[1] += s->mb_stride; | |
146 left_block = left_block_options[3]; | |
147 } else { | |
148 left_block= left_block_options[2 - bottom]; | |
149 } | |
150 } | |
151 } | |
152 | |
153 h->top_mb_xy = top_xy; | |
154 h->left_mb_xy[0] = left_xy[0]; | |
155 h->left_mb_xy[1] = left_xy[1]; | |
156 if(for_deblock){ | |
157 topleft_type = 0; | |
158 topright_type = 0; | |
159 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0; | |
160 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0; | |
161 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0; | |
162 | |
163 if(MB_MBAFF && !IS_INTRA(mb_type)){ | |
164 int list; | |
165 for(list=0; list<h->list_count; list++){ | |
166 //These values where changed for ease of performing MC, we need to change them back | |
167 //FIXME maybe we can make MC and loop filter use the same values or prevent | |
168 //the MC code from changing ref_cache and rather use a temporary array. | |
169 if(USES_LIST(mb_type,list)){ | |
170 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; | |
171 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | |
172 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101; | |
173 ref += h->b8_stride; | |
174 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | |
175 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101; | |
176 } | |
177 } | |
178 } | |
179 }else{ | |
180 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; | |
181 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; | |
182 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; | |
183 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; | |
184 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; | |
185 | |
186 if(IS_INTRA(mb_type)){ | |
187 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; | |
188 h->topleft_samples_available= | |
189 h->top_samples_available= | |
190 h->left_samples_available= 0xFFFF; | |
191 h->topright_samples_available= 0xEEEA; | |
192 | |
193 if(!(top_type & type_mask)){ | |
194 h->topleft_samples_available= 0xB3FF; | |
195 h->top_samples_available= 0x33FF; | |
196 h->topright_samples_available= 0x26EA; | |
197 } | |
198 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ | |
199 if(IS_INTERLACED(mb_type)){ | |
200 if(!(left_type[0] & type_mask)){ | |
201 h->topleft_samples_available&= 0xDFFF; | |
202 h->left_samples_available&= 0x5FFF; | |
203 } | |
204 if(!(left_type[1] & type_mask)){ | |
205 h->topleft_samples_available&= 0xFF5F; | |
206 h->left_samples_available&= 0xFF5F; | |
207 } | |
208 }else{ | |
209 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num | |
210 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0; | |
211 assert(left_xy[0] == left_xy[1]); | |
212 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ | |
213 h->topleft_samples_available&= 0xDF5F; | |
214 h->left_samples_available&= 0x5F5F; | |
215 } | |
216 } | |
217 }else{ | |
218 if(!(left_type[0] & type_mask)){ | |
219 h->topleft_samples_available&= 0xDF5F; | |
220 h->left_samples_available&= 0x5F5F; | |
221 } | |
222 } | |
223 | |
224 if(!(topleft_type & type_mask)) | |
225 h->topleft_samples_available&= 0x7FFF; | |
226 | |
227 if(!(topright_type & type_mask)) | |
228 h->topright_samples_available&= 0xFBFF; | |
229 | |
230 if(IS_INTRA4x4(mb_type)){ | |
231 if(IS_INTRA4x4(top_type)){ | |
232 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; | |
233 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; | |
234 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; | |
235 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; | |
236 }else{ | |
237 int pred; | |
238 if(!(top_type & type_mask)) | |
239 pred= -1; | |
240 else{ | |
241 pred= 2; | |
242 } | |
243 h->intra4x4_pred_mode_cache[4+8*0]= | |
244 h->intra4x4_pred_mode_cache[5+8*0]= | |
245 h->intra4x4_pred_mode_cache[6+8*0]= | |
246 h->intra4x4_pred_mode_cache[7+8*0]= pred; | |
247 } | |
248 for(i=0; i<2; i++){ | |
249 if(IS_INTRA4x4(left_type[i])){ | |
250 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; | |
251 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; | |
252 }else{ | |
253 int pred; | |
254 if(!(left_type[i] & type_mask)) | |
255 pred= -1; | |
256 else{ | |
257 pred= 2; | |
258 } | |
259 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= | |
260 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; | |
261 } | |
262 } | |
263 } | |
264 } | |
265 } | |
266 | |
267 | |
268 /* | |
269 0 . T T. T T T T | |
270 1 L . .L . . . . | |
271 2 L . .L . . . . | |
272 3 . T TL . . . . | |
273 4 L . .L . . . . | |
274 5 L . .. . . . . | |
275 */ | |
276 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | |
277 if(top_type){ | |
278 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4]; | |
279 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5]; | |
280 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6]; | |
281 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3]; | |
282 | |
283 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9]; | |
284 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8]; | |
285 | |
286 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12]; | |
287 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11]; | |
288 | |
289 }else{ | |
290 h->non_zero_count_cache[4+8*0]= | |
291 h->non_zero_count_cache[5+8*0]= | |
292 h->non_zero_count_cache[6+8*0]= | |
293 h->non_zero_count_cache[7+8*0]= | |
294 | |
295 h->non_zero_count_cache[1+8*0]= | |
296 h->non_zero_count_cache[2+8*0]= | |
297 | |
298 h->non_zero_count_cache[1+8*3]= | |
299 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64; | |
300 | |
301 } | |
302 | |
303 for (i=0; i<2; i++) { | |
304 if(left_type[i]){ | |
305 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]]; | |
306 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]]; | |
307 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]]; | |
308 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]]; | |
309 }else{ | |
310 h->non_zero_count_cache[3+8*1 + 2*8*i]= | |
311 h->non_zero_count_cache[3+8*2 + 2*8*i]= | |
312 h->non_zero_count_cache[0+8*1 + 8*i]= | |
313 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64; | |
314 } | |
315 } | |
316 | |
317 if( h->pps.cabac ) { | |
318 // top_cbp | |
319 if(top_type) { | |
320 h->top_cbp = h->cbp_table[top_xy]; | |
321 } else if(IS_INTRA(mb_type)) { | |
322 h->top_cbp = 0x1C0; | |
323 } else { | |
324 h->top_cbp = 0; | |
325 } | |
326 // left_cbp | |
327 if (left_type[0]) { | |
328 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; | |
329 } else if(IS_INTRA(mb_type)) { | |
330 h->left_cbp = 0x1C0; | |
331 } else { | |
332 h->left_cbp = 0; | |
333 } | |
334 if (left_type[0]) { | |
335 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1; | |
336 } | |
337 if (left_type[1]) { | |
338 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3; | |
339 } | |
340 } | |
341 | |
342 #if 1 | |
343 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ | |
344 int list; | |
345 for(list=0; list<h->list_count; list++){ | |
346 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){ | |
347 /*if(!h->mv_cache_clean[list]){ | |
348 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? | |
349 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); | |
350 h->mv_cache_clean[list]= 1; | |
351 }*/ | |
352 continue; | |
353 } | |
354 h->mv_cache_clean[list]= 0; | |
355 | |
356 if(USES_LIST(top_type, list)){ | |
357 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | |
358 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | |
359 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; | |
360 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1]; | |
361 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2]; | |
362 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3]; | |
363 h->ref_cache[list][scan8[0] + 0 - 1*8]= | |
364 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; | |
365 h->ref_cache[list][scan8[0] + 2 - 1*8]= | |
366 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; | |
367 }else{ | |
368 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= | |
369 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= | |
370 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= | |
371 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; | |
372 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; | |
373 } | |
374 | |
375 for(i=0; i<2; i++){ | |
376 int cache_idx = scan8[0] - 1 + i*2*8; | |
377 if(USES_LIST(left_type[i], list)){ | |
378 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; | |
379 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; | |
380 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; | |
381 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; | |
382 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; | |
383 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; | |
384 }else{ | |
385 *(uint32_t*)h->mv_cache [list][cache_idx ]= | |
386 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; | |
387 h->ref_cache[list][cache_idx ]= | |
388 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE; | |
389 } | |
390 } | |
391 | |
392 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF)) | |
393 continue; | |
394 | |
395 if(USES_LIST(topleft_type, list)){ | |
396 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride); | |
397 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride); | |
398 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | |
399 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
400 }else{ | |
401 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; | |
402 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | |
403 } | |
404 | |
405 if(USES_LIST(topright_type, list)){ | |
406 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; | |
407 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; | |
408 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | |
409 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
410 }else{ | |
411 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; | |
412 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | |
413 } | |
414 | |
415 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF) | |
416 continue; | |
417 | |
418 h->ref_cache[list][scan8[5 ]+1] = | |
419 h->ref_cache[list][scan8[7 ]+1] = | |
420 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) | |
421 h->ref_cache[list][scan8[4 ]] = | |
422 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; | |
423 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= | |
424 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= | |
425 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) | |
426 *(uint32_t*)h->mv_cache [list][scan8[4 ]]= | |
427 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; | |
428 | |
429 if( h->pps.cabac ) { | |
430 /* XXX beurk, Load mvd */ | |
431 if(USES_LIST(top_type, list)){ | |
432 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | |
433 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0]; | |
434 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1]; | |
435 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2]; | |
436 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3]; | |
437 }else{ | |
438 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]= | |
439 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]= | |
440 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= | |
441 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0; | |
442 } | |
443 if(USES_LIST(left_type[0], list)){ | |
444 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | |
445 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; | |
446 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; | |
447 }else{ | |
448 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= | |
449 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; | |
450 } | |
451 if(USES_LIST(left_type[1], list)){ | |
452 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; | |
453 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; | |
454 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; | |
455 }else{ | |
456 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= | |
457 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; | |
458 } | |
459 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= | |
460 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= | |
461 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) | |
462 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= | |
463 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; | |
464 | |
465 if(h->slice_type_nos == FF_B_TYPE){ | |
466 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1); | |
467 | |
468 if(IS_DIRECT(top_type)){ | |
469 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101; | |
470 }else if(IS_8X8(top_type)){ | |
471 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; | |
472 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; | |
473 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; | |
474 }else{ | |
475 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; | |
476 } | |
477 | |
478 if(IS_DIRECT(left_type[0])) | |
479 h->direct_cache[scan8[0] - 1 + 0*8]= 1; | |
480 else if(IS_8X8(left_type[0])) | |
481 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; | |
482 else | |
483 h->direct_cache[scan8[0] - 1 + 0*8]= 0; | |
484 | |
485 if(IS_DIRECT(left_type[1])) | |
486 h->direct_cache[scan8[0] - 1 + 2*8]= 1; | |
487 else if(IS_8X8(left_type[1])) | |
488 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; | |
489 else | |
490 h->direct_cache[scan8[0] - 1 + 2*8]= 0; | |
491 } | |
492 } | |
493 | |
494 if(FRAME_MBAFF){ | |
495 #define MAP_MVS\ | |
496 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ | |
497 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ | |
498 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ | |
499 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ | |
500 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ | |
501 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ | |
502 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ | |
503 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ | |
504 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ | |
505 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) | |
506 if(MB_FIELD){ | |
507 #define MAP_F2F(idx, mb_type)\ | |
508 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ | |
509 h->ref_cache[list][idx] <<= 1;\ | |
510 h->mv_cache[list][idx][1] /= 2;\ | |
511 h->mvd_cache[list][idx][1] /= 2;\ | |
512 } | |
513 MAP_MVS | |
514 #undef MAP_F2F | |
515 }else{ | |
516 #define MAP_F2F(idx, mb_type)\ | |
517 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ | |
518 h->ref_cache[list][idx] >>= 1;\ | |
519 h->mv_cache[list][idx][1] <<= 1;\ | |
520 h->mvd_cache[list][idx][1] <<= 1;\ | |
521 } | |
522 MAP_MVS | |
523 #undef MAP_F2F | |
524 } | |
525 } | |
526 } | |
527 } | |
528 #endif | |
529 | |
530 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); | |
531 } | |
532 | 59 |
533 void ff_h264_write_back_intra_pred_mode(H264Context *h){ | 60 void ff_h264_write_back_intra_pred_mode(H264Context *h){ |
534 const int mb_xy= h->mb_xy; | 61 const int mb_xy= h->mb_xy; |
535 | 62 |
536 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1]; | 63 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1]; |
612 return -1; | 139 return -1; |
613 } | 140 } |
614 } | 141 } |
615 | 142 |
616 return mode; | 143 return mode; |
617 } | |
618 | |
619 /** | |
620 * gets the predicted intra4x4 prediction mode. | |
621 */ | |
622 static inline int pred_intra_mode(H264Context *h, int n){ | |
623 const int index8= scan8[n]; | |
624 const int left= h->intra4x4_pred_mode_cache[index8 - 1]; | |
625 const int top = h->intra4x4_pred_mode_cache[index8 - 8]; | |
626 const int min= FFMIN(left, top); | |
627 | |
628 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); | |
629 | |
630 if(min<0) return DC_PRED; | |
631 else return min; | |
632 } | |
633 | |
634 static inline void write_back_non_zero_count(H264Context *h){ | |
635 const int mb_xy= h->mb_xy; | |
636 | |
637 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1]; | |
638 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2]; | |
639 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3]; | |
640 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4]; | |
641 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4]; | |
642 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4]; | |
643 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4]; | |
644 | |
645 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2]; | |
646 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2]; | |
647 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1]; | |
648 | |
649 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; | |
650 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5]; | |
651 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4]; | |
652 } | |
653 | |
654 /** | |
655 * gets the predicted number of non-zero coefficients. | |
656 * @param n block index | |
657 */ | |
658 static inline int pred_non_zero_count(H264Context *h, int n){ | |
659 const int index8= scan8[n]; | |
660 const int left= h->non_zero_count_cache[index8 - 1]; | |
661 const int top = h->non_zero_count_cache[index8 - 8]; | |
662 int i= left + top; | |
663 | |
664 if(i<64) i= (i+1)>>1; | |
665 | |
666 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31); | |
667 | |
668 return i&31; | |
669 } | |
670 | |
671 static inline void write_back_motion(H264Context *h, int mb_type){ | |
672 MpegEncContext * const s = &h->s; | |
673 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; | |
674 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; | |
675 int list; | |
676 | |
677 if(!USES_LIST(mb_type, 0)) | |
678 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); | |
679 | |
680 for(list=0; list<h->list_count; list++){ | |
681 int y; | |
682 if(!USES_LIST(mb_type, list)) | |
683 continue; | |
684 | |
685 for(y=0; y<4; y++){ | |
686 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; | |
687 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y]; | |
688 } | |
689 if( h->pps.cabac ) { | |
690 if(IS_SKIP(mb_type)) | |
691 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4); | |
692 else | |
693 for(y=0; y<4; y++){ | |
694 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y]; | |
695 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; | |
696 } | |
697 } | |
698 | |
699 { | |
700 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; | |
701 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; | |
702 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; | |
703 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; | |
704 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; | |
705 } | |
706 } | |
707 | |
708 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){ | |
709 if(IS_8X8(mb_type)){ | |
710 uint8_t *direct_table = &h->direct_table[b8_xy]; | |
711 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; | |
712 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; | |
713 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; | |
714 } | |
715 } | |
716 } | 144 } |
717 | 145 |
718 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ | 146 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ |
719 int i, si, di; | 147 int i, si, di; |
720 uint8_t *dst; | 148 uint8_t *dst; |
1223 } | 651 } |
1224 | 652 |
1225 prefetch_motion(h, 1); | 653 prefetch_motion(h, 1); |
1226 } | 654 } |
1227 | 655 |
1228 static av_cold void init_cavlc_level_tab(void){ | |
1229 int suffix_length, mask; | |
1230 unsigned int i; | |
1231 | |
1232 for(suffix_length=0; suffix_length<7; suffix_length++){ | |
1233 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){ | |
1234 int prefix= LEVEL_TAB_BITS - av_log2(2*i); | |
1235 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length); | |
1236 | |
1237 mask= -(level_code&1); | |
1238 level_code= (((2+level_code)>>1) ^ mask) - mask; | |
1239 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){ | |
1240 cavlc_level_tab[suffix_length][i][0]= level_code; | |
1241 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length; | |
1242 }else if(prefix + 1 <= LEVEL_TAB_BITS){ | |
1243 cavlc_level_tab[suffix_length][i][0]= prefix+100; | |
1244 cavlc_level_tab[suffix_length][i][1]= prefix + 1; | |
1245 }else{ | |
1246 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100; | |
1247 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS; | |
1248 } | |
1249 } | |
1250 } | |
1251 } | |
1252 | |
1253 static av_cold void decode_init_vlc(void){ | |
1254 static int done = 0; | |
1255 | |
1256 if (!done) { | |
1257 int i; | |
1258 int offset; | |
1259 done = 1; | |
1260 | |
1261 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table; | |
1262 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size; | |
1263 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, | |
1264 &chroma_dc_coeff_token_len [0], 1, 1, | |
1265 &chroma_dc_coeff_token_bits[0], 1, 1, | |
1266 INIT_VLC_USE_NEW_STATIC); | |
1267 | |
1268 offset = 0; | |
1269 for(i=0; i<4; i++){ | |
1270 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; | |
1271 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i]; | |
1272 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, | |
1273 &coeff_token_len [i][0], 1, 1, | |
1274 &coeff_token_bits[i][0], 1, 1, | |
1275 INIT_VLC_USE_NEW_STATIC); | |
1276 offset += coeff_token_vlc_tables_size[i]; | |
1277 } | |
1278 /* | |
1279 * This is a one time safety check to make sure that | |
1280 * the packed static coeff_token_vlc table sizes | |
1281 * were initialized correctly. | |
1282 */ | |
1283 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables)); | |
1284 | |
1285 for(i=0; i<3; i++){ | |
1286 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i]; | |
1287 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size; | |
1288 init_vlc(&chroma_dc_total_zeros_vlc[i], | |
1289 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, | |
1290 &chroma_dc_total_zeros_len [i][0], 1, 1, | |
1291 &chroma_dc_total_zeros_bits[i][0], 1, 1, | |
1292 INIT_VLC_USE_NEW_STATIC); | |
1293 } | |
1294 for(i=0; i<15; i++){ | |
1295 total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; | |
1296 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; | |
1297 init_vlc(&total_zeros_vlc[i], | |
1298 TOTAL_ZEROS_VLC_BITS, 16, | |
1299 &total_zeros_len [i][0], 1, 1, | |
1300 &total_zeros_bits[i][0], 1, 1, | |
1301 INIT_VLC_USE_NEW_STATIC); | |
1302 } | |
1303 | |
1304 for(i=0; i<6; i++){ | |
1305 run_vlc[i].table = run_vlc_tables[i]; | |
1306 run_vlc[i].table_allocated = run_vlc_tables_size; | |
1307 init_vlc(&run_vlc[i], | |
1308 RUN_VLC_BITS, 7, | |
1309 &run_len [i][0], 1, 1, | |
1310 &run_bits[i][0], 1, 1, | |
1311 INIT_VLC_USE_NEW_STATIC); | |
1312 } | |
1313 run7_vlc.table = run7_vlc_table, | |
1314 run7_vlc.table_allocated = run7_vlc_table_size; | |
1315 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, | |
1316 &run_len [6][0], 1, 1, | |
1317 &run_bits[6][0], 1, 1, | |
1318 INIT_VLC_USE_NEW_STATIC); | |
1319 | |
1320 init_cavlc_level_tab(); | |
1321 } | |
1322 } | |
1323 | 656 |
1324 static void free_tables(H264Context *h){ | 657 static void free_tables(H264Context *h){ |
1325 int i; | 658 int i; |
1326 H264Context *hx; | 659 H264Context *hx; |
1327 av_freep(&h->intra4x4_pred_mode); | 660 av_freep(&h->intra4x4_pred_mode); |
1528 if(!avctx->has_b_frames) | 861 if(!avctx->has_b_frames) |
1529 s->low_delay= 1; | 862 s->low_delay= 1; |
1530 | 863 |
1531 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; | 864 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; |
1532 | 865 |
1533 decode_init_vlc(); | 866 ff_h264_decode_init_vlc(); |
1534 | 867 |
1535 if(avctx->extradata_size > 0 && avctx->extradata && | 868 if(avctx->extradata_size > 0 && avctx->extradata && |
1536 *(char *)avctx->extradata == 1){ | 869 *(char *)avctx->extradata == 1){ |
1537 h->is_avc = 1; | 870 h->is_avc = 1; |
1538 h->got_avcC = 0; | 871 h->got_avcC = 0; |
2838 case FF_SI_TYPE: return 4; | 2171 case FF_SI_TYPE: return 4; |
2839 default: return -1; | 2172 default: return -1; |
2840 } | 2173 } |
2841 } | 2174 } |
2842 | 2175 |
2843 /** | |
2844 * Reads a level prefix: counts the zero bits in front of the next set bit and consumes them together with that bit. | |
 * @param gb bit reader to consume from
 * @return log-1, i.e. the number of zero bits that preceded the set bit
 *         (assumes GET_CACHE yields the upcoming bits MSB-first in a 32-bit
 *         word and av_log2 returns the index of the highest set bit -- TODO
 *         confirm against the bit reader and libavutil headers)
2845 */ | |
2846 static inline int get_level_prefix(GetBitContext *gb){ | |
2847 unsigned int buf; | |
2848 int log; | |
2849 | |
// The reader macros below must stay in this order: open, refill, peek.
2850 OPEN_READER(re, gb); | |
2851 UPDATE_CACHE(re, gb); | |
2852 buf=GET_CACHE(re, gb); | |
2853 | |
// log = number of bits up to and including the first set bit of buf.
2854 log= 32 - av_log2(buf); | |
2855 #ifdef TRACE | |
2856 print_bin(buf>>(32-log), log); | |
2857 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); | |
2858 #endif | |
2859 | |
// Consume the whole prefix (zeros plus the terminating bit) and commit the reader state.
2860 LAST_SKIP_BITS(re, gb, log); | |
2861 CLOSE_READER(re, gb); | |
2862 | |
2863 return log-1; | |
2864 } | |
2865 | |
2866 static inline int get_dct8x8_allowed(H264Context *h){ | |
2867 if(h->sps.direct_8x8_inference_flag) | |
2868 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); | |
2869 else | |
2870 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); | |
2871 } | |
2872 | |
2873 /** | |
2874 * decodes a residual block. | |
2875 * @param n block index | |
2876 * @param scantable scantable | |
2877 * @param max_coeff number of coefficients in the block | |
2878 * @return <0 if an error occurred | |
2879 */ | |
2880 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){ | |
2881 MpegEncContext * const s = &h->s; | |
2882 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; | |
2883 int level[16]; | |
2884 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before; | |
2885 | |
2886 //FIXME put trailing_onex into the context | |
2887 | |
2888 if(n == CHROMA_DC_BLOCK_INDEX){ | |
2889 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); | |
2890 total_coeff= coeff_token>>2; | |
2891 }else{ | |
2892 if(n == LUMA_DC_BLOCK_INDEX){ | |
2893 total_coeff= pred_non_zero_count(h, 0); | |
2894 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); | |
2895 total_coeff= coeff_token>>2; | |
2896 }else{ | |
2897 total_coeff= pred_non_zero_count(h, n); | |
2898 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); | |
2899 total_coeff= coeff_token>>2; | |
2900 h->non_zero_count_cache[ scan8[n] ]= total_coeff; | |
2901 } | |
2902 } | |
2903 | |
2904 //FIXME set last_non_zero? | |
2905 | |
2906 if(total_coeff==0) | |
2907 return 0; | |
2908 if(total_coeff > (unsigned)max_coeff) { | |
2909 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff); | |
2910 return -1; | |
2911 } | |
2912 | |
2913 trailing_ones= coeff_token&3; | |
2914 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff); | |
2915 assert(total_coeff<=16); | |
2916 | |
2917 i = show_bits(gb, 3); | |
2918 skip_bits(gb, trailing_ones); | |
2919 level[0] = 1-((i&4)>>1); | |
2920 level[1] = 1-((i&2) ); | |
2921 level[2] = 1-((i&1)<<1); | |
2922 | |
2923 if(trailing_ones<total_coeff) { | |
2924 int mask, prefix; | |
2925 int suffix_length = total_coeff > 10 && trailing_ones < 3; | |
2926 int bitsi= show_bits(gb, LEVEL_TAB_BITS); | |
2927 int level_code= cavlc_level_tab[suffix_length][bitsi][0]; | |
2928 | |
2929 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); | |
2930 if(level_code >= 100){ | |
2931 prefix= level_code - 100; | |
2932 if(prefix == LEVEL_TAB_BITS) | |
2933 prefix += get_level_prefix(gb); | |
2934 | |
2935 //first coefficient has suffix_length equal to 0 or 1 | |
2936 if(prefix<14){ //FIXME try to build a large unified VLC table for all this | |
2937 if(suffix_length) | |
2938 level_code= (prefix<<1) + get_bits1(gb); //part | |
2939 else | |
2940 level_code= prefix; //part | |
2941 }else if(prefix==14){ | |
2942 if(suffix_length) | |
2943 level_code= (prefix<<1) + get_bits1(gb); //part | |
2944 else | |
2945 level_code= prefix + get_bits(gb, 4); //part | |
2946 }else{ | |
2947 level_code= 30 + get_bits(gb, prefix-3); //part | |
2948 if(prefix>=16) | |
2949 level_code += (1<<(prefix-3))-4096; | |
2950 } | |
2951 | |
2952 if(trailing_ones < 3) level_code += 2; | |
2953 | |
2954 suffix_length = 2; | |
2955 mask= -(level_code&1); | |
2956 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask; | |
2957 }else{ | |
2958 if(trailing_ones < 3) level_code += (level_code>>31)|1; | |
2959 | |
2960 suffix_length = 1; | |
2961 if(level_code + 3U > 6U) | |
2962 suffix_length++; | |
2963 level[trailing_ones]= level_code; | |
2964 } | |
2965 | |
2966 //remaining coefficients have suffix_length > 0 | |
2967 for(i=trailing_ones+1;i<total_coeff;i++) { | |
2968 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX }; | |
2969 int bitsi= show_bits(gb, LEVEL_TAB_BITS); | |
2970 level_code= cavlc_level_tab[suffix_length][bitsi][0]; | |
2971 | |
2972 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]); | |
2973 if(level_code >= 100){ | |
2974 prefix= level_code - 100; | |
2975 if(prefix == LEVEL_TAB_BITS){ | |
2976 prefix += get_level_prefix(gb); | |
2977 } | |
2978 if(prefix<15){ | |
2979 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length); | |
2980 }else{ | |
2981 level_code = (15<<suffix_length) + get_bits(gb, prefix-3); | |
2982 if(prefix>=16) | |
2983 level_code += (1<<(prefix-3))-4096; | |
2984 } | |
2985 mask= -(level_code&1); | |
2986 level_code= (((2+level_code)>>1) ^ mask) - mask; | |
2987 } | |
2988 level[i]= level_code; | |
2989 | |
2990 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length]) | |
2991 suffix_length++; | |
2992 } | |
2993 } | |
2994 | |
2995 if(total_coeff == max_coeff) | |
2996 zeros_left=0; | |
2997 else{ | |
2998 if(n == CHROMA_DC_BLOCK_INDEX) | |
2999 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); | |
3000 else | |
3001 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1); | |
3002 } | |
3003 | |
3004 coeff_num = zeros_left + total_coeff - 1; | |
3005 j = scantable[coeff_num]; | |
3006 if(n > 24){ | |
3007 block[j] = level[0]; | |
3008 for(i=1;i<total_coeff;i++) { | |
3009 if(zeros_left <= 0) | |
3010 run_before = 0; | |
3011 else if(zeros_left < 7){ | |
3012 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1); | |
3013 }else{ | |
3014 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); | |
3015 } | |
3016 zeros_left -= run_before; | |
3017 coeff_num -= 1 + run_before; | |
3018 j= scantable[ coeff_num ]; | |
3019 | |
3020 block[j]= level[i]; | |
3021 } | |
3022 }else{ | |
3023 block[j] = (level[0] * qmul[j] + 32)>>6; | |
3024 for(i=1;i<total_coeff;i++) { | |
3025 if(zeros_left <= 0) | |
3026 run_before = 0; | |
3027 else if(zeros_left < 7){ | |
3028 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1); | |
3029 }else{ | |
3030 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); | |
3031 } | |
3032 zeros_left -= run_before; | |
3033 coeff_num -= 1 + run_before; | |
3034 j= scantable[ coeff_num ]; | |
3035 | |
3036 block[j]= (level[i] * qmul[j] + 32)>>6; | |
3037 } | |
3038 } | |
3039 | |
3040 if(zeros_left<0){ | |
3041 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y); | |
3042 return -1; | |
3043 } | |
3044 | |
3045 return 0; | |
3046 } | |
3047 | |
3048 static void predict_field_decoding_flag(H264Context *h){ | |
3049 MpegEncContext * const s = &h->s; | |
3050 const int mb_xy= h->mb_xy; | |
3051 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) | |
3052 ? s->current_picture.mb_type[mb_xy-1] | |
3053 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) | |
3054 ? s->current_picture.mb_type[mb_xy-s->mb_stride] | |
3055 : 0; | |
3056 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; | |
3057 } | |
3058 | |
3059 /** | |
3060 * decodes a P_SKIP or B_SKIP macroblock | |
3061 */ | |
3062 static void decode_mb_skip(H264Context *h){ | |
3063 MpegEncContext * const s = &h->s; | |
3064 const int mb_xy= h->mb_xy; | |
3065 int mb_type=0; | |
3066 | |
3067 memset(h->non_zero_count[mb_xy], 0, 16); | |
3068 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui | |
3069 | |
3070 if(MB_FIELD) | |
3071 mb_type|= MB_TYPE_INTERLACED; | |
3072 | |
3073 if( h->slice_type_nos == FF_B_TYPE ) | |
3074 { | |
3075 // just for fill_caches. pred_direct_motion will set the real mb_type | |
3076 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; | |
3077 | |
3078 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... | |
3079 ff_h264_pred_direct_motion(h, &mb_type); | |
3080 mb_type|= MB_TYPE_SKIP; | |
3081 } | |
3082 else | |
3083 { | |
3084 int mx, my; | |
3085 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; | |
3086 | |
3087 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... | |
3088 pred_pskip_motion(h, &mx, &my); | |
3089 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); | |
3090 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); | |
3091 } | |
3092 | |
3093 write_back_motion(h, mb_type); | |
3094 s->current_picture.mb_type[mb_xy]= mb_type; | |
3095 s->current_picture.qscale_table[mb_xy]= s->qscale; | |
3096 h->slice_table[ mb_xy ]= h->slice_num; | |
3097 h->prev_mb_skipped= 1; | |
3098 } | |
3099 | |
3100 /** | |
3101 * decodes a macroblock | |
3102 * @returns 0 if OK, -1 if an error is noticed | |
3103 */ | |
3104 static int decode_mb_cavlc(H264Context *h){ | |
3105 MpegEncContext * const s = &h->s; | |
3106 int mb_xy; | |
3107 int partition_count; | |
3108 unsigned int mb_type, cbp; | |
3109 int dct8x8_allowed= h->pps.transform_8x8_mode; | |
3110 | |
3111 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; | |
3112 | |
3113 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); | |
3114 cbp = 0; /* avoid warning. FIXME: find a solution without slowing | |
3115 down the code */ | |
// Skip handling (non-I slices only): mb_skip_run == -1 means no run is
// pending, so a new run length is read. While the run is non-zero the
// macroblock is decoded as skipped and the function returns early.
3116 if(h->slice_type_nos != FF_I_TYPE){ | |
3117 if(s->mb_skip_run==-1) | |
3118 s->mb_skip_run= get_ue_golomb(&s->gb); | |
3119 | |
3120 if (s->mb_skip_run--) { | |
3121 if(FRAME_MBAFF && (s->mb_y&1) == 0){ | |
3122 if(s->mb_skip_run==0) | |
3123 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb); | |
3124 else | |
3125 predict_field_decoding_flag(h); | |
3126 } | |
3127 decode_mb_skip(h); | |
3128 return 0; | |
3129 } | |
3130 } | |
// In MBAFF frames the field decoding flag is read on even mb_y rows only.
3131 if(FRAME_MBAFF){ | |
3132 if( (s->mb_y&1) == 0 ) | |
3133 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb); | |
3134 } | |
3135 | |
3136 h->prev_mb_skipped= 0; | |
3137 | |
// mb_type: in B and P slices the first 23 resp. 5 code numbers select an
// inter type from the info tables; larger values are rebased and handled as
// intra types through the decode_intra_mb label.
3138 mb_type= get_ue_golomb(&s->gb); | |
3139 if(h->slice_type_nos == FF_B_TYPE){ | |
3140 if(mb_type < 23){ | |
3141 partition_count= b_mb_type_info[mb_type].partition_count; | |
3142 mb_type= b_mb_type_info[mb_type].type; | |
3143 }else{ | |
3144 mb_type -= 23; | |
3145 goto decode_intra_mb; | |
3146 } | |
3147 }else if(h->slice_type_nos == FF_P_TYPE){ | |
3148 if(mb_type < 5){ | |
3149 partition_count= p_mb_type_info[mb_type].partition_count; | |
3150 mb_type= p_mb_type_info[mb_type].type; | |
3151 }else{ | |
3152 mb_type -= 5; | |
3153 goto decode_intra_mb; | |
3154 } | |
3155 }else{ | |
3156 assert(h->slice_type_nos == FF_I_TYPE); | |
3157 if(h->slice_type == FF_SI_TYPE && mb_type) | |
3158 mb_type--; | |
3159 decode_intra_mb: | |
3160 if(mb_type > 25){ | |
3161 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y); | |
3162 return -1; | |
3163 } | |
3164 partition_count=0; | |
3165 cbp= i_mb_type_info[mb_type].cbp; | |
3166 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; | |
3167 mb_type= i_mb_type_info[mb_type].type; | |
3168 } | |
3169 | |
3170 if(MB_FIELD) | |
3171 mb_type |= MB_TYPE_INTERLACED; | |
3172 | |
3173 h->slice_table[ mb_xy ]= h->slice_num; | |
3174 | |
// PCM macroblocks: raw bytes are copied from the byte-aligned bitstream into
// h->mb (384 with chroma, 256 without) and the function returns.
3175 if(IS_INTRA_PCM(mb_type)){ | |
3176 unsigned int x; | |
3177 | |
3178 // We assume these blocks are very rare so we do not optimize it. | |
3179 align_get_bits(&s->gb); | |
3180 | |
3181 // The pixels are stored in the same order as levels in h->mb array. | |
3182 for(x=0; x < (CHROMA ? 384 : 256); x++){ | |
3183 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8); | |
3184 } | |
3185 | |
3186 // In deblocking, the quantizer is 0 | |
3187 s->current_picture.qscale_table[mb_xy]= 0; | |
3188 // All coeffs are present | |
3189 memset(h->non_zero_count[mb_xy], 16, 16); | |
3190 | |
3191 s->current_picture.mb_type[mb_xy]= mb_type; | |
3192 return 0; | |
3193 } | |
3194 | |
// The reference counts are doubled here for MB_MBAFF macroblocks and halved
// again just before the final return 0.
// NOTE(review): every "return -1" between this point and the end of the
// function leaves h->ref_count doubled -- confirm that callers re-initialise
// ref_count before it is used again after an error.
3195 if(MB_MBAFF){ | |
3196 h->ref_count[0] <<= 1; | |
3197 h->ref_count[1] <<= 1; | |
3198 } | |
3199 | |
3200 fill_caches(h, mb_type, 0); | |
3201 | |
3202 //mb_pred | |
3203 if(IS_INTRA(mb_type)){ | |
3204 int pred_mode; | |
3205 // init_top_left_availability(h); | |
3206 if(IS_INTRA4x4(mb_type)){ | |
3207 int i; | |
3208 int di = 1; | |
// With the 8x8 transform one prediction mode is read per 8x8 block (di = 4)
// and replicated over its four 4x4 cache entries.
3209 if(dct8x8_allowed && get_bits1(&s->gb)){ | |
3210 mb_type |= MB_TYPE_8x8DCT; | |
3211 di = 4; | |
3212 } | |
3213 | |
3214 // fill_intra4x4_pred_table(h); | |
3215 for(i=0; i<16; i+=di){ | |
3216 int mode= pred_intra_mode(h, i); | |
3217 | |
// A 0 flag bit means the predicted mode is not used: a 3-bit rem_mode
// follows, and it is incremented when it is >= the predicted mode.
3218 if(!get_bits1(&s->gb)){ | |
3219 const int rem_mode= get_bits(&s->gb, 3); | |
3220 mode = rem_mode + (rem_mode >= mode); | |
3221 } | |
3222 | |
3223 if(di==4) | |
3224 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); | |
3225 else | |
3226 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode; | |
3227 } | |
3228 ff_h264_write_back_intra_pred_mode(h); | |
3229 if( ff_h264_check_intra4x4_pred_mode(h) < 0) | |
3230 return -1; | |
3231 }else{ | |
3232 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode); | |
3233 if(h->intra16x16_pred_mode < 0) | |
3234 return -1; | |
3235 } | |
3236 if(CHROMA){ | |
3237 pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb)); | |
3238 if(pred_mode < 0) | |
3239 return -1; | |
3240 h->chroma_pred_mode= pred_mode; | |
3241 } | |
// Four partitions: each gets its own sub_mb_type, reference index and vectors.
3242 }else if(partition_count==4){ | |
3243 int i, j, sub_partition_count[4], list, ref[2][4]; | |
3244 | |
3245 if(h->slice_type_nos == FF_B_TYPE){ | |
3246 for(i=0; i<4; i++){ | |
3247 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb); | |
3248 if(h->sub_mb_type[i] >=13){ | |
3249 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); | |
3250 return -1; | |
3251 } | |
3252 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; | |
3253 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; | |
3254 } | |
3255 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1]) | |
3256 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) { | |
3257 ff_h264_pred_direct_motion(h, &mb_type); | |
3258 h->ref_cache[0][scan8[4]] = | |
3259 h->ref_cache[1][scan8[4]] = | |
3260 h->ref_cache[0][scan8[12]] = | |
3261 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; | |
3262 } | |
3263 }else{ | |
3264 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ? | |
3265 for(i=0; i<4; i++){ | |
3266 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb); | |
3267 if(h->sub_mb_type[i] >=4){ | |
3268 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); | |
3269 return -1; | |
3270 } | |
3271 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; | |
3272 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; | |
3273 } | |
3274 } | |
3275 | |
// Reference indices for all lists come first in the bitstream. With one
// reference nothing is read, with two a single inverted bit is read,
// otherwise a range-checked ue(v) value.
3276 for(list=0; list<h->list_count; list++){ | |
3277 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; | |
3278 for(i=0; i<4; i++){ | |
3279 if(IS_DIRECT(h->sub_mb_type[i])) continue; | |
3280 if(IS_DIR(h->sub_mb_type[i], 0, list)){ | |
3281 unsigned int tmp; | |
3282 if(ref_count == 1){ | |
3283 tmp= 0; | |
3284 }else if(ref_count == 2){ | |
3285 tmp= get_bits1(&s->gb)^1; | |
3286 }else{ | |
3287 tmp= get_ue_golomb_31(&s->gb); | |
3288 if(tmp>=ref_count){ | |
3289 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp); | |
3290 return -1; | |
3291 } | |
3292 } | |
3293 ref[list][i]= tmp; | |
3294 }else{ | |
3295 //FIXME | |
3296 ref[list][i] = -1; | |
3297 } | |
3298 } | |
3299 } | |
3300 | |
3301 if(dct8x8_allowed) | |
3302 dct8x8_allowed = get_dct8x8_allowed(h); | |
3303 | |
// Second pass: store the reference indices in the cache, then read one
// motion vector difference per sub-partition and add it to the prediction.
3304 for(list=0; list<h->list_count; list++){ | |
3305 for(i=0; i<4; i++){ | |
3306 if(IS_DIRECT(h->sub_mb_type[i])) { | |
3307 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ]; | |
3308 continue; | |
3309 } | |
3310 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]= | |
3311 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; | |
3312 | |
3313 if(IS_DIR(h->sub_mb_type[i], 0, list)){ | |
3314 const int sub_mb_type= h->sub_mb_type[i]; | |
3315 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; | |
3316 for(j=0; j<sub_partition_count[i]; j++){ | |
3317 int mx, my; | |
3318 const int index= 4*i + block_width*j; | |
3319 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ]; | |
3320 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my); | |
3321 mx += get_se_golomb(&s->gb); | |
3322 my += get_se_golomb(&s->gb); | |
3323 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
3324 | |
// Replicate the vector over the cache entries covered by this sub-partition
// (offsets 1, 8 and 9 relative to the partition's first entry).
3325 if(IS_SUB_8X8(sub_mb_type)){ | |
3326 mv_cache[ 1 ][0]= | |
3327 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; | |
3328 mv_cache[ 1 ][1]= | |
3329 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; | |
3330 }else if(IS_SUB_8X4(sub_mb_type)){ | |
3331 mv_cache[ 1 ][0]= mx; | |
3332 mv_cache[ 1 ][1]= my; | |
3333 }else if(IS_SUB_4X8(sub_mb_type)){ | |
3334 mv_cache[ 8 ][0]= mx; | |
3335 mv_cache[ 8 ][1]= my; | |
3336 } | |
3337 mv_cache[ 0 ][0]= mx; | |
3338 mv_cache[ 0 ][1]= my; | |
3339 } | |
3340 }else{ | |
3341 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0]; | |
3342 p[0] = p[1]= | |
3343 p[8] = p[9]= 0; | |
3344 } | |
3345 } | |
3346 } | |
3347 }else if(IS_DIRECT(mb_type)){ | |
3348 ff_h264_pred_direct_motion(h, &mb_type); | |
3349 dct8x8_allowed &= h->sps.direct_8x8_inference_flag; | |
// Remaining inter types (16x16, 16x8, 8x16): for each shape all reference
// indices are parsed first, then all motion vector differences.
3350 }else{ | |
3351 int list, mx, my, i; | |
3352 //FIXME we should set ref_idx_l? to 0 if we use that later ... | |
3353 if(IS_16X16(mb_type)){ | |
3354 for(list=0; list<h->list_count; list++){ | |
3355 unsigned int val; | |
3356 if(IS_DIR(mb_type, 0, list)){ | |
3357 if(h->ref_count[list]==1){ | |
3358 val= 0; | |
3359 }else if(h->ref_count[list]==2){ | |
3360 val= get_bits1(&s->gb)^1; | |
3361 }else{ | |
3362 val= get_ue_golomb_31(&s->gb); | |
3363 if(val >= h->ref_count[list]){ | |
3364 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); | |
3365 return -1; | |
3366 } | |
3367 } | |
3368 }else | |
3369 val= LIST_NOT_USED&0xFF; | |
3370 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1); | |
3371 } | |
3372 for(list=0; list<h->list_count; list++){ | |
3373 unsigned int val; | |
3374 if(IS_DIR(mb_type, 0, list)){ | |
3375 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my); | |
3376 mx += get_se_golomb(&s->gb); | |
3377 my += get_se_golomb(&s->gb); | |
3378 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
3379 | |
3380 val= pack16to32(mx,my); | |
3381 }else | |
3382 val=0; | |
3383 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4); | |
3384 } | |
3385 } | |
3386 else if(IS_16X8(mb_type)){ | |
3387 for(list=0; list<h->list_count; list++){ | |
3388 for(i=0; i<2; i++){ | |
3389 unsigned int val; | |
3390 if(IS_DIR(mb_type, i, list)){ | |
3391 if(h->ref_count[list] == 1){ | |
3392 val= 0; | |
3393 }else if(h->ref_count[list] == 2){ | |
3394 val= get_bits1(&s->gb)^1; | |
3395 }else{ | |
3396 val= get_ue_golomb_31(&s->gb); | |
3397 if(val >= h->ref_count[list]){ | |
3398 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); | |
3399 return -1; | |
3400 } | |
3401 } | |
3402 }else | |
3403 val= LIST_NOT_USED&0xFF; | |
3404 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1); | |
3405 } | |
3406 } | |
3407 for(list=0; list<h->list_count; list++){ | |
3408 for(i=0; i<2; i++){ | |
3409 unsigned int val; | |
3410 if(IS_DIR(mb_type, i, list)){ | |
3411 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my); | |
3412 mx += get_se_golomb(&s->gb); | |
3413 my += get_se_golomb(&s->gb); | |
3414 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
3415 | |
3416 val= pack16to32(mx,my); | |
3417 }else | |
3418 val=0; | |
3419 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4); | |
3420 } | |
3421 } | |
3422 }else{ | |
3423 assert(IS_8X16(mb_type)); | |
3424 for(list=0; list<h->list_count; list++){ | |
3425 for(i=0; i<2; i++){ | |
3426 unsigned int val; | |
3427 if(IS_DIR(mb_type, i, list)){ //FIXME optimize | |
3428 if(h->ref_count[list]==1){ | |
3429 val= 0; | |
3430 }else if(h->ref_count[list]==2){ | |
3431 val= get_bits1(&s->gb)^1; | |
3432 }else{ | |
3433 val= get_ue_golomb_31(&s->gb); | |
3434 if(val >= h->ref_count[list]){ | |
3435 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); | |
3436 return -1; | |
3437 } | |
3438 } | |
3439 }else | |
3440 val= LIST_NOT_USED&0xFF; | |
3441 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1); | |
3442 } | |
3443 } | |
3444 for(list=0; list<h->list_count; list++){ | |
3445 for(i=0; i<2; i++){ | |
3446 unsigned int val; | |
3447 if(IS_DIR(mb_type, i, list)){ | |
3448 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); | |
3449 mx += get_se_golomb(&s->gb); | |
3450 my += get_se_golomb(&s->gb); | |
3451 tprintf(s->avctx, "final mv:%d %d\n", mx, my); | |
3452 | |
3453 val= pack16to32(mx,my); | |
3454 }else | |
3455 val=0; | |
3456 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4); | |
3457 } | |
3458 } | |
3459 } | |
3460 } | |
3461 | |
3462 if(IS_INTER(mb_type)) | |
3463 write_back_motion(h, mb_type); | |
3464 | |
// Coded block pattern: intra 16x16 types took cbp from i_mb_type_info above;
// all other types read a code number (at most 47) and map it through the
// intra/inter table for the chroma or gray case.
3465 if(!IS_INTRA16x16(mb_type)){ | |
3466 cbp= get_ue_golomb(&s->gb); | |
3467 if(cbp > 47){ | |
3468 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y); | |
3469 return -1; | |
3470 } | |
3471 | |
3472 if(CHROMA){ | |
3473 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp]; | |
3474 else cbp= golomb_to_inter_cbp [cbp]; | |
3475 }else{ | |
3476 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp]; | |
3477 else cbp= golomb_to_inter_cbp_gray[cbp]; | |
3478 } | |
3479 } | |
3480 h->cbp = cbp; | |
3481 | |
// For non-intra macroblocks with coded luma (cbp&15) the 8x8 transform flag
// is read here; intra 4x4 macroblocks read it earlier, before their
// prediction modes.
3482 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ | |
3483 if(get_bits1(&s->gb)){ | |
3484 mb_type |= MB_TYPE_8x8DCT; | |
3485 h->cbp_table[mb_xy]= cbp; | |
3486 } | |
3487 } | |
3488 s->current_picture.mb_type[mb_xy]= mb_type; | |
3489 | |
// Residual data is present when cbp is non-zero or the type is intra 16x16.
3490 if(cbp || IS_INTRA16x16(mb_type)){ | |
3491 int i8x8, i4x4, chroma_idx; | |
3492 int dquant; | |
3493 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; | |
3494 const uint8_t *scan, *scan8x8, *dc_scan; | |
3495 | |
3496 // fill_non_zero_count_cache(h); | |
3497 | |
// Scan tables: field or zigzag variants depending on the interlaced flag,
// with separate *_q0 tables used when s->qscale is 0.
3498 if(IS_INTERLACED(mb_type)){ | |
3499 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; | |
3500 scan= s->qscale ? h->field_scan : h->field_scan_q0; | |
3501 dc_scan= luma_dc_field_scan; | |
3502 }else{ | |
3503 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; | |
3504 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; | |
3505 dc_scan= luma_dc_zigzag_scan; | |
3506 } | |
3507 | |
3508 dquant= get_se_golomb(&s->gb); | |
3509 | |
3510 if( dquant > 25 || dquant < -26 ){ | |
3511 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y); | |
3512 return -1; | |
3513 } | |
3514 | |
// qscale is adjusted by dquant and moved back by +-52 when it leaves 0..51.
3515 s->qscale += dquant; | |
3516 if(((unsigned)s->qscale) > 51){ | |
3517 if(s->qscale<0) s->qscale+= 52; | |
3518 else s->qscale-= 52; | |
3519 } | |
3520 | |
3521 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale); | |
3522 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale); | |
// Intra 16x16: one luma DC block, then sixteen 15-coefficient AC blocks
// (scan + 1) when the luma bits of cbp are set.
3523 if(IS_INTRA16x16(mb_type)){ | |
3524 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ | |
3525 return -1; //FIXME continue if partitioned and other return -1 too | |
3526 } | |
3527 | |
3528 assert((cbp&15) == 0 || (cbp&15) == 15); | |
3529 | |
3530 if(cbp&15){ | |
3531 for(i8x8=0; i8x8<4; i8x8++){ | |
3532 for(i4x4=0; i4x4<4; i4x4++){ | |
3533 const int index= i4x4 + 4*i8x8; | |
3534 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ | |
3535 return -1; | |
3536 } | |
3537 } | |
3538 } | |
3539 }else{ | |
3540 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); | |
3541 } | |
// Other types: each cbp bit 0..3 enables one 8x8 luma quadrant.
3542 }else{ | |
3543 for(i8x8=0; i8x8<4; i8x8++){ | |
3544 if(cbp & (1<<i8x8)){ | |
3545 if(IS_8x8DCT(mb_type)){ | |
3546 DCTELEM *buf = &h->mb[64*i8x8]; | |
3547 uint8_t *nnz; | |
// The 8x8 block is parsed as four 16-coefficient passes that all store into
// the same 64-entry buffer, each with its own slice of scan8x8. The four
// non-zero counts are then summed into the first cache entry.
3548 for(i4x4=0; i4x4<4; i4x4++){ | |
3549 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, | |
3550 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) | |
3551 return -1; | |
3552 } | |
3553 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | |
3554 nnz[0] += nnz[1] + nnz[8] + nnz[9]; | |
3555 }else{ | |
3556 for(i4x4=0; i4x4<4; i4x4++){ | |
3557 const int index= i4x4 + 4*i8x8; | |
3558 | |
3559 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ | |
3560 return -1; | |
3561 } | |
3562 } | |
3563 } | |
3564 }else{ | |
3565 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | |
3566 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; | |
3567 } | |
3568 } | |
3569 } | |
3570 | |
// Chroma: cbp&0x30 enables the two chroma DC blocks (parsed without qmul),
// cbp&0x20 additionally enables the chroma AC blocks.
3571 if(cbp&0x30){ | |
3572 for(chroma_idx=0; chroma_idx<2; chroma_idx++) | |
3573 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){ | |
3574 return -1; | |
3575 } | |
3576 } | |
3577 | |
3578 if(cbp&0x20){ | |
3579 for(chroma_idx=0; chroma_idx<2; chroma_idx++){ | |
3580 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]]; | |
3581 for(i4x4=0; i4x4<4; i4x4++){ | |
3582 const int index= 16 + 4*chroma_idx + i4x4; | |
3583 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){ | |
3584 return -1; | |
3585 } | |
3586 } | |
3587 } | |
3588 }else{ | |
3589 uint8_t * const nnz= &h->non_zero_count_cache[0]; | |
3590 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = | |
3591 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; | |
3592 } | |
// No residual at all: clear the luma and chroma non-zero counts in the cache.
3593 }else{ | |
3594 uint8_t * const nnz= &h->non_zero_count_cache[0]; | |
3595 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); | |
3596 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = | |
3597 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; | |
3598 } | |
3599 s->current_picture.qscale_table[mb_xy]= s->qscale; | |
3600 write_back_non_zero_count(h); | |
3601 | |
// Undo the reference count doubling applied before fill_caches().
3602 if(MB_MBAFF){ | |
3603 h->ref_count[0] >>= 1; | |
3604 h->ref_count[1] >>= 1; | |
3605 } | |
3606 | |
3607 return 0; | |
3608 } | |
3609 | 2176 |
3610 static int decode_cabac_field_decoding_flag(H264Context *h) { | 2177 static int decode_cabac_field_decoding_flag(H264Context *h) { |
3611 MpegEncContext * const s = &h->s; | 2178 MpegEncContext * const s = &h->s; |
3612 const int mb_x = s->mb_x; | 2179 const int mb_x = s->mb_x; |
3613 const int mb_y = s->mb_y & ~1; | 2180 const int mb_y = s->mb_y & ~1; |
4764 } | 3331 } |
4765 } | 3332 } |
4766 | 3333 |
4767 } else { | 3334 } else { |
4768 for(;;){ | 3335 for(;;){ |
4769 int ret = decode_mb_cavlc(h); | 3336 int ret = ff_h264_decode_mb_cavlc(h); |
4770 | 3337 |
4771 if(ret>=0) ff_h264_hl_decode_mb(h); | 3338 if(ret>=0) ff_h264_hl_decode_mb(h); |
4772 | 3339 |
4773 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? | 3340 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? |
4774 s->mb_y++; | 3341 s->mb_y++; |
4775 ret = decode_mb_cavlc(h); | 3342 ret = ff_h264_decode_mb_cavlc(h); |
4776 | 3343 |
4777 if(ret>=0) ff_h264_hl_decode_mb(h); | 3344 if(ret>=0) ff_h264_hl_decode_mb(h); |
4778 s->mb_y--; | 3345 s->mb_y--; |
4779 } | 3346 } |
4780 | 3347 |