comparison h264.c @ 10869:7101061bfa0f libavcodec

Split cabac decoding code out of h264.c. not slower according to benchmarks.
author michael
date Wed, 13 Jan 2010 02:35:36 +0000
parents d26e9b4d2ca1
children 12bdac3c245b
comparison
equal deleted inserted replaced
10868:13a84faba50d 10869:7101061bfa0f
2171 case FF_SI_TYPE: return 4; 2171 case FF_SI_TYPE: return 4;
2172 default: return -1; 2172 default: return -1;
2173 } 2173 }
2174 } 2174 }
2175 2175
2176
2177 static int decode_cabac_field_decoding_flag(H264Context *h) {
2178 MpegEncContext * const s = &h->s;
2179 const int mb_x = s->mb_x;
2180 const int mb_y = s->mb_y & ~1;
2181 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
2182 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
2183
2184 unsigned int ctx = 0;
2185
2186 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
2187 ctx += 1;
2188 }
2189 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
2190 ctx += 1;
2191 }
2192
2193 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
2194 }
2195
2196 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
2197 uint8_t *state= &h->cabac_state[ctx_base];
2198 int mb_type;
2199
2200 if(intra_slice){
2201 MpegEncContext * const s = &h->s;
2202 const int mba_xy = h->left_mb_xy[0];
2203 const int mbb_xy = h->top_mb_xy;
2204 int ctx=0;
2205 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
2206 ctx++;
2207 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
2208 ctx++;
2209 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
2210 return 0; /* I4x4 */
2211 state += 2;
2212 }else{
2213 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
2214 return 0; /* I4x4 */
2215 }
2216
2217 if( get_cabac_terminate( &h->cabac ) )
2218 return 25; /* PCM */
2219
2220 mb_type = 1; /* I16x16 */
2221 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
2222 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
2223 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
2224 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
2225 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
2226 return mb_type;
2227 }
2228
2229 static int decode_cabac_mb_type_b( H264Context *h ) {
2230 MpegEncContext * const s = &h->s;
2231
2232 const int mba_xy = h->left_mb_xy[0];
2233 const int mbb_xy = h->top_mb_xy;
2234 int ctx = 0;
2235 int bits;
2236 assert(h->slice_type_nos == FF_B_TYPE);
2237
2238 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
2239 ctx++;
2240 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
2241 ctx++;
2242
2243 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
2244 return 0; /* B_Direct_16x16 */
2245
2246 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
2247 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
2248 }
2249
2250 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
2251 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
2252 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
2253 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
2254 if( bits < 8 )
2255 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
2256 else if( bits == 13 ) {
2257 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
2258 } else if( bits == 14 )
2259 return 11; /* B_L1_L0_8x16 */
2260 else if( bits == 15 )
2261 return 22; /* B_8x8 */
2262
2263 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
2264 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
2265 }
2266
2267 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
2268 MpegEncContext * const s = &h->s;
2269 int mba_xy, mbb_xy;
2270 int ctx = 0;
2271
2272 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
2273 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
2274 mba_xy = mb_xy - 1;
2275 if( (mb_y&1)
2276 && h->slice_table[mba_xy] == h->slice_num
2277 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
2278 mba_xy += s->mb_stride;
2279 if( MB_FIELD ){
2280 mbb_xy = mb_xy - s->mb_stride;
2281 if( !(mb_y&1)
2282 && h->slice_table[mbb_xy] == h->slice_num
2283 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
2284 mbb_xy -= s->mb_stride;
2285 }else
2286 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
2287 }else{
2288 int mb_xy = h->mb_xy;
2289 mba_xy = mb_xy - 1;
2290 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
2291 }
2292
2293 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
2294 ctx++;
2295 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
2296 ctx++;
2297
2298 if( h->slice_type_nos == FF_B_TYPE )
2299 ctx += 13;
2300 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
2301 }
2302
2303 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
2304 int mode = 0;
2305
2306 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
2307 return pred_mode;
2308
2309 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
2310 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
2311 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
2312
2313 if( mode >= pred_mode )
2314 return mode + 1;
2315 else
2316 return mode;
2317 }
2318
2319 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
2320 const int mba_xy = h->left_mb_xy[0];
2321 const int mbb_xy = h->top_mb_xy;
2322
2323 int ctx = 0;
2324
2325 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
2326 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
2327 ctx++;
2328
2329 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
2330 ctx++;
2331
2332 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
2333 return 0;
2334
2335 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
2336 return 1;
2337 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
2338 return 2;
2339 else
2340 return 3;
2341 }
2342
2343 static int decode_cabac_mb_cbp_luma( H264Context *h) {
2344 int cbp_b, cbp_a, ctx, cbp = 0;
2345
2346 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
2347 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
2348
2349 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
2350 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
2351 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
2352 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
2353 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
2354 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
2355 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
2356 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
2357 return cbp;
2358 }
2359 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
2360 int ctx;
2361 int cbp_a, cbp_b;
2362
2363 cbp_a = (h->left_cbp>>4)&0x03;
2364 cbp_b = (h-> top_cbp>>4)&0x03;
2365
2366 ctx = 0;
2367 if( cbp_a > 0 ) ctx++;
2368 if( cbp_b > 0 ) ctx += 2;
2369 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
2370 return 0;
2371
2372 ctx = 4;
2373 if( cbp_a == 2 ) ctx++;
2374 if( cbp_b == 2 ) ctx += 2;
2375 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
2376 }
2377 static int decode_cabac_mb_dqp( H264Context *h) {
2378 int ctx= h->last_qscale_diff != 0;
2379 int val = 0;
2380
2381 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
2382 ctx= 2+(ctx>>1);
2383 val++;
2384 if(val > 102) //prevent infinite loop
2385 return INT_MIN;
2386 }
2387
2388 if( val&0x01 )
2389 return (val + 1)>>1 ;
2390 else
2391 return -((val + 1)>>1);
2392 }
2393 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
2394 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
2395 return 0; /* 8x8 */
2396 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
2397 return 1; /* 8x4 */
2398 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
2399 return 2; /* 4x8 */
2400 return 3; /* 4x4 */
2401 }
2402 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
2403 int type;
2404 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
2405 return 0; /* B_Direct_8x8 */
2406 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
2407 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
2408 type = 3;
2409 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
2410 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
2411 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
2412 type += 4;
2413 }
2414 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
2415 type += get_cabac( &h->cabac, &h->cabac_state[39] );
2416 return type;
2417 }
2418
2419 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
2420 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
2421 }
2422
2423 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
2424 int refa = h->ref_cache[list][scan8[n] - 1];
2425 int refb = h->ref_cache[list][scan8[n] - 8];
2426 int ref = 0;
2427 int ctx = 0;
2428
2429 if( h->slice_type_nos == FF_B_TYPE) {
2430 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
2431 ctx++;
2432 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
2433 ctx += 2;
2434 } else {
2435 if( refa > 0 )
2436 ctx++;
2437 if( refb > 0 )
2438 ctx += 2;
2439 }
2440
2441 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
2442 ref++;
2443 ctx = (ctx>>2)+4;
2444 if(ref >= 32 /*h->ref_list[list]*/){
2445 return -1;
2446 }
2447 }
2448 return ref;
2449 }
2450
2451 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
2452 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
2453 abs( h->mvd_cache[list][scan8[n] - 8][l] );
2454 int ctxbase = (l == 0) ? 40 : 47;
2455 int mvd;
2456 int ctx = (amvd>2) + (amvd>32);
2457
2458 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
2459 return 0;
2460
2461 mvd= 1;
2462 ctx= 3;
2463 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
2464 mvd++;
2465 if( ctx < 6 )
2466 ctx++;
2467 }
2468
2469 if( mvd >= 9 ) {
2470 int k = 3;
2471 while( get_cabac_bypass( &h->cabac ) ) {
2472 mvd += 1 << k;
2473 k++;
2474 if(k>24){
2475 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
2476 return INT_MIN;
2477 }
2478 }
2479 while( k-- ) {
2480 if( get_cabac_bypass( &h->cabac ) )
2481 mvd += 1 << k;
2482 }
2483 }
2484 return get_cabac_bypass_sign( &h->cabac, -mvd );
2485 }
2486
2487 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
2488 int nza, nzb;
2489 int ctx = 0;
2490
2491 if( is_dc ) {
2492 if( cat == 0 ) {
2493 nza = h->left_cbp&0x100;
2494 nzb = h-> top_cbp&0x100;
2495 } else {
2496 nza = (h->left_cbp>>(6+idx))&0x01;
2497 nzb = (h-> top_cbp>>(6+idx))&0x01;
2498 }
2499 } else {
2500 assert(cat == 1 || cat == 2 || cat == 4);
2501 nza = h->non_zero_count_cache[scan8[idx] - 1];
2502 nzb = h->non_zero_count_cache[scan8[idx] - 8];
2503 }
2504
2505 if( nza > 0 )
2506 ctx++;
2507
2508 if( nzb > 0 )
2509 ctx += 2;
2510
2511 return ctx + 4 * cat;
2512 }
2513
2514 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
2515 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2516 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2517 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
2518 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
2519 };
2520
2521 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
2522 static const int significant_coeff_flag_offset[2][6] = {
2523 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
2524 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
2525 };
2526 static const int last_coeff_flag_offset[2][6] = {
2527 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
2528 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
2529 };
2530 static const int coeff_abs_level_m1_offset[6] = {
2531 227+0, 227+10, 227+20, 227+30, 227+39, 426
2532 };
2533 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
2534 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
2535 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
2536 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
2537 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
2538 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
2539 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
2540 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
2541 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
2542 };
2543 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
2544 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
2545 * map node ctx => cabac ctx for level=1 */
2546 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
2547 /* map node ctx => cabac ctx for level>1 */
2548 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
2549 static const uint8_t coeff_abs_level_transition[2][8] = {
2550 /* update node ctx after decoding a level=1 */
2551 { 1, 2, 3, 3, 4, 5, 6, 7 },
2552 /* update node ctx after decoding a level>1 */
2553 { 4, 4, 4, 4, 5, 6, 7, 7 }
2554 };
2555
2556 int index[64];
2557
2558 int av_unused last;
2559 int coeff_count = 0;
2560 int node_ctx = 0;
2561
2562 uint8_t *significant_coeff_ctx_base;
2563 uint8_t *last_coeff_ctx_base;
2564 uint8_t *abs_level_m1_ctx_base;
2565
2566 #if !ARCH_X86
2567 #define CABAC_ON_STACK
2568 #endif
2569 #ifdef CABAC_ON_STACK
2570 #define CC &cc
2571 CABACContext cc;
2572 cc.range = h->cabac.range;
2573 cc.low = h->cabac.low;
2574 cc.bytestream= h->cabac.bytestream;
2575 #else
2576 #define CC &h->cabac
2577 #endif
2578
2579
2580 /* cat: 0-> DC 16x16 n = 0
2581 * 1-> AC 16x16 n = luma4x4idx
2582 * 2-> Luma4x4 n = luma4x4idx
2583 * 3-> DC Chroma n = iCbCr
2584 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
2585 * 5-> Luma8x8 n = 4 * luma8x8idx
2586 */
2587
2588 /* read coded block flag */
2589 if( is_dc || cat != 5 ) {
2590 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
2591 if( !is_dc )
2592 h->non_zero_count_cache[scan8[n]] = 0;
2593
2594 #ifdef CABAC_ON_STACK
2595 h->cabac.range = cc.range ;
2596 h->cabac.low = cc.low ;
2597 h->cabac.bytestream= cc.bytestream;
2598 #endif
2599 return;
2600 }
2601 }
2602
2603 significant_coeff_ctx_base = h->cabac_state
2604 + significant_coeff_flag_offset[MB_FIELD][cat];
2605 last_coeff_ctx_base = h->cabac_state
2606 + last_coeff_flag_offset[MB_FIELD][cat];
2607 abs_level_m1_ctx_base = h->cabac_state
2608 + coeff_abs_level_m1_offset[cat];
2609
2610 if( !is_dc && cat == 5 ) {
2611 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
2612 for(last= 0; last < coefs; last++) { \
2613 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
2614 if( get_cabac( CC, sig_ctx )) { \
2615 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
2616 index[coeff_count++] = last; \
2617 if( get_cabac( CC, last_ctx ) ) { \
2618 last= max_coeff; \
2619 break; \
2620 } \
2621 } \
2622 }\
2623 if( last == max_coeff -1 ) {\
2624 index[coeff_count++] = last;\
2625 }
2626 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
2627 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
2628 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
2629 } else {
2630 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
2631 #else
2632 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
2633 } else {
2634 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
2635 #endif
2636 }
2637 assert(coeff_count > 0);
2638
2639 if( is_dc ) {
2640 if( cat == 0 )
2641 h->cbp_table[h->mb_xy] |= 0x100;
2642 else
2643 h->cbp_table[h->mb_xy] |= 0x40 << n;
2644 } else {
2645 if( cat == 5 )
2646 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
2647 else {
2648 assert( cat == 1 || cat == 2 || cat == 4 );
2649 h->non_zero_count_cache[scan8[n]] = coeff_count;
2650 }
2651 }
2652
2653 do {
2654 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
2655
2656 int j= scantable[index[--coeff_count]];
2657
2658 if( get_cabac( CC, ctx ) == 0 ) {
2659 node_ctx = coeff_abs_level_transition[0][node_ctx];
2660 if( is_dc ) {
2661 block[j] = get_cabac_bypass_sign( CC, -1);
2662 }else{
2663 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
2664 }
2665 } else {
2666 int coeff_abs = 2;
2667 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
2668 node_ctx = coeff_abs_level_transition[1][node_ctx];
2669
2670 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
2671 coeff_abs++;
2672 }
2673
2674 if( coeff_abs >= 15 ) {
2675 int j = 0;
2676 while( get_cabac_bypass( CC ) ) {
2677 j++;
2678 }
2679
2680 coeff_abs=1;
2681 while( j-- ) {
2682 coeff_abs += coeff_abs + get_cabac_bypass( CC );
2683 }
2684 coeff_abs+= 14;
2685 }
2686
2687 if( is_dc ) {
2688 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
2689 }else{
2690 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
2691 }
2692 }
2693 } while( coeff_count );
2694 #ifdef CABAC_ON_STACK
2695 h->cabac.range = cc.range ;
2696 h->cabac.low = cc.low ;
2697 h->cabac.bytestream= cc.bytestream;
2698 #endif
2699
2700 }
2701
2702 #if !CONFIG_SMALL
2703 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
2704 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
2705 }
2706
2707 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
2708 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
2709 }
2710 #endif
2711
2712 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
2713 #if CONFIG_SMALL
2714 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
2715 #else
2716 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
2717 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
2718 #endif
2719 }
2720
2721 static inline void compute_mb_neighbors(H264Context *h)
2722 {
2723 MpegEncContext * const s = &h->s;
2724 const int mb_xy = h->mb_xy;
2725 h->top_mb_xy = mb_xy - s->mb_stride;
2726 h->left_mb_xy[0] = mb_xy - 1;
2727 if(FRAME_MBAFF){
2728 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
2729 const int top_pair_xy = pair_xy - s->mb_stride;
2730 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
2731 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
2732 const int curr_mb_field_flag = MB_FIELD;
2733 const int bottom = (s->mb_y & 1);
2734
2735 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
2736 h->top_mb_xy -= s->mb_stride;
2737 }
2738 if (!left_mb_field_flag == curr_mb_field_flag) {
2739 h->left_mb_xy[0] = pair_xy - 1;
2740 }
2741 } else if (FIELD_PICTURE) {
2742 h->top_mb_xy -= s->mb_stride;
2743 }
2744 return;
2745 }
2746
2747 /**
2748 * decodes a macroblock
2749 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
2750 */
2751 static int decode_mb_cabac(H264Context *h) {
2752 MpegEncContext * const s = &h->s;
2753 int mb_xy;
2754 int mb_type, partition_count, cbp = 0;
2755 int dct8x8_allowed= h->pps.transform_8x8_mode;
2756
2757 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2758
2759 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
2760 if( h->slice_type_nos != FF_I_TYPE ) {
2761 int skip;
2762 /* a skipped mb needs the aff flag from the following mb */
2763 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
2764 predict_field_decoding_flag(h);
2765 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
2766 skip = h->next_mb_skipped;
2767 else
2768 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
2769 /* read skip flags */
2770 if( skip ) {
2771 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
2772 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
2773 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
2774 if(!h->next_mb_skipped)
2775 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
2776 }
2777
2778 decode_mb_skip(h);
2779
2780 h->cbp_table[mb_xy] = 0;
2781 h->chroma_pred_mode_table[mb_xy] = 0;
2782 h->last_qscale_diff = 0;
2783
2784 return 0;
2785
2786 }
2787 }
2788 if(FRAME_MBAFF){
2789 if( (s->mb_y&1) == 0 )
2790 h->mb_mbaff =
2791 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
2792 }
2793
2794 h->prev_mb_skipped = 0;
2795
2796 compute_mb_neighbors(h);
2797
2798 if( h->slice_type_nos == FF_B_TYPE ) {
2799 mb_type = decode_cabac_mb_type_b( h );
2800 if( mb_type < 23 ){
2801 partition_count= b_mb_type_info[mb_type].partition_count;
2802 mb_type= b_mb_type_info[mb_type].type;
2803 }else{
2804 mb_type -= 23;
2805 goto decode_intra_mb;
2806 }
2807 } else if( h->slice_type_nos == FF_P_TYPE ) {
2808 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
2809 /* P-type */
2810 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
2811 /* P_L0_D16x16, P_8x8 */
2812 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
2813 } else {
2814 /* P_L0_D8x16, P_L0_D16x8 */
2815 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
2816 }
2817 partition_count= p_mb_type_info[mb_type].partition_count;
2818 mb_type= p_mb_type_info[mb_type].type;
2819 } else {
2820 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
2821 goto decode_intra_mb;
2822 }
2823 } else {
2824 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
2825 if(h->slice_type == FF_SI_TYPE && mb_type)
2826 mb_type--;
2827 assert(h->slice_type_nos == FF_I_TYPE);
2828 decode_intra_mb:
2829 partition_count = 0;
2830 cbp= i_mb_type_info[mb_type].cbp;
2831 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
2832 mb_type= i_mb_type_info[mb_type].type;
2833 }
2834 if(MB_FIELD)
2835 mb_type |= MB_TYPE_INTERLACED;
2836
2837 h->slice_table[ mb_xy ]= h->slice_num;
2838
2839 if(IS_INTRA_PCM(mb_type)) {
2840 const uint8_t *ptr;
2841
2842 // We assume these blocks are very rare so we do not optimize it.
2843 // FIXME The two following lines get the bitstream position in the cabac
2844 // decode, I think it should be done by a function in cabac.h (or cabac.c).
2845 ptr= h->cabac.bytestream;
2846 if(h->cabac.low&0x1) ptr--;
2847 if(CABAC_BITS==16){
2848 if(h->cabac.low&0x1FF) ptr--;
2849 }
2850
2851 // The pixels are stored in the same order as levels in h->mb array.
2852 memcpy(h->mb, ptr, 256); ptr+=256;
2853 if(CHROMA){
2854 memcpy(h->mb+128, ptr, 128); ptr+=128;
2855 }
2856
2857 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
2858
2859 // All blocks are present
2860 h->cbp_table[mb_xy] = 0x1ef;
2861 h->chroma_pred_mode_table[mb_xy] = 0;
2862 // In deblocking, the quantizer is 0
2863 s->current_picture.qscale_table[mb_xy]= 0;
2864 // All coeffs are present
2865 memset(h->non_zero_count[mb_xy], 16, 16);
2866 s->current_picture.mb_type[mb_xy]= mb_type;
2867 h->last_qscale_diff = 0;
2868 return 0;
2869 }
2870
2871 if(MB_MBAFF){
2872 h->ref_count[0] <<= 1;
2873 h->ref_count[1] <<= 1;
2874 }
2875
2876 fill_caches(h, mb_type, 0);
2877
2878 if( IS_INTRA( mb_type ) ) {
2879 int i, pred_mode;
2880 if( IS_INTRA4x4( mb_type ) ) {
2881 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
2882 mb_type |= MB_TYPE_8x8DCT;
2883 for( i = 0; i < 16; i+=4 ) {
2884 int pred = pred_intra_mode( h, i );
2885 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
2886 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
2887 }
2888 } else {
2889 for( i = 0; i < 16; i++ ) {
2890 int pred = pred_intra_mode( h, i );
2891 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
2892
2893 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
2894 }
2895 }
2896 ff_h264_write_back_intra_pred_mode(h);
2897 if( ff_h264_check_intra4x4_pred_mode(h) < 0 ) return -1;
2898 } else {
2899 h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
2900 if( h->intra16x16_pred_mode < 0 ) return -1;
2901 }
2902 if(CHROMA){
2903 h->chroma_pred_mode_table[mb_xy] =
2904 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
2905
2906 pred_mode= ff_h264_check_intra_pred_mode( h, pred_mode );
2907 if( pred_mode < 0 ) return -1;
2908 h->chroma_pred_mode= pred_mode;
2909 }
2910 } else if( partition_count == 4 ) {
2911 int i, j, sub_partition_count[4], list, ref[2][4];
2912
2913 if( h->slice_type_nos == FF_B_TYPE ) {
2914 for( i = 0; i < 4; i++ ) {
2915 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
2916 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
2917 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
2918 }
2919 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
2920 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
2921 ff_h264_pred_direct_motion(h, &mb_type);
2922 h->ref_cache[0][scan8[4]] =
2923 h->ref_cache[1][scan8[4]] =
2924 h->ref_cache[0][scan8[12]] =
2925 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
2926 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
2927 for( i = 0; i < 4; i++ )
2928 if( IS_DIRECT(h->sub_mb_type[i]) )
2929 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
2930 }
2931 }
2932 } else {
2933 for( i = 0; i < 4; i++ ) {
2934 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
2935 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
2936 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
2937 }
2938 }
2939
2940 for( list = 0; list < h->list_count; list++ ) {
2941 for( i = 0; i < 4; i++ ) {
2942 if(IS_DIRECT(h->sub_mb_type[i])) continue;
2943 if(IS_DIR(h->sub_mb_type[i], 0, list)){
2944 if( h->ref_count[list] > 1 ){
2945 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
2946 if(ref[list][i] >= (unsigned)h->ref_count[list]){
2947 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
2948 return -1;
2949 }
2950 }else
2951 ref[list][i] = 0;
2952 } else {
2953 ref[list][i] = -1;
2954 }
2955 h->ref_cache[list][ scan8[4*i]+1 ]=
2956 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
2957 }
2958 }
2959
2960 if(dct8x8_allowed)
2961 dct8x8_allowed = get_dct8x8_allowed(h);
2962
2963 for(list=0; list<h->list_count; list++){
2964 for(i=0; i<4; i++){
2965 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
2966 if(IS_DIRECT(h->sub_mb_type[i])){
2967 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
2968 continue;
2969 }
2970
2971 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
2972 const int sub_mb_type= h->sub_mb_type[i];
2973 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
2974 for(j=0; j<sub_partition_count[i]; j++){
2975 int mpx, mpy;
2976 int mx, my;
2977 const int index= 4*i + block_width*j;
2978 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
2979 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
2980 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
2981
2982 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
2983 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
2984 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
2985
2986 if(IS_SUB_8X8(sub_mb_type)){
2987 mv_cache[ 1 ][0]=
2988 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
2989 mv_cache[ 1 ][1]=
2990 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
2991
2992 mvd_cache[ 1 ][0]=
2993 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
2994 mvd_cache[ 1 ][1]=
2995 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
2996 }else if(IS_SUB_8X4(sub_mb_type)){
2997 mv_cache[ 1 ][0]= mx;
2998 mv_cache[ 1 ][1]= my;
2999
3000 mvd_cache[ 1 ][0]= mx - mpx;
3001 mvd_cache[ 1 ][1]= my - mpy;
3002 }else if(IS_SUB_4X8(sub_mb_type)){
3003 mv_cache[ 8 ][0]= mx;
3004 mv_cache[ 8 ][1]= my;
3005
3006 mvd_cache[ 8 ][0]= mx - mpx;
3007 mvd_cache[ 8 ][1]= my - mpy;
3008 }
3009 mv_cache[ 0 ][0]= mx;
3010 mv_cache[ 0 ][1]= my;
3011
3012 mvd_cache[ 0 ][0]= mx - mpx;
3013 mvd_cache[ 0 ][1]= my - mpy;
3014 }
3015 }else{
3016 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
3017 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
3018 p[0] = p[1] = p[8] = p[9] = 0;
3019 pd[0]= pd[1]= pd[8]= pd[9]= 0;
3020 }
3021 }
3022 }
3023 } else if( IS_DIRECT(mb_type) ) {
3024 ff_h264_pred_direct_motion(h, &mb_type);
3025 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
3026 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
3027 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
3028 } else {
3029 int list, mx, my, i, mpx, mpy;
3030 if(IS_16X16(mb_type)){
3031 for(list=0; list<h->list_count; list++){
3032 if(IS_DIR(mb_type, 0, list)){
3033 int ref;
3034 if(h->ref_count[list] > 1){
3035 ref= decode_cabac_mb_ref(h, list, 0);
3036 if(ref >= (unsigned)h->ref_count[list]){
3037 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
3038 return -1;
3039 }
3040 }else
3041 ref=0;
3042 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
3043 }else
3044 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
3045 }
3046 for(list=0; list<h->list_count; list++){
3047 if(IS_DIR(mb_type, 0, list)){
3048 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
3049
3050 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
3051 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
3052 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
3053
3054 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
3055 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
3056 }else
3057 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
3058 }
3059 }
3060 else if(IS_16X8(mb_type)){
3061 for(list=0; list<h->list_count; list++){
3062 for(i=0; i<2; i++){
3063 if(IS_DIR(mb_type, i, list)){
3064 int ref;
3065 if(h->ref_count[list] > 1){
3066 ref= decode_cabac_mb_ref( h, list, 8*i );
3067 if(ref >= (unsigned)h->ref_count[list]){
3068 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
3069 return -1;
3070 }
3071 }else
3072 ref=0;
3073 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
3074 }else
3075 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
3076 }
3077 }
3078 for(list=0; list<h->list_count; list++){
3079 for(i=0; i<2; i++){
3080 if(IS_DIR(mb_type, i, list)){
3081 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
3082 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
3083 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
3084 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
3085
3086 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
3087 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
3088 }else{
3089 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
3090 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
3091 }
3092 }
3093 }
3094 }else{
3095 assert(IS_8X16(mb_type));
3096 for(list=0; list<h->list_count; list++){
3097 for(i=0; i<2; i++){
3098 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
3099 int ref;
3100 if(h->ref_count[list] > 1){
3101 ref= decode_cabac_mb_ref( h, list, 4*i );
3102 if(ref >= (unsigned)h->ref_count[list]){
3103 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
3104 return -1;
3105 }
3106 }else
3107 ref=0;
3108 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
3109 }else
3110 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
3111 }
3112 }
3113 for(list=0; list<h->list_count; list++){
3114 for(i=0; i<2; i++){
3115 if(IS_DIR(mb_type, i, list)){
3116 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
3117 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
3118 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
3119
3120 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
3121 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
3122 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
3123 }else{
3124 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
3125 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
3126 }
3127 }
3128 }
3129 }
3130 }
3131
3132 if( IS_INTER( mb_type ) ) {
3133 h->chroma_pred_mode_table[mb_xy] = 0;
3134 write_back_motion( h, mb_type );
3135 }
3136
3137 if( !IS_INTRA16x16( mb_type ) ) {
3138 cbp = decode_cabac_mb_cbp_luma( h );
3139 if(CHROMA)
3140 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
3141 }
3142
3143 h->cbp_table[mb_xy] = h->cbp = cbp;
3144
3145 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
3146 if( decode_cabac_mb_transform_size( h ) )
3147 mb_type |= MB_TYPE_8x8DCT;
3148 }
3149 s->current_picture.mb_type[mb_xy]= mb_type;
3150
3151 if( cbp || IS_INTRA16x16( mb_type ) ) {
3152 const uint8_t *scan, *scan8x8, *dc_scan;
3153 const uint32_t *qmul;
3154 int dqp;
3155
3156 if(IS_INTERLACED(mb_type)){
3157 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
3158 scan= s->qscale ? h->field_scan : h->field_scan_q0;
3159 dc_scan= luma_dc_field_scan;
3160 }else{
3161 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
3162 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
3163 dc_scan= luma_dc_zigzag_scan;
3164 }
3165
3166 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
3167 if( dqp == INT_MIN ){
3168 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
3169 return -1;
3170 }
3171 s->qscale += dqp;
3172 if(((unsigned)s->qscale) > 51){
3173 if(s->qscale<0) s->qscale+= 52;
3174 else s->qscale-= 52;
3175 }
3176 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3177 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3178
3179 if( IS_INTRA16x16( mb_type ) ) {
3180 int i;
3181 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
3182 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
3183
3184 if( cbp&15 ) {
3185 qmul = h->dequant4_coeff[0][s->qscale];
3186 for( i = 0; i < 16; i++ ) {
3187 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
3188 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
3189 }
3190 } else {
3191 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
3192 }
3193 } else {
3194 int i8x8, i4x4;
3195 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
3196 if( cbp & (1<<i8x8) ) {
3197 if( IS_8x8DCT(mb_type) ) {
3198 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
3199 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
3200 } else {
3201 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
3202 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
3203 const int index = 4*i8x8 + i4x4;
3204 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
3205 //START_TIMER
3206 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
3207 //STOP_TIMER("decode_residual")
3208 }
3209 }
3210 } else {
3211 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
3212 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
3213 }
3214 }
3215 }
3216
3217 if( cbp&0x30 ){
3218 int c;
3219 for( c = 0; c < 2; c++ ) {
3220 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
3221 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
3222 }
3223 }
3224
3225 if( cbp&0x20 ) {
3226 int c, i;
3227 for( c = 0; c < 2; c++ ) {
3228 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
3229 for( i = 0; i < 4; i++ ) {
3230 const int index = 16 + 4 * c + i;
3231 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
3232 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
3233 }
3234 }
3235 } else {
3236 uint8_t * const nnz= &h->non_zero_count_cache[0];
3237 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
3238 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
3239 }
3240 } else {
3241 uint8_t * const nnz= &h->non_zero_count_cache[0];
3242 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
3243 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
3244 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
3245 h->last_qscale_diff = 0;
3246 }
3247
3248 s->current_picture.qscale_table[mb_xy]= s->qscale;
3249 write_back_non_zero_count(h);
3250
3251 if(MB_MBAFF){
3252 h->ref_count[0] >>= 1;
3253 h->ref_count[1] >>= 1;
3254 }
3255
3256 return 0;
3257 }
3258
3259 static int decode_slice(struct AVCodecContext *avctx, void *arg){ 2176 static int decode_slice(struct AVCodecContext *avctx, void *arg){
3260 H264Context *h = *(void**)arg; 2177 H264Context *h = *(void**)arg;
3261 MpegEncContext * const s = &h->s; 2178 MpegEncContext * const s = &h->s;
3262 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; 2179 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
3263 2180
3275 /* init cabac */ 2192 /* init cabac */
3276 ff_init_cabac_states( &h->cabac); 2193 ff_init_cabac_states( &h->cabac);
3277 ff_init_cabac_decoder( &h->cabac, 2194 ff_init_cabac_decoder( &h->cabac,
3278 s->gb.buffer + get_bits_count(&s->gb)/8, 2195 s->gb.buffer + get_bits_count(&s->gb)/8,
3279 (get_bits_left(&s->gb) + 7)/8); 2196 (get_bits_left(&s->gb) + 7)/8);
3280 /* calculate pre-state */ 2197
3281 for( i= 0; i < 460; i++ ) { 2198 ff_h264_init_cabac_states(h);
3282 int pre;
3283 if( h->slice_type_nos == FF_I_TYPE )
3284 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
3285 else
3286 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
3287
3288 if( pre <= 63 )
3289 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
3290 else
3291 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
3292 }
3293 2199
3294 for(;;){ 2200 for(;;){
3295 //START_TIMER 2201 //START_TIMER
3296 int ret = decode_mb_cabac(h); 2202 int ret = ff_h264_decode_mb_cabac(h);
3297 int eos; 2203 int eos;
3298 //STOP_TIMER("decode_mb_cabac") 2204 //STOP_TIMER("decode_mb_cabac")
3299 2205
3300 if(ret>=0) ff_h264_hl_decode_mb(h); 2206 if(ret>=0) ff_h264_hl_decode_mb(h);
3301 2207
3302 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? 2208 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
3303 s->mb_y++; 2209 s->mb_y++;
3304 2210
3305 ret = decode_mb_cabac(h); 2211 ret = ff_h264_decode_mb_cabac(h);
3306 2212
3307 if(ret>=0) ff_h264_hl_decode_mb(h); 2213 if(ret>=0) ff_h264_hl_decode_mb(h);
3308 s->mb_y--; 2214 s->mb_y--;
3309 } 2215 }
3310 eos = get_cabac_terminate( &h->cabac ); 2216 eos = get_cabac_terminate( &h->cabac );