Mercurial > libavcodec.hg
comparison snow.c @ 2993:cb0e26759cca libavcodec
iterative overlapped block based motion estimation for snow
author | michael |
---|---|
date | Wed, 28 Dec 2005 15:43:53 +0000 |
parents | bfabfdf9ce55 |
children | 657e8546090d |
comparison
equal
deleted
inserted
replaced
2992:f74ae8aff2a9 | 2993:cb0e26759cca |
---|---|
381 int16_t my; | 381 int16_t my; |
382 uint8_t color[3]; | 382 uint8_t color[3]; |
383 uint8_t type; | 383 uint8_t type; |
384 //#define TYPE_SPLIT 1 | 384 //#define TYPE_SPLIT 1 |
385 #define BLOCK_INTRA 1 | 385 #define BLOCK_INTRA 1 |
386 #define BLOCK_OPT 2 | |
386 //#define TYPE_NOCOLOR 4 | 387 //#define TYPE_NOCOLOR 4 |
387 uint8_t level; //FIXME merge into type? | 388 uint8_t level; //FIXME merge into type? |
388 }BlockNode; | 389 }BlockNode; |
390 | |
391 static const BlockNode null_block= { //FIXME add border maybe | |
392 .color= {128,128,128}, | |
393 .mx= 0, | |
394 .my= 0, | |
395 .type= 0, | |
396 .level= 0, | |
397 }; | |
389 | 398 |
390 #define LOG2_MB_SIZE 4 | 399 #define LOG2_MB_SIZE 4 |
391 #define MB_SIZE (1<<LOG2_MB_SIZE) | 400 #define MB_SIZE (1<<LOG2_MB_SIZE) |
392 | 401 |
393 typedef struct x_and_coeff{ | 402 typedef struct x_and_coeff{ |
431 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) | 440 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) |
432 | 441 |
433 AVCodecContext *avctx; | 442 AVCodecContext *avctx; |
434 RangeCoder c; | 443 RangeCoder c; |
435 DSPContext dsp; | 444 DSPContext dsp; |
436 AVFrame input_picture; | 445 AVFrame new_picture; |
446 AVFrame input_picture; ///< new_picture with the internal linesizes | |
437 AVFrame current_picture; | 447 AVFrame current_picture; |
438 AVFrame last_picture; | 448 AVFrame last_picture; |
439 AVFrame mconly_picture; | 449 AVFrame mconly_picture; |
440 // uint8_t q_context[16]; | 450 // uint8_t q_context[16]; |
441 uint8_t header_state[32]; | 451 uint8_t header_state[32]; |
461 int b_width; | 471 int b_width; |
462 int b_height; | 472 int b_height; |
463 int block_max_depth; | 473 int block_max_depth; |
464 Plane plane[MAX_PLANES]; | 474 Plane plane[MAX_PLANES]; |
465 BlockNode *block; | 475 BlockNode *block; |
476 #define ME_CACHE_SIZE 1024 | |
477 int me_cache[ME_CACHE_SIZE]; | |
478 int me_cache_generation; | |
466 slice_buffer sb; | 479 slice_buffer sb; |
467 | 480 |
468 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) | 481 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) |
469 }SnowContext; | 482 }SnowContext; |
470 | 483 |
476 int y; | 489 int y; |
477 } dwt_compose_t; | 490 } dwt_compose_t; |
478 | 491 |
479 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) | 492 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
480 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) | 493 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
494 | |
495 static void iterative_me(SnowContext *s); | |
481 | 496 |
482 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) | 497 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) |
483 { | 498 { |
484 int i; | 499 int i; |
485 | 500 |
2049 const int w= s->b_width << s->block_max_depth; | 2064 const int w= s->b_width << s->block_max_depth; |
2050 const int h= s->b_height << s->block_max_depth; | 2065 const int h= s->b_height << s->block_max_depth; |
2051 const int rem_depth= s->block_max_depth - level; | 2066 const int rem_depth= s->block_max_depth - level; |
2052 const int index= (x + y*w) << rem_depth; | 2067 const int index= (x + y*w) << rem_depth; |
2053 const int block_w= 1<<(LOG2_MB_SIZE - level); | 2068 const int block_w= 1<<(LOG2_MB_SIZE - level); |
2054 static BlockNode null_block= { //FIXME add border maybe | |
2055 .color= {128,128,128}, | |
2056 .mx= 0, | |
2057 .my= 0, | |
2058 .type= 0, | |
2059 .level= 0, | |
2060 }; | |
2061 int trx= (x+1)<<rem_depth; | 2069 int trx= (x+1)<<rem_depth; |
2062 int try= (y+1)<<rem_depth; | 2070 int try= (y+1)<<rem_depth; |
2063 BlockNode *left = x ? &s->block[index-1] : &null_block; | 2071 BlockNode *left = x ? &s->block[index-1] : &null_block; |
2064 BlockNode *top = y ? &s->block[index-w] : &null_block; | 2072 BlockNode *top = y ? &s->block[index-w] : &null_block; |
2065 BlockNode *right = trx<w ? &s->block[index+1] : &null_block; | 2073 BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
2070 int pcb= left->color[1]; | 2078 int pcb= left->color[1]; |
2071 int pcr= left->color[2]; | 2079 int pcr= left->color[2]; |
2072 int pmx= mid_pred(left->mx, top->mx, tr->mx); | 2080 int pmx= mid_pred(left->mx, top->mx, tr->mx); |
2073 int pmy= mid_pred(left->my, top->my, tr->my); | 2081 int pmy= mid_pred(left->my, top->my, tr->my); |
2074 int mx=0, my=0; | 2082 int mx=0, my=0; |
2075 int l,cr,cb, i; | 2083 int l,cr,cb; |
2076 const int stride= s->current_picture.linesize[0]; | 2084 const int stride= s->current_picture.linesize[0]; |
2077 const int uvstride= s->current_picture.linesize[1]; | 2085 const int uvstride= s->current_picture.linesize[1]; |
2078 const int instride= s->input_picture.linesize[0]; | 2086 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
2079 const int uvinstride= s->input_picture.linesize[1]; | 2087 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
2080 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w; | 2088 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
2081 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2; | |
2082 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2; | |
2083 uint8_t current_mb[3][stride*block_w]; | |
2084 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]}; | |
2085 int P[10][2]; | 2089 int P[10][2]; |
2086 int16_t last_mv[3][2]; | 2090 int16_t last_mv[3][2]; |
2087 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused | 2091 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
2088 const int shift= 1+qpel; | 2092 const int shift= 1+qpel; |
2089 MotionEstContext *c= &s->m.me; | 2093 MotionEstContext *c= &s->m.me; |
2094 assert(sizeof(s->block_state) >= 256); | 2098 assert(sizeof(s->block_state) >= 256); |
2095 if(s->keyframe){ | 2099 if(s->keyframe){ |
2096 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); | 2100 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); |
2097 return 0; | 2101 return 0; |
2098 } | 2102 } |
2099 | |
2100 //FIXME optimize | |
2101 for(i=0; i<block_w; i++) | |
2102 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w); | |
2103 for(i=0; i<block_w>>1; i++) | |
2104 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1); | |
2105 for(i=0; i<block_w>>1; i++) | |
2106 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1); | |
2107 | 2103 |
2108 // clip predictors / edge ? | 2104 // clip predictors / edge ? |
2109 | 2105 |
2110 P_LEFT[0]= left->mx; | 2106 P_LEFT[0]= left->mx; |
2111 P_LEFT[1]= left->my; | 2107 P_LEFT[1]= left->my; |
2188 + (pc.outstanding_count - s->c.outstanding_count)*8 | 2184 + (pc.outstanding_count - s->c.outstanding_count)*8 |
2189 + (-av_log2(pc.range) + av_log2(s->c.range)) | 2185 + (-av_log2(pc.range) + av_log2(s->c.range)) |
2190 ))>>FF_LAMBDA_SHIFT; | 2186 ))>>FF_LAMBDA_SHIFT; |
2191 | 2187 |
2192 block_s= block_w*block_w; | 2188 block_s= block_w*block_w; |
2193 sum = pix_sum(&current_mb[0][0], stride, block_w); | 2189 sum = pix_sum(current_data[0], stride, block_w); |
2194 l= (sum + block_s/2)/block_s; | 2190 l= (sum + block_s/2)/block_s; |
2195 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s; | 2191 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
2196 | 2192 |
2197 block_s= block_w*block_w>>2; | 2193 block_s= block_w*block_w>>2; |
2198 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1); | 2194 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
2199 cb= (sum + block_s/2)/block_s; | 2195 cb= (sum + block_s/2)/block_s; |
2200 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; | 2196 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
2201 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1); | 2197 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
2202 cr= (sum + block_s/2)/block_s; | 2198 cr= (sum + block_s/2)/block_s; |
2203 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; | 2199 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
2204 | 2200 |
2205 ic= s->c; | 2201 ic= s->c; |
2206 ic.bytestream_start= | 2202 ic.bytestream_start= |
2262 memcpy(s->block_state, p_state, sizeof(s->block_state)); | 2258 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
2263 return score; | 2259 return score; |
2264 } | 2260 } |
2265 } | 2261 } |
2266 | 2262 |
2263 static always_inline int same_block(BlockNode *a, BlockNode *b){ | |
2264 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ | |
2265 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); | |
2266 }else{ | |
2267 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA)); | |
2268 } | |
2269 } | |
2270 | |
2271 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ | |
2272 const int w= s->b_width << s->block_max_depth; | |
2273 const int rem_depth= s->block_max_depth - level; | |
2274 const int index= (x + y*w) << rem_depth; | |
2275 int trx= (x+1)<<rem_depth; | |
2276 BlockNode *b= &s->block[index]; | |
2277 BlockNode *left = x ? &s->block[index-1] : &null_block; | |
2278 BlockNode *top = y ? &s->block[index-w] : &null_block; | |
2279 BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
2280 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
2281 int pl = left->color[0]; | |
2282 int pcb= left->color[1]; | |
2283 int pcr= left->color[2]; | |
2284 int pmx= mid_pred(left->mx, top->mx, tr->mx); | |
2285 int pmy= mid_pred(left->my, top->my, tr->my); | |
2286 int mx_context= av_log2(2*ABS(left->mx - top->mx)); | |
2287 int my_context= av_log2(2*ABS(left->my - top->my)); | |
2288 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
2289 | |
2290 if(s->keyframe){ | |
2291 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); | |
2292 return; | |
2293 } | |
2294 | |
2295 if(level!=s->block_max_depth){ | |
2296 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ | |
2297 put_rac(&s->c, &s->block_state[4 + s_context], 0); | |
2298 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); | |
2299 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); | |
2300 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); | |
2301 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); | |
2302 return; | |
2303 }else{ | |
2304 put_rac(&s->c, &s->block_state[4 + s_context], 1); | |
2305 } | |
2306 } | |
2307 if(b->type & BLOCK_INTRA){ | |
2308 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); | |
2309 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); | |
2310 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); | |
2311 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); | |
2312 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA); | |
2313 }else{ | |
2314 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); | |
2315 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); | |
2316 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); | |
2317 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0); | |
2318 } | |
2319 } | |
2320 | |
2267 static void decode_q_branch(SnowContext *s, int level, int x, int y){ | 2321 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
2268 const int w= s->b_width << s->block_max_depth; | 2322 const int w= s->b_width << s->block_max_depth; |
2269 const int rem_depth= s->block_max_depth - level; | 2323 const int rem_depth= s->block_max_depth - level; |
2270 const int index= (x + y*w) << rem_depth; | 2324 const int index= (x + y*w) << rem_depth; |
2271 static BlockNode null_block= { //FIXME add border maybe | |
2272 .color= {128,128,128}, | |
2273 .mx= 0, | |
2274 .my= 0, | |
2275 .type= 0, | |
2276 .level= 0, | |
2277 }; | |
2278 int trx= (x+1)<<rem_depth; | 2325 int trx= (x+1)<<rem_depth; |
2279 BlockNode *left = x ? &s->block[index-1] : &null_block; | 2326 BlockNode *left = x ? &s->block[index-1] : &null_block; |
2280 BlockNode *top = y ? &s->block[index-w] : &null_block; | 2327 BlockNode *top = y ? &s->block[index-w] : &null_block; |
2281 BlockNode *tl = y && x ? &s->block[index-w-1] : left; | 2328 BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
2282 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | 2329 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2319 static void encode_blocks(SnowContext *s){ | 2366 static void encode_blocks(SnowContext *s){ |
2320 int x, y; | 2367 int x, y; |
2321 int w= s->b_width; | 2368 int w= s->b_width; |
2322 int h= s->b_height; | 2369 int h= s->b_height; |
2323 | 2370 |
2371 if(s->avctx->me_method == ME_ITER && !s->keyframe) | |
2372 iterative_me(s); | |
2373 | |
2324 for(y=0; y<h; y++){ | 2374 for(y=0; y<h; y++){ |
2325 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit | 2375 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
2326 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); | 2376 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2327 return; | 2377 return; |
2328 } | 2378 } |
2329 for(x=0; x<w; x++){ | 2379 for(x=0; x<w; x++){ |
2330 encode_q_branch(s, 0, x, y); | 2380 if(s->avctx->me_method == ME_ITER) |
2381 encode_q_branch2(s, 0, x, y); | |
2382 else | |
2383 encode_q_branch (s, 0, x, y); | |
2331 } | 2384 } |
2332 } | 2385 } |
2333 } | 2386 } |
2334 | 2387 |
2335 static void decode_blocks(SnowContext *s){ | 2388 static void decode_blocks(SnowContext *s){ |
2428 mca( 8, 0,8) | 2481 mca( 8, 0,8) |
2429 mca( 0, 8,8) | 2482 mca( 0, 8,8) |
2430 mca( 8, 8,8) | 2483 mca( 8, 8,8) |
2431 | 2484 |
2432 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ | 2485 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2433 if(block->type){ | 2486 if(block->type & BLOCK_INTRA){ |
2434 int x, y; | 2487 int x, y; |
2435 const int color= block->color[plane_index]; | 2488 const int color= block->color[plane_index]; |
2436 for(y=0; y < b_h; y++){ | 2489 for(y=0; y < b_h; y++){ |
2437 for(x=0; x < b_w; x++){ | 2490 for(x=0; x < b_w; x++){ |
2438 dst[x + y*stride]= color; | 2491 dst[x + y*stride]= color; |
2455 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16)) | 2508 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16)) |
2456 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); | 2509 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); |
2457 else | 2510 else |
2458 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); | 2511 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); |
2459 } | 2512 } |
2460 } | |
2461 | |
2462 static always_inline int same_block(BlockNode *a, BlockNode *b){ | |
2463 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type); | |
2464 } | 2513 } |
2465 | 2514 |
2466 //FIXME name clenup (b_w, block_w, b_width stuff) | 2515 //FIXME name clenup (b_w, block_w, b_width stuff) |
2467 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ | 2516 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ |
2468 DWTELEM * dst = NULL; | 2517 DWTELEM * dst = NULL; |
2845 const int mb_h= s->b_height << s->block_max_depth; | 2894 const int mb_h= s->b_height << s->block_max_depth; |
2846 int x, y, mb_x; | 2895 int x, y, mb_x; |
2847 int block_size = MB_SIZE >> s->block_max_depth; | 2896 int block_size = MB_SIZE >> s->block_max_depth; |
2848 int block_w = plane_index ? block_size/2 : block_size; | 2897 int block_w = plane_index ? block_size/2 : block_size; |
2849 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | 2898 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2850 int obmc_stride= plane_index ? block_size : 2*block_size; | 2899 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2851 int ref_stride= s->current_picture.linesize[plane_index]; | 2900 int ref_stride= s->current_picture.linesize[plane_index]; |
2852 uint8_t *ref = s->last_picture.data[plane_index]; | 2901 uint8_t *ref = s->last_picture.data[plane_index]; |
2853 uint8_t *dst8= s->current_picture.data[plane_index]; | 2902 uint8_t *dst8= s->current_picture.data[plane_index]; |
2854 int w= p->width; | 2903 int w= p->width; |
2855 int h= p->height; | 2904 int h= p->height; |
2900 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ | 2949 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ |
2901 const int mb_h= s->b_height << s->block_max_depth; | 2950 const int mb_h= s->b_height << s->block_max_depth; |
2902 int mb_y; | 2951 int mb_y; |
2903 for(mb_y=0; mb_y<=mb_h; mb_y++) | 2952 for(mb_y=0; mb_y<=mb_h; mb_y++) |
2904 predict_slice(s, buf, plane_index, add, mb_y); | 2953 predict_slice(s, buf, plane_index, add, mb_y); |
2954 } | |
2955 | |
2956 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
2957 int i, x2, y2; | |
2958 Plane *p= &s->plane[plane_index]; | |
2959 const int block_size = MB_SIZE >> s->block_max_depth; | |
2960 const int block_w = plane_index ? block_size/2 : block_size; | |
2961 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2962 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2963 const int ref_stride= s->current_picture.linesize[plane_index]; | |
2964 uint8_t *ref= s-> last_picture.data[plane_index]; | |
2965 uint8_t *dst= s->current_picture.data[plane_index]; | |
2966 uint8_t *src= s-> input_picture.data[plane_index]; | |
2967 const static DWTELEM zero_dst[4096]; //FIXME | |
2968 const int b_stride = s->b_width << s->block_max_depth; | |
2969 const int w= p->width; | |
2970 const int h= p->height; | |
2971 int index= mb_x + mb_y*b_stride; | |
2972 BlockNode *b= &s->block[index]; | |
2973 BlockNode backup= *b; | |
2974 int ab=0; | |
2975 int aa=0; | |
2976 | |
2977 b->type|= BLOCK_INTRA; | |
2978 b->color[plane_index]= 0; | |
2979 | |
2980 for(i=0; i<4; i++){ | |
2981 int mb_x2= mb_x + (i &1) - 1; | |
2982 int mb_y2= mb_y + (i>>1) - 1; | |
2983 int x= block_w*mb_x2 + block_w/2; | |
2984 int y= block_w*mb_y2 + block_w/2; | |
2985 | |
2986 add_yblock(s, zero_dst, dst, ref, obmc, | |
2987 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index); | |
2988 | |
2989 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ | |
2990 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ | |
2991 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; | |
2992 int obmc_v= obmc[index]; | |
2993 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; | |
2994 if(x<0) obmc_v += obmc[index + block_w]; | |
2995 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; | |
2996 if(x+block_w>w) obmc_v += obmc[index - block_w]; | |
2997 //FIXME precalc this or simplify it somehow else | |
2998 | |
2999 ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v; | |
3000 aa += obmc_v * obmc_v; //FIXME precalclate this | |
3001 } | |
3002 } | |
3003 } | |
3004 *b= backup; | |
3005 | |
3006 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping | |
3007 } | |
3008 | |
3009 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
3010 int i, y2; | |
3011 Plane *p= &s->plane[plane_index]; | |
3012 const int block_size = MB_SIZE >> s->block_max_depth; | |
3013 const int block_w = plane_index ? block_size/2 : block_size; | |
3014 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
3015 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
3016 const int ref_stride= s->current_picture.linesize[plane_index]; | |
3017 uint8_t *ref= s-> last_picture.data[plane_index]; | |
3018 uint8_t *dst= s->current_picture.data[plane_index]; | |
3019 uint8_t *src= s-> input_picture.data[plane_index]; | |
3020 const static DWTELEM zero_dst[4096]; //FIXME | |
3021 const int b_stride = s->b_width << s->block_max_depth; | |
3022 const int b_height = s->b_height<< s->block_max_depth; | |
3023 const int w= p->width; | |
3024 const int h= p->height; | |
3025 int distortion= 0; | |
3026 int rate= 0; | |
3027 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
3028 | |
3029 for(i=0; i<4; i++){ | |
3030 int mb_x2= mb_x + (i &1) - 1; | |
3031 int mb_y2= mb_y + (i>>1) - 1; | |
3032 int x= block_w*mb_x2 + block_w/2; | |
3033 int y= block_w*mb_y2 + block_w/2; | |
3034 | |
3035 add_yblock(s, zero_dst, dst, ref, obmc, | |
3036 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index); | |
3037 | |
3038 //FIXME find a cleaner/simpler way to skip the outside stuff | |
3039 for(y2= y; y2<0; y2++) | |
3040 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
3041 for(y2= h; y2<y+block_w; y2++) | |
3042 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
3043 if(x<0){ | |
3044 for(y2= y; y2<y+block_w; y2++) | |
3045 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
3046 } | |
3047 if(x+block_w > w){ | |
3048 for(y2= y; y2<y+block_w; y2++) | |
3049 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
3050 } | |
3051 | |
3052 assert(block_w== 8 || block_w==16); | |
3053 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
3054 } | |
3055 | |
3056 if(plane_index==0){ | |
3057 for(i=0; i<4; i++){ | |
3058 /* ..RRr | |
3059 * .RXx. | |
3060 * rxx.. | |
3061 */ | |
3062 int x= mb_x + (i&1) - (i>>1); | |
3063 int y= mb_y + (i>>1); | |
3064 int index= x + y*b_stride; | |
3065 BlockNode *b = &s->block[index]; | |
3066 BlockNode *left = x ? &s->block[index-1] : &null_block; | |
3067 BlockNode *top = y ? &s->block[index-b_stride] : &null_block; | |
3068 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; | |
3069 BlockNode *tr = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl; | |
3070 int dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx); | |
3071 int dmy= b->my - mid_pred(left->my, top->my, tr->my); | |
3072 // int mx_context= av_log2(2*ABS(left->mx - top->mx)); | |
3073 // int my_context= av_log2(2*ABS(left->my - top->my)); | |
3074 | |
3075 if(x<0 || x>=b_stride || y>=b_height) | |
3076 continue; | |
3077 /* | |
3078 1 0 0 | |
3079 01X 1-2 1 | |
3080 001XX 3-6 2-3 | |
3081 0001XXX 7-14 4-7 | |
3082 00001XXXX 15-30 8-15 | |
3083 */ | |
3084 //FIXME try accurate rate | |
3085 //FIXME intra and inter predictors if surrounding blocks arent the same type | |
3086 if(b->type & BLOCK_INTRA){ | |
3087 rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0])) | |
3088 + av_log2(2*ABS(left->color[1] - b->color[1])) | |
3089 + av_log2(2*ABS(left->color[2] - b->color[2]))); | |
3090 }else | |
3091 rate += 2*(1 + av_log2(2*ABS(dmx)) | |
3092 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda | |
3093 } | |
3094 } | |
3095 | |
3096 return distortion + rate*penalty_factor; | |
3097 } | |
3098 | |
3099 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){ | |
3100 const int b_stride= s->b_width << s->block_max_depth; | |
3101 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
3102 BlockNode backup= *block; | |
3103 int rd, index, value; | |
3104 | |
3105 assert(mb_x>=0 && mb_y>=0); | |
3106 assert(mb_y<b_stride); | |
3107 | |
3108 if(intra){ | |
3109 block->color[0] = p[0]; | |
3110 block->color[1] = p[1]; | |
3111 block->color[2] = p[2]; | |
3112 block->type |= BLOCK_INTRA; | |
3113 }else{ | |
3114 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); | |
3115 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6); | |
3116 if(s->me_cache[index] == value) | |
3117 return 0; | |
3118 s->me_cache[index]= value; | |
3119 | |
3120 block->mx= p[0]; | |
3121 block->my= p[1]; | |
3122 block->type &= ~BLOCK_INTRA; | |
3123 } | |
3124 | |
3125 rd= get_block_rd(s, mb_x, mb_y, 0); | |
3126 | |
3127 //FIXME chroma | |
3128 if(rd < *best_rd){ | |
3129 *best_rd= rd; | |
3130 return 1; | |
3131 }else{ | |
3132 *block= backup; | |
3133 return 0; | |
3134 } | |
3135 } | |
3136 | |
3137 static void iterative_me(SnowContext *s){ | |
3138 int pass, mb_x, mb_y; | |
3139 const int b_width = s->b_width << s->block_max_depth; | |
3140 const int b_height= s->b_height << s->block_max_depth; | |
3141 const int b_stride= b_width; | |
3142 int color[3]; | |
3143 | |
3144 for(pass=0; pass<50; pass++){ | |
3145 int change= 0; | |
3146 | |
3147 for(mb_y= 0; mb_y<b_height; mb_y++){ | |
3148 for(mb_x= 0; mb_x<b_width; mb_x++){ | |
3149 int dia_change, i, j; | |
3150 int best_rd= INT_MAX; | |
3151 BlockNode backup; | |
3152 const int index= mb_x + mb_y * b_stride; | |
3153 BlockNode *block= &s->block[index]; | |
3154 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block; | |
3155 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block; | |
3156 BlockNode *rb = mb_x<b_width ? &s->block[index +1] : &null_block; | |
3157 BlockNode *bb = mb_y<b_height ? &s->block[index+b_stride ] : &null_block; | |
3158 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block; | |
3159 BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block; | |
3160 BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block; | |
3161 BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block; | |
3162 | |
3163 if(pass && (block->type & BLOCK_OPT)) | |
3164 continue; | |
3165 block->type |= BLOCK_OPT; | |
3166 | |
3167 backup= *block; | |
3168 | |
3169 if(!s->me_cache_generation) | |
3170 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3171 s->me_cache_generation += 1<<22; | |
3172 | |
3173 // get previous score (cant be cached due to OBMC) | |
3174 check_block(s, mb_x, mb_y, (int[2]){block->mx, block->my}, 0, &best_rd); | |
3175 check_block(s, mb_x, mb_y, (int[2]){0, 0}, 0, &best_rd); | |
3176 check_block(s, mb_x, mb_y, (int[2]){tb->mx, tb->my}, 0, &best_rd); | |
3177 check_block(s, mb_x, mb_y, (int[2]){lb->mx, lb->my}, 0, &best_rd); | |
3178 check_block(s, mb_x, mb_y, (int[2]){rb->mx, rb->my}, 0, &best_rd); | |
3179 check_block(s, mb_x, mb_y, (int[2]){bb->mx, bb->my}, 0, &best_rd); | |
3180 | |
3181 /* fullpel ME */ | |
3182 //FIXME avoid subpel interpol / round to nearest integer | |
3183 do{ | |
3184 dia_change=0; | |
3185 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3186 for(j=0; j<i; j++){ | |
3187 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my+(4*j)}, 0, &best_rd); | |
3188 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my-(4*j)}, 0, &best_rd); | |
3189 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my-(4*j)}, 0, &best_rd); | |
3190 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my+(4*j)}, 0, &best_rd); | |
3191 } | |
3192 } | |
3193 }while(dia_change); | |
3194 /* subpel ME */ | |
3195 do{ | |
3196 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3197 dia_change=0; | |
3198 for(i=0; i<8; i++) | |
3199 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+square[i][0], block->my+square[i][1]}, 0, &best_rd); | |
3200 }while(dia_change); | |
3201 //FIXME or try the standard 2 pass qpel or similar | |
3202 | |
3203 for(i=0; i<3; i++){ | |
3204 color[i]= get_dc(s, mb_x, mb_y, i); | |
3205 } | |
3206 check_block(s, mb_x, mb_y, color, 1, &best_rd); | |
3207 //FIXME RD style color selection | |
3208 | |
3209 if(!same_block(block, &backup)){ | |
3210 if(tb != &null_block) tb ->type &= ~BLOCK_OPT; | |
3211 if(lb != &null_block) lb ->type &= ~BLOCK_OPT; | |
3212 if(rb != &null_block) rb ->type &= ~BLOCK_OPT; | |
3213 if(bb != &null_block) bb ->type &= ~BLOCK_OPT; | |
3214 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT; | |
3215 if(trb!= &null_block) trb->type &= ~BLOCK_OPT; | |
3216 if(blb!= &null_block) blb->type &= ~BLOCK_OPT; | |
3217 if(brb!= &null_block) brb->type &= ~BLOCK_OPT; | |
3218 change ++; | |
3219 } | |
3220 } | |
3221 } | |
3222 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); | |
3223 if(!change) | |
3224 break; | |
3225 } | |
2905 } | 3226 } |
2906 | 3227 |
2907 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ | 3228 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ |
2908 const int level= b->level; | 3229 const int level= b->level; |
2909 const int w= b->width; | 3230 const int w= b->width; |
3410 return -1; | 3731 return -1; |
3411 } | 3732 } |
3412 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | 3733 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); |
3413 s->chroma_h_shift= 1; | 3734 s->chroma_h_shift= 1; |
3414 s->chroma_v_shift= 1; | 3735 s->chroma_v_shift= 1; |
3736 | |
3737 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); | |
3738 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); | |
3739 | |
3740 s->avctx->get_buffer(s->avctx, &s->input_picture); | |
3741 | |
3415 return 0; | 3742 return 0; |
3416 } | 3743 } |
3417 | 3744 |
3418 static int frame_start(SnowContext *s){ | 3745 static int frame_start(SnowContext *s){ |
3419 AVFrame tmp; | 3746 AVFrame tmp; |
3443 SnowContext *s = avctx->priv_data; | 3770 SnowContext *s = avctx->priv_data; |
3444 RangeCoder * const c= &s->c; | 3771 RangeCoder * const c= &s->c; |
3445 AVFrame *pict = data; | 3772 AVFrame *pict = data; |
3446 const int width= s->avctx->width; | 3773 const int width= s->avctx->width; |
3447 const int height= s->avctx->height; | 3774 const int height= s->avctx->height; |
3448 int level, orientation, plane_index; | 3775 int level, orientation, plane_index, i, y; |
3449 | 3776 |
3450 ff_init_range_encoder(c, buf, buf_size); | 3777 ff_init_range_encoder(c, buf, buf_size); |
3451 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | 3778 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); |
3452 | 3779 |
3453 s->input_picture = *pict; | 3780 for(i=0; i<3; i++){ |
3781 int shift= !!i; | |
3782 for(y=0; y<(height>>shift); y++) | |
3783 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], | |
3784 &pict->data[i][y * pict->linesize[i]], | |
3785 width>>shift); | |
3786 } | |
3787 s->new_picture = *pict; | |
3454 | 3788 |
3455 if(avctx->flags&CODEC_FLAG_PASS2){ | 3789 if(avctx->flags&CODEC_FLAG_PASS2){ |
3456 s->m.pict_type = | 3790 s->m.pict_type = |
3457 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; | 3791 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
3458 s->keyframe= pict->pict_type==FF_I_TYPE; | 3792 s->keyframe= pict->pict_type==FF_I_TYPE; |
3512 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); | 3846 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
3513 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; | 3847 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
3514 | 3848 |
3515 s->m.dsp= s->dsp; //move | 3849 s->m.dsp= s->dsp; //move |
3516 ff_init_me(&s->m); | 3850 ff_init_me(&s->m); |
3851 s->dsp= s->m.dsp; | |
3517 } | 3852 } |
3518 | 3853 |
3519 redo_frame: | 3854 redo_frame: |
3520 | 3855 |
3521 s->qbias= pict->pict_type == P_TYPE ? 2 : 0; | 3856 s->qbias= pict->pict_type == P_TYPE ? 2 : 0; |