comparison snow.c @ 2993:cb0e26759cca libavcodec

iterative overlapped block based motion estimation for snow
author michael
date Wed, 28 Dec 2005 15:43:53 +0000
parents bfabfdf9ce55
children 657e8546090d
comparison
equal deleted inserted replaced
2992:f74ae8aff2a9 2993:cb0e26759cca
381 int16_t my; 381 int16_t my;
382 uint8_t color[3]; 382 uint8_t color[3];
383 uint8_t type; 383 uint8_t type;
384 //#define TYPE_SPLIT 1 384 //#define TYPE_SPLIT 1
385 #define BLOCK_INTRA 1 385 #define BLOCK_INTRA 1
386 #define BLOCK_OPT 2
386 //#define TYPE_NOCOLOR 4 387 //#define TYPE_NOCOLOR 4
387 uint8_t level; //FIXME merge into type? 388 uint8_t level; //FIXME merge into type?
388 }BlockNode; 389 }BlockNode;
390
391 static const BlockNode null_block= { //FIXME add border maybe
392 .color= {128,128,128},
393 .mx= 0,
394 .my= 0,
395 .type= 0,
396 .level= 0,
397 };
389 398
390 #define LOG2_MB_SIZE 4 399 #define LOG2_MB_SIZE 4
391 #define MB_SIZE (1<<LOG2_MB_SIZE) 400 #define MB_SIZE (1<<LOG2_MB_SIZE)
392 401
393 typedef struct x_and_coeff{ 402 typedef struct x_and_coeff{
431 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) 440 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432 441
433 AVCodecContext *avctx; 442 AVCodecContext *avctx;
434 RangeCoder c; 443 RangeCoder c;
435 DSPContext dsp; 444 DSPContext dsp;
436 AVFrame input_picture; 445 AVFrame new_picture;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
437 AVFrame current_picture; 447 AVFrame current_picture;
438 AVFrame last_picture; 448 AVFrame last_picture;
439 AVFrame mconly_picture; 449 AVFrame mconly_picture;
440 // uint8_t q_context[16]; 450 // uint8_t q_context[16];
441 uint8_t header_state[32]; 451 uint8_t header_state[32];
461 int b_width; 471 int b_width;
462 int b_height; 472 int b_height;
463 int block_max_depth; 473 int block_max_depth;
464 Plane plane[MAX_PLANES]; 474 Plane plane[MAX_PLANES];
465 BlockNode *block; 475 BlockNode *block;
476 #define ME_CACHE_SIZE 1024
477 int me_cache[ME_CACHE_SIZE];
478 int me_cache_generation;
466 slice_buffer sb; 479 slice_buffer sb;
467 480
468 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) 481 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469 }SnowContext; 482 }SnowContext;
470 483
476 int y; 489 int y;
477 } dwt_compose_t; 490 } dwt_compose_t;
478 491
479 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) 492 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) 493 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494
495 static void iterative_me(SnowContext *s);
481 496
482 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) 497 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483 { 498 {
484 int i; 499 int i;
485 500
2049 const int w= s->b_width << s->block_max_depth; 2064 const int w= s->b_width << s->block_max_depth;
2050 const int h= s->b_height << s->block_max_depth; 2065 const int h= s->b_height << s->block_max_depth;
2051 const int rem_depth= s->block_max_depth - level; 2066 const int rem_depth= s->block_max_depth - level;
2052 const int index= (x + y*w) << rem_depth; 2067 const int index= (x + y*w) << rem_depth;
2053 const int block_w= 1<<(LOG2_MB_SIZE - level); 2068 const int block_w= 1<<(LOG2_MB_SIZE - level);
2054 static BlockNode null_block= { //FIXME add border maybe
2055 .color= {128,128,128},
2056 .mx= 0,
2057 .my= 0,
2058 .type= 0,
2059 .level= 0,
2060 };
2061 int trx= (x+1)<<rem_depth; 2069 int trx= (x+1)<<rem_depth;
2062 int try= (y+1)<<rem_depth; 2070 int try= (y+1)<<rem_depth;
2063 BlockNode *left = x ? &s->block[index-1] : &null_block; 2071 BlockNode *left = x ? &s->block[index-1] : &null_block;
2064 BlockNode *top = y ? &s->block[index-w] : &null_block; 2072 BlockNode *top = y ? &s->block[index-w] : &null_block;
2065 BlockNode *right = trx<w ? &s->block[index+1] : &null_block; 2073 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2070 int pcb= left->color[1]; 2078 int pcb= left->color[1];
2071 int pcr= left->color[2]; 2079 int pcr= left->color[2];
2072 int pmx= mid_pred(left->mx, top->mx, tr->mx); 2080 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2073 int pmy= mid_pred(left->my, top->my, tr->my); 2081 int pmy= mid_pred(left->my, top->my, tr->my);
2074 int mx=0, my=0; 2082 int mx=0, my=0;
2075 int l,cr,cb, i; 2083 int l,cr,cb;
2076 const int stride= s->current_picture.linesize[0]; 2084 const int stride= s->current_picture.linesize[0];
2077 const int uvstride= s->current_picture.linesize[1]; 2085 const int uvstride= s->current_picture.linesize[1];
2078 const int instride= s->input_picture.linesize[0]; 2086 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2079 const int uvinstride= s->input_picture.linesize[1]; 2087 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2080 uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w; 2088 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2081 uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2082 uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2083 uint8_t current_mb[3][stride*block_w];
2084 uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2085 int P[10][2]; 2089 int P[10][2];
2086 int16_t last_mv[3][2]; 2090 int16_t last_mv[3][2];
2087 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused 2091 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2088 const int shift= 1+qpel; 2092 const int shift= 1+qpel;
2089 MotionEstContext *c= &s->m.me; 2093 MotionEstContext *c= &s->m.me;
2094 assert(sizeof(s->block_state) >= 256); 2098 assert(sizeof(s->block_state) >= 256);
2095 if(s->keyframe){ 2099 if(s->keyframe){
2096 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); 2100 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2097 return 0; 2101 return 0;
2098 } 2102 }
2099
2100 //FIXME optimize
2101 for(i=0; i<block_w; i++)
2102 memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w);
2103 for(i=0; i<block_w>>1; i++)
2104 memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2105 for(i=0; i<block_w>>1; i++)
2106 memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2107 2103
2108 // clip predictors / edge ? 2104 // clip predictors / edge ?
2109 2105
2110 P_LEFT[0]= left->mx; 2106 P_LEFT[0]= left->mx;
2111 P_LEFT[1]= left->my; 2107 P_LEFT[1]= left->my;
2188 + (pc.outstanding_count - s->c.outstanding_count)*8 2184 + (pc.outstanding_count - s->c.outstanding_count)*8
2189 + (-av_log2(pc.range) + av_log2(s->c.range)) 2185 + (-av_log2(pc.range) + av_log2(s->c.range))
2190 ))>>FF_LAMBDA_SHIFT; 2186 ))>>FF_LAMBDA_SHIFT;
2191 2187
2192 block_s= block_w*block_w; 2188 block_s= block_w*block_w;
2193 sum = pix_sum(&current_mb[0][0], stride, block_w); 2189 sum = pix_sum(current_data[0], stride, block_w);
2194 l= (sum + block_s/2)/block_s; 2190 l= (sum + block_s/2)/block_s;
2195 iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s; 2191 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2196 2192
2197 block_s= block_w*block_w>>2; 2193 block_s= block_w*block_w>>2;
2198 sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1); 2194 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2199 cb= (sum + block_s/2)/block_s; 2195 cb= (sum + block_s/2)/block_s;
2200 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; 2196 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2201 sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1); 2197 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2202 cr= (sum + block_s/2)/block_s; 2198 cr= (sum + block_s/2)/block_s;
2203 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; 2199 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2204 2200
2205 ic= s->c; 2201 ic= s->c;
2206 ic.bytestream_start= 2202 ic.bytestream_start=
2262 memcpy(s->block_state, p_state, sizeof(s->block_state)); 2258 memcpy(s->block_state, p_state, sizeof(s->block_state));
2263 return score; 2259 return score;
2264 } 2260 }
2265 } 2261 }
2266 2262
2263 static always_inline int same_block(BlockNode *a, BlockNode *b){
2264 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2265 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2266 }else{
2267 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
2268 }
2269 }
2270
2271 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2272 const int w= s->b_width << s->block_max_depth;
2273 const int rem_depth= s->block_max_depth - level;
2274 const int index= (x + y*w) << rem_depth;
2275 int trx= (x+1)<<rem_depth;
2276 BlockNode *b= &s->block[index];
2277 BlockNode *left = x ? &s->block[index-1] : &null_block;
2278 BlockNode *top = y ? &s->block[index-w] : &null_block;
2279 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2280 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2281 int pl = left->color[0];
2282 int pcb= left->color[1];
2283 int pcr= left->color[2];
2284 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2285 int pmy= mid_pred(left->my, top->my, tr->my);
2286 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2287 int my_context= av_log2(2*ABS(left->my - top->my));
2288 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2289
2290 if(s->keyframe){
2291 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2292 return;
2293 }
2294
2295 if(level!=s->block_max_depth){
2296 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2297 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2298 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2299 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2300 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2301 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2302 return;
2303 }else{
2304 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2305 }
2306 }
2307 if(b->type & BLOCK_INTRA){
2308 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2309 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2310 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2311 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2312 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2313 }else{
2314 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2315 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2316 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2317 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
2318 }
2319 }
2320
2267 static void decode_q_branch(SnowContext *s, int level, int x, int y){ 2321 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2268 const int w= s->b_width << s->block_max_depth; 2322 const int w= s->b_width << s->block_max_depth;
2269 const int rem_depth= s->block_max_depth - level; 2323 const int rem_depth= s->block_max_depth - level;
2270 const int index= (x + y*w) << rem_depth; 2324 const int index= (x + y*w) << rem_depth;
2271 static BlockNode null_block= { //FIXME add border maybe
2272 .color= {128,128,128},
2273 .mx= 0,
2274 .my= 0,
2275 .type= 0,
2276 .level= 0,
2277 };
2278 int trx= (x+1)<<rem_depth; 2325 int trx= (x+1)<<rem_depth;
2279 BlockNode *left = x ? &s->block[index-1] : &null_block; 2326 BlockNode *left = x ? &s->block[index-1] : &null_block;
2280 BlockNode *top = y ? &s->block[index-w] : &null_block; 2327 BlockNode *top = y ? &s->block[index-w] : &null_block;
2281 BlockNode *tl = y && x ? &s->block[index-w-1] : left; 2328 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2282 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt 2329 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2319 static void encode_blocks(SnowContext *s){ 2366 static void encode_blocks(SnowContext *s){
2320 int x, y; 2367 int x, y;
2321 int w= s->b_width; 2368 int w= s->b_width;
2322 int h= s->b_height; 2369 int h= s->b_height;
2323 2370
2371 if(s->avctx->me_method == ME_ITER && !s->keyframe)
2372 iterative_me(s);
2373
2324 for(y=0; y<h; y++){ 2374 for(y=0; y<h; y++){
2325 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit 2375 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2326 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 2376 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2327 return; 2377 return;
2328 } 2378 }
2329 for(x=0; x<w; x++){ 2379 for(x=0; x<w; x++){
2330 encode_q_branch(s, 0, x, y); 2380 if(s->avctx->me_method == ME_ITER)
2381 encode_q_branch2(s, 0, x, y);
2382 else
2383 encode_q_branch (s, 0, x, y);
2331 } 2384 }
2332 } 2385 }
2333 } 2386 }
2334 2387
2335 static void decode_blocks(SnowContext *s){ 2388 static void decode_blocks(SnowContext *s){
2428 mca( 8, 0,8) 2481 mca( 8, 0,8)
2429 mca( 0, 8,8) 2482 mca( 0, 8,8)
2430 mca( 8, 8,8) 2483 mca( 8, 8,8)
2431 2484
2432 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ 2485 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2433 if(block->type){ 2486 if(block->type & BLOCK_INTRA){
2434 int x, y; 2487 int x, y;
2435 const int color= block->color[plane_index]; 2488 const int color= block->color[plane_index];
2436 for(y=0; y < b_h; y++){ 2489 for(y=0; y < b_h; y++){
2437 for(x=0; x < b_w; x++){ 2490 for(x=0; x < b_w; x++){
2438 dst[x + y*stride]= color; 2491 dst[x + y*stride]= color;
2455 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16)) 2508 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2456 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); 2509 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2457 else 2510 else
2458 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); 2511 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2459 } 2512 }
2460 }
2461
2462 static always_inline int same_block(BlockNode *a, BlockNode *b){
2463 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2464 } 2513 }
2465 2514
2466 //FIXME name clenup (b_w, block_w, b_width stuff) 2515 //FIXME name clenup (b_w, block_w, b_width stuff)
2467 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ 2516 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2468 DWTELEM * dst = NULL; 2517 DWTELEM * dst = NULL;
2845 const int mb_h= s->b_height << s->block_max_depth; 2894 const int mb_h= s->b_height << s->block_max_depth;
2846 int x, y, mb_x; 2895 int x, y, mb_x;
2847 int block_size = MB_SIZE >> s->block_max_depth; 2896 int block_size = MB_SIZE >> s->block_max_depth;
2848 int block_w = plane_index ? block_size/2 : block_size; 2897 int block_w = plane_index ? block_size/2 : block_size;
2849 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2898 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2850 int obmc_stride= plane_index ? block_size : 2*block_size; 2899 const int obmc_stride= plane_index ? block_size : 2*block_size;
2851 int ref_stride= s->current_picture.linesize[plane_index]; 2900 int ref_stride= s->current_picture.linesize[plane_index];
2852 uint8_t *ref = s->last_picture.data[plane_index]; 2901 uint8_t *ref = s->last_picture.data[plane_index];
2853 uint8_t *dst8= s->current_picture.data[plane_index]; 2902 uint8_t *dst8= s->current_picture.data[plane_index];
2854 int w= p->width; 2903 int w= p->width;
2855 int h= p->height; 2904 int h= p->height;
2900 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ 2949 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2901 const int mb_h= s->b_height << s->block_max_depth; 2950 const int mb_h= s->b_height << s->block_max_depth;
2902 int mb_y; 2951 int mb_y;
2903 for(mb_y=0; mb_y<=mb_h; mb_y++) 2952 for(mb_y=0; mb_y<=mb_h; mb_y++)
2904 predict_slice(s, buf, plane_index, add, mb_y); 2953 predict_slice(s, buf, plane_index, add, mb_y);
2954 }
2955
2956 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2957 int i, x2, y2;
2958 Plane *p= &s->plane[plane_index];
2959 const int block_size = MB_SIZE >> s->block_max_depth;
2960 const int block_w = plane_index ? block_size/2 : block_size;
2961 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2962 const int obmc_stride= plane_index ? block_size : 2*block_size;
2963 const int ref_stride= s->current_picture.linesize[plane_index];
2964 uint8_t *ref= s-> last_picture.data[plane_index];
2965 uint8_t *dst= s->current_picture.data[plane_index];
2966 uint8_t *src= s-> input_picture.data[plane_index];
2967 const static DWTELEM zero_dst[4096]; //FIXME
2968 const int b_stride = s->b_width << s->block_max_depth;
2969 const int w= p->width;
2970 const int h= p->height;
2971 int index= mb_x + mb_y*b_stride;
2972 BlockNode *b= &s->block[index];
2973 BlockNode backup= *b;
2974 int ab=0;
2975 int aa=0;
2976
2977 b->type|= BLOCK_INTRA;
2978 b->color[plane_index]= 0;
2979
2980 for(i=0; i<4; i++){
2981 int mb_x2= mb_x + (i &1) - 1;
2982 int mb_y2= mb_y + (i>>1) - 1;
2983 int x= block_w*mb_x2 + block_w/2;
2984 int y= block_w*mb_y2 + block_w/2;
2985
2986 add_yblock(s, zero_dst, dst, ref, obmc,
2987 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
2988
2989 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2990 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2991 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2992 int obmc_v= obmc[index];
2993 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2994 if(x<0) obmc_v += obmc[index + block_w];
2995 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2996 if(x+block_w>w) obmc_v += obmc[index - block_w];
2997 //FIXME precalc this or simplify it somehow else
2998
2999 ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v;
3000 aa += obmc_v * obmc_v; //FIXME precalclate this
3001 }
3002 }
3003 }
3004 *b= backup;
3005
3006 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
3007 }
3008
3009 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3010 int i, y2;
3011 Plane *p= &s->plane[plane_index];
3012 const int block_size = MB_SIZE >> s->block_max_depth;
3013 const int block_w = plane_index ? block_size/2 : block_size;
3014 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3015 const int obmc_stride= plane_index ? block_size : 2*block_size;
3016 const int ref_stride= s->current_picture.linesize[plane_index];
3017 uint8_t *ref= s-> last_picture.data[plane_index];
3018 uint8_t *dst= s->current_picture.data[plane_index];
3019 uint8_t *src= s-> input_picture.data[plane_index];
3020 const static DWTELEM zero_dst[4096]; //FIXME
3021 const int b_stride = s->b_width << s->block_max_depth;
3022 const int b_height = s->b_height<< s->block_max_depth;
3023 const int w= p->width;
3024 const int h= p->height;
3025 int distortion= 0;
3026 int rate= 0;
3027 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3028
3029 for(i=0; i<4; i++){
3030 int mb_x2= mb_x + (i &1) - 1;
3031 int mb_y2= mb_y + (i>>1) - 1;
3032 int x= block_w*mb_x2 + block_w/2;
3033 int y= block_w*mb_y2 + block_w/2;
3034
3035 add_yblock(s, zero_dst, dst, ref, obmc,
3036 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
3037
3038 //FIXME find a cleaner/simpler way to skip the outside stuff
3039 for(y2= y; y2<0; y2++)
3040 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3041 for(y2= h; y2<y+block_w; y2++)
3042 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3043 if(x<0){
3044 for(y2= y; y2<y+block_w; y2++)
3045 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3046 }
3047 if(x+block_w > w){
3048 for(y2= y; y2<y+block_w; y2++)
3049 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3050 }
3051
3052 assert(block_w== 8 || block_w==16);
3053 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3054 }
3055
3056 if(plane_index==0){
3057 for(i=0; i<4; i++){
3058 /* ..RRr
3059 * .RXx.
3060 * rxx..
3061 */
3062 int x= mb_x + (i&1) - (i>>1);
3063 int y= mb_y + (i>>1);
3064 int index= x + y*b_stride;
3065 BlockNode *b = &s->block[index];
3066 BlockNode *left = x ? &s->block[index-1] : &null_block;
3067 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3068 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3069 BlockNode *tr = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl;
3070 int dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3071 int dmy= b->my - mid_pred(left->my, top->my, tr->my);
3072 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
3073 // int my_context= av_log2(2*ABS(left->my - top->my));
3074
3075 if(x<0 || x>=b_stride || y>=b_height)
3076 continue;
3077 /*
3078 1 0 0
3079 01X 1-2 1
3080 001XX 3-6 2-3
3081 0001XXX 7-14 4-7
3082 00001XXXX 15-30 8-15
3083 */
3084 //FIXME try accurate rate
3085 //FIXME intra and inter predictors if surrounding blocks arent the same type
3086 if(b->type & BLOCK_INTRA){
3087 rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3088 + av_log2(2*ABS(left->color[1] - b->color[1]))
3089 + av_log2(2*ABS(left->color[2] - b->color[2])));
3090 }else
3091 rate += 2*(1 + av_log2(2*ABS(dmx))
3092 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3093 }
3094 }
3095
3096 return distortion + rate*penalty_factor;
3097 }
3098
3099 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){
3100 const int b_stride= s->b_width << s->block_max_depth;
3101 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3102 BlockNode backup= *block;
3103 int rd, index, value;
3104
3105 assert(mb_x>=0 && mb_y>=0);
3106 assert(mb_y<b_stride);
3107
3108 if(intra){
3109 block->color[0] = p[0];
3110 block->color[1] = p[1];
3111 block->color[2] = p[2];
3112 block->type |= BLOCK_INTRA;
3113 }else{
3114 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3115 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3116 if(s->me_cache[index] == value)
3117 return 0;
3118 s->me_cache[index]= value;
3119
3120 block->mx= p[0];
3121 block->my= p[1];
3122 block->type &= ~BLOCK_INTRA;
3123 }
3124
3125 rd= get_block_rd(s, mb_x, mb_y, 0);
3126
3127 //FIXME chroma
3128 if(rd < *best_rd){
3129 *best_rd= rd;
3130 return 1;
3131 }else{
3132 *block= backup;
3133 return 0;
3134 }
3135 }
3136
3137 static void iterative_me(SnowContext *s){
3138 int pass, mb_x, mb_y;
3139 const int b_width = s->b_width << s->block_max_depth;
3140 const int b_height= s->b_height << s->block_max_depth;
3141 const int b_stride= b_width;
3142 int color[3];
3143
3144 for(pass=0; pass<50; pass++){
3145 int change= 0;
3146
3147 for(mb_y= 0; mb_y<b_height; mb_y++){
3148 for(mb_x= 0; mb_x<b_width; mb_x++){
3149 int dia_change, i, j;
3150 int best_rd= INT_MAX;
3151 BlockNode backup;
3152 const int index= mb_x + mb_y * b_stride;
3153 BlockNode *block= &s->block[index];
3154 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3155 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3156 BlockNode *rb = mb_x<b_width ? &s->block[index +1] : &null_block;
3157 BlockNode *bb = mb_y<b_height ? &s->block[index+b_stride ] : &null_block;
3158 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3159 BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3160 BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block;
3161 BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block;
3162
3163 if(pass && (block->type & BLOCK_OPT))
3164 continue;
3165 block->type |= BLOCK_OPT;
3166
3167 backup= *block;
3168
3169 if(!s->me_cache_generation)
3170 memset(s->me_cache, 0, sizeof(s->me_cache));
3171 s->me_cache_generation += 1<<22;
3172
3173 // get previous score (cant be cached due to OBMC)
3174 check_block(s, mb_x, mb_y, (int[2]){block->mx, block->my}, 0, &best_rd);
3175 check_block(s, mb_x, mb_y, (int[2]){0, 0}, 0, &best_rd);
3176 check_block(s, mb_x, mb_y, (int[2]){tb->mx, tb->my}, 0, &best_rd);
3177 check_block(s, mb_x, mb_y, (int[2]){lb->mx, lb->my}, 0, &best_rd);
3178 check_block(s, mb_x, mb_y, (int[2]){rb->mx, rb->my}, 0, &best_rd);
3179 check_block(s, mb_x, mb_y, (int[2]){bb->mx, bb->my}, 0, &best_rd);
3180
3181 /* fullpel ME */
3182 //FIXME avoid subpel interpol / round to nearest integer
3183 do{
3184 dia_change=0;
3185 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3186 for(j=0; j<i; j++){
3187 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my+(4*j)}, 0, &best_rd);
3188 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my-(4*j)}, 0, &best_rd);
3189 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my-(4*j)}, 0, &best_rd);
3190 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my+(4*j)}, 0, &best_rd);
3191 }
3192 }
3193 }while(dia_change);
3194 /* subpel ME */
3195 do{
3196 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3197 dia_change=0;
3198 for(i=0; i<8; i++)
3199 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+square[i][0], block->my+square[i][1]}, 0, &best_rd);
3200 }while(dia_change);
3201 //FIXME or try the standard 2 pass qpel or similar
3202
3203 for(i=0; i<3; i++){
3204 color[i]= get_dc(s, mb_x, mb_y, i);
3205 }
3206 check_block(s, mb_x, mb_y, color, 1, &best_rd);
3207 //FIXME RD style color selection
3208
3209 if(!same_block(block, &backup)){
3210 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3211 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3212 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3213 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3214 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3215 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3216 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3217 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3218 change ++;
3219 }
3220 }
3221 }
3222 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3223 if(!change)
3224 break;
3225 }
2905 } 3226 }
2906 3227
2907 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ 3228 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2908 const int level= b->level; 3229 const int level= b->level;
2909 const int w= b->width; 3230 const int w= b->width;
3410 return -1; 3731 return -1;
3411 } 3732 }
3412 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); 3733 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3413 s->chroma_h_shift= 1; 3734 s->chroma_h_shift= 1;
3414 s->chroma_v_shift= 1; 3735 s->chroma_v_shift= 1;
3736
3737 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3738 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3739
3740 s->avctx->get_buffer(s->avctx, &s->input_picture);
3741
3415 return 0; 3742 return 0;
3416 } 3743 }
3417 3744
3418 static int frame_start(SnowContext *s){ 3745 static int frame_start(SnowContext *s){
3419 AVFrame tmp; 3746 AVFrame tmp;
3443 SnowContext *s = avctx->priv_data; 3770 SnowContext *s = avctx->priv_data;
3444 RangeCoder * const c= &s->c; 3771 RangeCoder * const c= &s->c;
3445 AVFrame *pict = data; 3772 AVFrame *pict = data;
3446 const int width= s->avctx->width; 3773 const int width= s->avctx->width;
3447 const int height= s->avctx->height; 3774 const int height= s->avctx->height;
3448 int level, orientation, plane_index; 3775 int level, orientation, plane_index, i, y;
3449 3776
3450 ff_init_range_encoder(c, buf, buf_size); 3777 ff_init_range_encoder(c, buf, buf_size);
3451 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); 3778 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3452 3779
3453 s->input_picture = *pict; 3780 for(i=0; i<3; i++){
3781 int shift= !!i;
3782 for(y=0; y<(height>>shift); y++)
3783 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3784 &pict->data[i][y * pict->linesize[i]],
3785 width>>shift);
3786 }
3787 s->new_picture = *pict;
3454 3788
3455 if(avctx->flags&CODEC_FLAG_PASS2){ 3789 if(avctx->flags&CODEC_FLAG_PASS2){
3456 s->m.pict_type = 3790 s->m.pict_type =
3457 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; 3791 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3458 s->keyframe= pict->pict_type==FF_I_TYPE; 3792 s->keyframe= pict->pict_type==FF_I_TYPE;
3512 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); 3846 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3513 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; 3847 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3514 3848
3515 s->m.dsp= s->dsp; //move 3849 s->m.dsp= s->dsp; //move
3516 ff_init_me(&s->m); 3850 ff_init_me(&s->m);
3851 s->dsp= s->m.dsp;
3517 } 3852 }
3518 3853
3519 redo_frame: 3854 redo_frame:
3520 3855
3521 s->qbias= pict->pict_type == P_TYPE ? 2 : 0; 3856 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;