Mercurial > libavcodec.hg
comparison snow.c @ 3033:e8599ab02b38 libavcodec
faster iterative_me: avoid duplicate mc of neighboring blocks.
author | lorenm |
---|---|
date | Thu, 12 Jan 2006 05:47:52 +0000 |
parents | c75fb0747e74 |
children | d0f408fa01c7 |
comparison
equal
deleted
inserted
replaced
3032:63d7bab7b9ea | 3033:e8599ab02b38 |
---|---|
2469 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ | 2469 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2470 if(block->type & BLOCK_INTRA){ | 2470 if(block->type & BLOCK_INTRA){ |
2471 int x, y; | 2471 int x, y; |
2472 const int color = block->color[plane_index]; | 2472 const int color = block->color[plane_index]; |
2473 const int color4= color*0x01010101; | 2473 const int color4= color*0x01010101; |
2474 if(b_w==16){ | 2474 if(b_w==32){ |
2475 for(y=0; y < b_h; y++){ | |
2476 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2477 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2478 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2479 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2480 *(uint32_t*)&dst[16+ y*stride]= color4; | |
2481 *(uint32_t*)&dst[20+ y*stride]= color4; | |
2482 *(uint32_t*)&dst[24+ y*stride]= color4; | |
2483 *(uint32_t*)&dst[28+ y*stride]= color4; | |
2484 } | |
2485 }else if(b_w==16){ | |
2475 for(y=0; y < b_h; y++){ | 2486 for(y=0; y < b_h; y++){ |
2476 *(uint32_t*)&dst[0 + y*stride]= color4; | 2487 *(uint32_t*)&dst[0 + y*stride]= color4; |
2477 *(uint32_t*)&dst[4 + y*stride]= color4; | 2488 *(uint32_t*)&dst[4 + y*stride]= color4; |
2478 *(uint32_t*)&dst[8 + y*stride]= color4; | 2489 *(uint32_t*)&dst[8 + y*stride]= color4; |
2479 *(uint32_t*)&dst[12+ y*stride]= color4; | 2490 *(uint32_t*)&dst[12+ y*stride]= color4; |
2510 src= tmp + MB_SIZE; | 2521 src= tmp + MB_SIZE; |
2511 } | 2522 } |
2512 assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); | 2523 assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2513 assert(!(b_w&(b_w-1))); | 2524 assert(!(b_w&(b_w-1))); |
2514 assert(b_w>1 && b_h>1); | 2525 assert(b_w>1 && b_h>1); |
2515 assert(tab_index>=0 && tab_index<4); | 2526 assert(tab_index>=0 && tab_index<4 || b_w==32); |
2516 if((dx&3) || (dy&3)) | 2527 if((dx&3) || (dy&3)) |
2517 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); | 2528 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); |
2518 else if(b_w==b_h) | 2529 else if(b_w==32){ |
2530 int y; | |
2531 for(y=0; y<b_h; y+=16){ | |
2532 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride); | |
2533 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride); | |
2534 } | |
2535 }else if(b_w==b_h) | |
2519 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); | 2536 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); |
2520 else if(b_w==2*b_h){ | 2537 else if(b_w==2*b_h){ |
2521 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); | 2538 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); |
2522 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); | 2539 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); |
2523 }else{ | 2540 }else{ |
2686 } | 2703 } |
2687 #endif | 2704 #endif |
2688 } | 2705 } |
2689 | 2706 |
2690 //FIXME name cleanup (b_w, block_w, b_width stuff) | 2707 //FIXME name cleanup (b_w, block_w, b_width stuff) |
2691 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ | 2708 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2692 const int b_width = s->b_width << s->block_max_depth; | 2709 const int b_width = s->b_width << s->block_max_depth; |
2693 const int b_height= s->b_height << s->block_max_depth; | 2710 const int b_height= s->b_height << s->block_max_depth; |
2694 const int b_stride= b_width; | 2711 const int b_stride= b_width; |
2695 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | 2712 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
2696 BlockNode *rt= lt+1; | 2713 BlockNode *rt= lt+1; |
2718 } | 2735 } |
2719 | 2736 |
2720 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 | 2737 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 |
2721 obmc -= src_x; | 2738 obmc -= src_x; |
2722 b_w += src_x; | 2739 b_w += src_x; |
2740 if(!offset_dst) | |
2741 dst -= src_x; | |
2723 src_x=0; | 2742 src_x=0; |
2724 }else if(src_x + b_w > w){ | 2743 }else if(src_x + b_w > w){ |
2725 b_w = w - src_x; | 2744 b_w = w - src_x; |
2726 } | 2745 } |
2727 if(src_y<0){ | 2746 if(src_y<0){ |
2728 obmc -= src_y*obmc_stride; | 2747 obmc -= src_y*obmc_stride; |
2729 b_h += src_y; | 2748 b_h += src_y; |
2749 if(!offset_dst) | |
2750 dst -= src_y*dst_stride; | |
2730 src_y=0; | 2751 src_y=0; |
2731 }else if(src_y + b_h> h){ | 2752 }else if(src_y + b_h> h){ |
2732 b_h = h - src_y; | 2753 b_h = h - src_y; |
2733 } | 2754 } |
2734 | 2755 |
2735 if(b_w<=0 || b_h<=0) return; | 2756 if(b_w<=0 || b_h<=0) return; |
2736 | 2757 |
2737 assert(src_stride > 2*MB_SIZE + 5); | 2758 assert(src_stride > 2*MB_SIZE + 5); |
2738 dst += src_x + src_y*dst_stride; | 2759 if(offset_dst) |
2760 dst += src_x + src_y*dst_stride; | |
2739 dst8+= src_x + src_y*src_stride; | 2761 dst8+= src_x + src_y*src_stride; |
2740 // src += src_x + src_y*src_stride; | 2762 // src += src_x + src_y*src_stride; |
2741 | 2763 |
2742 ptmp= tmp + 3*tmp_step; | 2764 ptmp= tmp + 3*tmp_step; |
2743 block[0]= ptmp; | 2765 block[0]= ptmp; |
2952 block_w*mb_y - block_w/2, | 2974 block_w*mb_y - block_w/2, |
2953 block_w, block_w, | 2975 block_w, block_w, |
2954 w, h, | 2976 w, h, |
2955 w, ref_stride, obmc_stride, | 2977 w, ref_stride, obmc_stride, |
2956 mb_x - 1, mb_y - 1, | 2978 mb_x - 1, mb_y - 1, |
2957 add, plane_index); | 2979 add, 1, plane_index); |
2958 | 2980 |
2959 STOP_TIMER("add_yblock") | 2981 STOP_TIMER("add_yblock") |
2960 } | 2982 } |
2961 | 2983 |
2962 STOP_TIMER("predict_slice") | 2984 STOP_TIMER("predict_slice") |
2976 const int block_w = plane_index ? block_size/2 : block_size; | 2998 const int block_w = plane_index ? block_size/2 : block_size; |
2977 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | 2999 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2978 const int obmc_stride= plane_index ? block_size : 2*block_size; | 3000 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2979 const int ref_stride= s->current_picture.linesize[plane_index]; | 3001 const int ref_stride= s->current_picture.linesize[plane_index]; |
2980 uint8_t *ref= s-> last_picture.data[plane_index]; | 3002 uint8_t *ref= s-> last_picture.data[plane_index]; |
2981 uint8_t *dst= s->current_picture.data[plane_index]; | |
2982 uint8_t *src= s-> input_picture.data[plane_index]; | 3003 uint8_t *src= s-> input_picture.data[plane_index]; |
2983 const static DWTELEM zero_dst[4096]; //FIXME | 3004 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
2984 const int b_stride = s->b_width << s->block_max_depth; | 3005 const int b_stride = s->b_width << s->block_max_depth; |
2985 const int w= p->width; | 3006 const int w= p->width; |
2986 const int h= p->height; | 3007 const int h= p->height; |
2987 int index= mb_x + mb_y*b_stride; | 3008 int index= mb_x + mb_y*b_stride; |
2988 BlockNode *b= &s->block[index]; | 3009 BlockNode *b= &s->block[index]; |
2990 int ab=0; | 3011 int ab=0; |
2991 int aa=0; | 3012 int aa=0; |
2992 | 3013 |
2993 b->type|= BLOCK_INTRA; | 3014 b->type|= BLOCK_INTRA; |
2994 b->color[plane_index]= 0; | 3015 b->color[plane_index]= 0; |
3016 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM)); | |
2995 | 3017 |
2996 for(i=0; i<4; i++){ | 3018 for(i=0; i<4; i++){ |
2997 int mb_x2= mb_x + (i &1) - 1; | 3019 int mb_x2= mb_x + (i &1) - 1; |
2998 int mb_y2= mb_y + (i>>1) - 1; | 3020 int mb_y2= mb_y + (i>>1) - 1; |
2999 int x= block_w*mb_x2 + block_w/2; | 3021 int x= block_w*mb_x2 + block_w/2; |
3000 int y= block_w*mb_y2 + block_w/2; | 3022 int y= block_w*mb_y2 + block_w/2; |
3001 | 3023 |
3002 add_yblock(s, zero_dst, dst, ref, obmc, | 3024 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc, |
3003 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index); | 3025 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
3004 | 3026 |
3005 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ | 3027 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
3006 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ | 3028 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
3007 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; | 3029 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
3008 int obmc_v= obmc[index]; | 3030 int obmc_v= obmc[index]; |
3031 int d; | |
3009 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; | 3032 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
3010 if(x<0) obmc_v += obmc[index + block_w]; | 3033 if(x<0) obmc_v += obmc[index + block_w]; |
3011 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; | 3034 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
3012 if(x+block_w>w) obmc_v += obmc[index - block_w]; | 3035 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
3013 //FIXME precalc this or simplify it somehow else | 3036 //FIXME precalc this or simplify it somehow else |
3014 | 3037 |
3015 ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v; | 3038 d = -dst[index] + (1<<(FRAC_BITS-1)); |
3039 dst[index] = d; | |
3040 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; | |
3016 aa += obmc_v * obmc_v; //FIXME precalclate this | 3041 aa += obmc_v * obmc_v; //FIXME precalclate this |
3017 } | 3042 } |
3018 } | 3043 } |
3019 } | 3044 } |
3020 *b= backup; | 3045 *b= backup; |
3021 | 3046 |
3022 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping | 3047 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping |
3023 } | 3048 } |
3024 | 3049 |
3025 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | 3050 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
3026 int i, y2; | |
3027 Plane *p= &s->plane[plane_index]; | 3051 Plane *p= &s->plane[plane_index]; |
3028 const int block_size = MB_SIZE >> s->block_max_depth; | 3052 const int block_size = MB_SIZE >> s->block_max_depth; |
3029 const int block_w = plane_index ? block_size/2 : block_size; | 3053 const int block_w = plane_index ? block_size/2 : block_size; |
3030 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | 3054 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
3031 const int obmc_stride= plane_index ? block_size : 2*block_size; | 3055 const int obmc_stride= plane_index ? block_size : 2*block_size; |
3032 const int ref_stride= s->current_picture.linesize[plane_index]; | 3056 const int ref_stride= s->current_picture.linesize[plane_index]; |
3033 uint8_t *ref= s-> last_picture.data[plane_index]; | 3057 uint8_t *ref= s-> last_picture.data[plane_index]; |
3034 uint8_t *dst= s->current_picture.data[plane_index]; | 3058 uint8_t *dst= s->current_picture.data[plane_index]; |
3035 uint8_t *src= s-> input_picture.data[plane_index]; | 3059 uint8_t *src= s-> input_picture.data[plane_index]; |
3036 const static DWTELEM zero_dst[4096]; //FIXME | 3060 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
3061 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment | |
3062 uint8_t tmp[ref_stride*(2*MB_SIZE+5)]; | |
3037 const int b_stride = s->b_width << s->block_max_depth; | 3063 const int b_stride = s->b_width << s->block_max_depth; |
3038 const int b_height = s->b_height<< s->block_max_depth; | 3064 const int b_height = s->b_height<< s->block_max_depth; |
3039 const int w= p->width; | 3065 const int w= p->width; |
3040 const int h= p->height; | 3066 const int h= p->height; |
3041 int distortion= 0; | 3067 int distortion; |
3042 int rate= 0; | 3068 int rate= 0; |
3043 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | 3069 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
3044 | 3070 int sx= block_w*mb_x - block_w/2; |
3045 for(i=0; i<4; i++){ | 3071 int sy= block_w*mb_y - block_w/2; |
3046 int mb_x2= mb_x + (i &1) - 1; | 3072 const int x0= FFMAX(0,-sx); |
3047 int mb_y2= mb_y + (i>>1) - 1; | 3073 const int y0= FFMAX(0,-sy); |
3048 int x= block_w*mb_x2 + block_w/2; | 3074 const int x1= FFMIN(block_w*2, w-sx); |
3049 int y= block_w*mb_y2 + block_w/2; | 3075 const int y1= FFMIN(block_w*2, h-sy); |
3050 | 3076 int i,x,y; |
3051 add_yblock(s, zero_dst, dst, ref, obmc, | 3077 |
3052 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index); | 3078 pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
3053 | 3079 |
3054 //FIXME find a cleaner/simpler way to skip the outside stuff | 3080 for(y=y0; y<y1; y++){ |
3055 for(y2= y; y2<0; y2++) | 3081 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
3056 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | 3082 const DWTELEM *pred1 = pred + y*obmc_stride; |
3057 for(y2= h; y2<y+block_w; y2++) | 3083 uint8_t *cur1 = cur + y*ref_stride; |
3058 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | 3084 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
3059 if(x<0){ | 3085 for(x=x0; x<x1; x++){ |
3060 for(y2= y; y2<y+block_w; y2++) | 3086 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
3061 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | 3087 v = (v + pred1[x]) >> FRAC_BITS; |
3062 } | 3088 if(v&(~255)) v= ~(v>>31); |
3063 if(x+block_w > w){ | 3089 dst1[x] = v; |
3064 for(y2= y; y2<y+block_w; y2++) | 3090 } |
3065 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | 3091 } |
3066 } | 3092 |
3067 | 3093 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block |
3068 assert(block_w== 8 || block_w==16); | 3094 if(block_w==16){ |
3069 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | 3095 distortion = 0; |
3096 for(i=0; i<4; i++){ | |
3097 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; | |
3098 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); | |
3099 } | |
3100 }else{ | |
3101 assert(block_w==8); | |
3102 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); | |
3070 } | 3103 } |
3071 | 3104 |
3072 if(plane_index==0){ | 3105 if(plane_index==0){ |
3073 for(i=0; i<4; i++){ | 3106 for(i=0; i<4; i++){ |
3074 /* ..RRr | 3107 /* ..RRr |
3110 } | 3143 } |
3111 | 3144 |
3112 return distortion + rate*penalty_factor; | 3145 return distortion + rate*penalty_factor; |
3113 } | 3146 } |
3114 | 3147 |
3115 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){ | 3148 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
3116 const int b_stride= s->b_width << s->block_max_depth; | 3149 const int b_stride= s->b_width << s->block_max_depth; |
3117 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | 3150 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
3118 BlockNode backup= *block; | 3151 BlockNode backup= *block; |
3119 int rd, index, value; | 3152 int rd, index, value; |
3120 | 3153 |
3136 block->mx= p[0]; | 3169 block->mx= p[0]; |
3137 block->my= p[1]; | 3170 block->my= p[1]; |
3138 block->type &= ~BLOCK_INTRA; | 3171 block->type &= ~BLOCK_INTRA; |
3139 } | 3172 } |
3140 | 3173 |
3141 rd= get_block_rd(s, mb_x, mb_y, 0); | 3174 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
3142 | 3175 |
3143 //FIXME chroma | 3176 //FIXME chroma |
3144 if(rd < *best_rd){ | 3177 if(rd < *best_rd){ |
3145 *best_rd= rd; | 3178 *best_rd= rd; |
3146 return 1; | 3179 return 1; |
3149 return 0; | 3182 return 0; |
3150 } | 3183 } |
3151 } | 3184 } |
3152 | 3185 |
3153 /* special case for int[2] args we discard afterward, fixes compilation problem with gcc 2.95 */ | 3186 /* special case for int[2] args we discard afterward, fixes compilation problem with gcc 2.95 */ |
3154 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, int *best_rd){ | 3187 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){ |
3155 int p[2] = {p0, p1}; | 3188 int p[2] = {p0, p1}; |
3156 return check_block(s, mb_x, mb_y, p, intra, best_rd); | 3189 return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd); |
3157 } | 3190 } |
3158 | 3191 |
3159 static void iterative_me(SnowContext *s){ | 3192 static void iterative_me(SnowContext *s){ |
3160 int pass, mb_x, mb_y; | 3193 int pass, mb_x, mb_y; |
3161 const int b_width = s->b_width << s->block_max_depth; | 3194 const int b_width = s->b_width << s->block_max_depth; |
3179 BlockNode *bb = mb_y<b_height ? &s->block[index+b_stride ] : &null_block; | 3212 BlockNode *bb = mb_y<b_height ? &s->block[index+b_stride ] : &null_block; |
3180 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block; | 3213 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block; |
3181 BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block; | 3214 BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block; |
3182 BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block; | 3215 BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block; |
3183 BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block; | 3216 BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block; |
3217 const int b_w= (MB_SIZE >> s->block_max_depth); | |
3218 uint8_t obmc_edged[b_w*2][b_w*2]; | |
3184 | 3219 |
3185 if(pass && (block->type & BLOCK_OPT)) | 3220 if(pass && (block->type & BLOCK_OPT)) |
3186 continue; | 3221 continue; |
3187 block->type |= BLOCK_OPT; | 3222 block->type |= BLOCK_OPT; |
3188 | 3223 |
3190 | 3225 |
3191 if(!s->me_cache_generation) | 3226 if(!s->me_cache_generation) |
3192 memset(s->me_cache, 0, sizeof(s->me_cache)); | 3227 memset(s->me_cache, 0, sizeof(s->me_cache)); |
3193 s->me_cache_generation += 1<<22; | 3228 s->me_cache_generation += 1<<22; |
3194 | 3229 |
3230 //FIXME precalc | |
3231 { | |
3232 int x, y; | |
3233 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); | |
3234 if(mb_x==0) | |
3235 for(y=0; y<b_w*2; y++) | |
3236 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); | |
3237 if(mb_x==b_stride-1) | |
3238 for(y=0; y<b_w*2; y++) | |
3239 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); | |
3240 if(mb_y==0){ | |
3241 for(x=0; x<b_w*2; x++) | |
3242 obmc_edged[0][x] += obmc_edged[b_w-1][x]; | |
3243 for(y=1; y<b_w; y++) | |
3244 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); | |
3245 } | |
3246 if(mb_y==b_height-1){ | |
3247 for(x=0; x<b_w*2; x++) | |
3248 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; | |
3249 for(y=b_w; y<b_w*2-1; y++) | |
3250 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); | |
3251 } | |
3252 } | |
3253 | |
3254 //skip stuff outside the picture | |
3255 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1) | |
3256 { | |
3257 uint8_t *src= s-> input_picture.data[0]; | |
3258 uint8_t *dst= s->current_picture.data[0]; | |
3259 const int stride= s->current_picture.linesize[0]; | |
3260 const int block_w= MB_SIZE >> s->block_max_depth; | |
3261 const int sx= block_w*mb_x - block_w/2; | |
3262 const int sy= block_w*mb_y - block_w/2; | |
3263 const int w= s->plane[0].width; | |
3264 const int h= s->plane[0].height; | |
3265 int y; | |
3266 | |
3267 for(y=sy; y<0; y++) | |
3268 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); | |
3269 for(y=h; y<sy+block_w*2; y++) | |
3270 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); | |
3271 if(sx<0){ | |
3272 for(y=sy; y<sy+block_w*2; y++) | |
3273 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); | |
3274 } | |
3275 if(sx+block_w*2 > w){ | |
3276 for(y=sy; y<sy+block_w*2; y++) | |
3277 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); | |
3278 } | |
3279 } | |
3280 | |
3281 // intra(black) = neighbors' contribution to the current block | |
3282 for(i=0; i<3; i++) | |
3283 color[i]= get_dc(s, mb_x, mb_y, i); | |
3284 | |
3195 // get previous score (cant be cached due to OBMC) | 3285 // get previous score (cant be cached due to OBMC) |
3196 check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, &best_rd); | 3286 check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd); |
3197 check_block_inter(s, mb_x, mb_y, 0, 0, 0, &best_rd); | 3287 check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd); |
3198 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, &best_rd); | 3288 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd); |
3199 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, &best_rd); | 3289 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd); |
3200 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, &best_rd); | 3290 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd); |
3201 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, &best_rd); | 3291 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd); |
3202 | 3292 |
3203 /* fullpel ME */ | 3293 /* fullpel ME */ |
3204 //FIXME avoid subpel interpol / round to nearest integer | 3294 //FIXME avoid subpel interpol / round to nearest integer |
3205 do{ | 3295 do{ |
3206 dia_change=0; | 3296 dia_change=0; |
3207 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | 3297 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ |
3208 for(j=0; j<i; j++){ | 3298 for(j=0; j<i; j++){ |
3209 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, &best_rd); | 3299 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd); |
3210 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, &best_rd); | 3300 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd); |
3211 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, &best_rd); | 3301 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd); |
3212 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, &best_rd); | 3302 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd); |
3213 } | 3303 } |
3214 } | 3304 } |
3215 }while(dia_change); | 3305 }while(dia_change); |
3216 /* subpel ME */ | 3306 /* subpel ME */ |
3217 do{ | 3307 do{ |
3218 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | 3308 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; |
3219 dia_change=0; | 3309 dia_change=0; |
3220 for(i=0; i<8; i++) | 3310 for(i=0; i<8; i++) |
3221 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, &best_rd); | 3311 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd); |
3222 }while(dia_change); | 3312 }while(dia_change); |
3223 //FIXME or try the standard 2 pass qpel or similar | 3313 //FIXME or try the standard 2 pass qpel or similar |
3224 #if 1 | 3314 #if 1 |
3225 for(i=0; i<3; i++){ | 3315 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
3226 color[i]= get_dc(s, mb_x, mb_y, i); | |
3227 } | |
3228 check_block(s, mb_x, mb_y, color, 1, &best_rd); | |
3229 //FIXME RD style color selection | 3316 //FIXME RD style color selection |
3230 #endif | 3317 #endif |
3231 if(!same_block(block, &backup)){ | 3318 if(!same_block(block, &backup)){ |
3232 if(tb != &null_block) tb ->type &= ~BLOCK_OPT; | 3319 if(tb != &null_block) tb ->type &= ~BLOCK_OPT; |
3233 if(lb != &null_block) lb ->type &= ~BLOCK_OPT; | 3320 if(lb != &null_block) lb ->type &= ~BLOCK_OPT; |
3717 s->m.bit_rate= avctx->bit_rate; | 3804 s->m.bit_rate= avctx->bit_rate; |
3718 | 3805 |
3719 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); | 3806 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
3720 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | 3807 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); |
3721 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | 3808 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); |
3809 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); | |
3722 h263_encode_init(&s->m); //mv_penalty | 3810 h263_encode_init(&s->m); //mv_penalty |
3723 | 3811 |
3724 if(avctx->flags&CODEC_FLAG_PASS1){ | 3812 if(avctx->flags&CODEC_FLAG_PASS1){ |
3725 if(!avctx->stats_out) | 3813 if(!avctx->stats_out) |
3726 avctx->stats_out = av_mallocz(256); | 3814 avctx->stats_out = av_mallocz(256); |
3997 av_freep(&s->spatial_dwt_buffer); | 4085 av_freep(&s->spatial_dwt_buffer); |
3998 | 4086 |
3999 av_freep(&s->m.me.scratchpad); | 4087 av_freep(&s->m.me.scratchpad); |
4000 av_freep(&s->m.me.map); | 4088 av_freep(&s->m.me.map); |
4001 av_freep(&s->m.me.score_map); | 4089 av_freep(&s->m.me.score_map); |
4090 av_freep(&s->m.obmc_scratchpad); | |
4002 | 4091 |
4003 av_freep(&s->block); | 4092 av_freep(&s->block); |
4004 | 4093 |
4005 for(plane_index=0; plane_index<3; plane_index++){ | 4094 for(plane_index=0; plane_index<3; plane_index++){ |
4006 for(level=s->spatial_decomposition_count-1; level>=0; level--){ | 4095 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |