libavcodec.hg: snow.c comparison

comparison snow.c @ 10188:404026d9adb5 libavcodec

Reorder functions so that encoding functions are disabled by the preprocessor. Fixes compilation with disabled optimizations and enabled Snow decoder.

author	diego
date	Fri, 18 Sep 2009 19:45:09 +0000
parents	b911dbff811c
children	328e2a3171d2

comparison

equal deleted inserted replaced

-:b14d646fe719
+:404026d9adb5
 } DWTCompose;
 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
-static void iterative_me(SnowContext *s);
 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
 {
 int i;
 buf->base_buffer = base_buffer;
 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
 }
 }
 }
-static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
-int level;
-for(level=decomposition_count-1; level>=0; level--){
-switch(type){
-case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
-case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
-}
-}
-}
-static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
-const int support = type==1 ? 3 : 5;
-int level;
-if(type==2) return;
-for(level=decomposition_count-1; level>=0; level--){
-while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
-switch(type){
-case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
-break;
-case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
-break;
-}
-}
-}
-}
 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
 const int support = type==1 ? 3 : 5;
 int level;
 if(type==2) return;
 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
 break;
 }
 }
 }
-}
-static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
-DWTCompose cs[MAX_DECOMPOSITIONS];
-int y;
-ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
-for(y=0; y<height; y+=4)
-ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
-}
-static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
-const int w= b->width;
-const int h= b->height;
-int x, y;
-if(1){
-int run=0;
-int runs[w*h];
-int run_index=0;
-int max_index;
-for(y=0; y<h; y++){
-for(x=0; x<w; x++){
-int v, p=0;
-int /*ll=0, */l=0, lt=0, t=0, rt=0;
-v= src[x + y*stride];
-if(y){
-t= src[x + (y-1)*stride];
-if(x){
-lt= src[x - 1 + (y-1)*stride];
-}
-if(x + 1 < w){
-rt= src[x + 1 + (y-1)*stride];
-}
-}
-if(x){
-l= src[x - 1 + y*stride];
-/*if(x > 1){
-if(orientation==1) ll= src[y + (x-2)*stride];
-else               ll= src[x - 2 + y*stride];
-}*/
-}
-if(parent){
-int px= x>>1;
-int py= y>>1;
-if(px<b->parent->width && py<b->parent->height)
-p= parent[px + py*2*stride];
-}
-if(!(/*ll|*/l|lt|t|rt|p)){
-if(v){
-runs[run_index++]= run;
-run=0;
-}else{
-run++;
-}
-}
-}
-}
-max_index= run_index;
-runs[run_index++]= run;
-run_index=0;
-run= runs[run_index++];
-put_symbol2(&s->c, b->state[30], max_index, 0);
-if(run_index <= max_index)
-put_symbol2(&s->c, b->state[1], run, 3);
-for(y=0; y<h; y++){
-if(s->c.bytestream_end - s->c.bytestream < w*40){
-av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
-return -1;
-}
-for(x=0; x<w; x++){
-int v, p=0;
-int /*ll=0, */l=0, lt=0, t=0, rt=0;
-v= src[x + y*stride];
-if(y){
-t= src[x + (y-1)*stride];
-if(x){
-lt= src[x - 1 + (y-1)*stride];
-}
-if(x + 1 < w){
-rt= src[x + 1 + (y-1)*stride];
-}
-}
-if(x){
-l= src[x - 1 + y*stride];
-/*if(x > 1){
-if(orientation==1) ll= src[y + (x-2)*stride];
-else               ll= src[x - 2 + y*stride];
-}*/
-}
-if(parent){
-int px= x>>1;
-int py= y>>1;
-if(px<b->parent->width && py<b->parent->height)
-p= parent[px + py*2*stride];
-}
-if(/*ll|*/l|lt|t|rt|p){
-int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
-put_rac(&s->c, &b->state[0][context], !!v);
-}else{
-if(!run){
-run= runs[run_index++];
-if(run_index <= max_index)
-put_symbol2(&s->c, b->state[1], run, 3);
-assert(v);
-}else{
-run--;
-assert(!v);
-}
-}
-if(v){
-int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
-int l2= 2*FFABS(l) + (l<0);
-int t2= 2*FFABS(t) + (t<0);
-put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
-put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
-}
-}
-}
-}
-return 0;
-}
-static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
-//    encode_subband_qtree(s, b, src, parent, stride, orientation);
-//    encode_subband_z0run(s, b, src, parent, stride, orientation);
-return encode_subband_c0run(s, b, src, parent, stride, orientation);
-//    encode_subband_dzr(s, b, src, parent, stride, orientation);
 }
 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
 const int w= b->width;
 const int h= b->height;
 *d= *s;
 d->bytestream= bytestream;
 d->bytestream_start= bytestream_start;
 }
-//near copy & paste from dsputil, FIXME
-static int pix_sum(uint8_t * pix, int line_size, int w)
-{
-int s, i, j;
-s = 0;
-for (i = 0; i < w; i++) {
-for (j = 0; j < w; j++) {
-s += pix[0];
-pix ++;
-}
-pix += line_size - w;
-}
-return s;
-}
-//near copy & paste from dsputil, FIXME
-static int pix_norm1(uint8_t * pix, int line_size, int w)
-{
-int s, i, j;
-uint32_t *sq = ff_squareTbl + 256;
-s = 0;
-for (i = 0; i < w; i++) {
-for (j = 0; j < w; j ++) {
-s += sq[pix[0]];
-pix ++;
-}
-pix += line_size - w;
-}
-return s;
-}
 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
 const int w= s->b_width << s->block_max_depth;
 const int rem_depth= s->block_max_depth - level;
 const int index= (x + y*w) << rem_depth;
 const int block_w= 1<<rem_depth;
 (tr  ->mx * scale[tr  ->ref] + 128) >>8);
 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
 (top ->my * scale[top ->ref] + 128) >>8,
 (tr  ->my * scale[tr  ->ref] + 128) >>8);
 }
+}
+static av_always_inline int same_block(BlockNode *a, BlockNode *b){ /*
+ * Returns nonzero when blocks a and b would produce the same prediction:
+ * two intra blocks are compared by their three color components, any other
+ * pair by mx, my, ref and by whether their BLOCK_INTRA flags differ.
+ */
+if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
+return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); /* the OR of the differences is zero only if every component matches */
+}else{
+return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); /* an intra/inter pair never matches */
+}
+}
+static void decode_q_branch(SnowContext *s, int level, int x, int y){ /*
+ * Decodes one node of the block quadtree at position (x, y) of the given
+ * level. The node is either a leaf, whose parameters are read from the range
+ * coder and stored with set_blocks(), or it is split and its four children
+ * are decoded recursively at level+1.
+ */
+const int w= s->b_width << s->block_max_depth;
+const int rem_depth= s->block_max_depth - level;
+const int index= (x + y*w) << rem_depth;
+int trx= (x+1)<<rem_depth;
+const BlockNode *left  = x ? &s->block[index-1] : &null_block; /* neighbours that do not exist fall back to null_block (or to another neighbour) */
+const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+if(s->keyframe){ /* keyframes read nothing here: the node takes null_block's values and is marked intra */
+set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
+return;
+}
+if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ /* leaf: forced at the maximum depth, otherwise signalled by one coded bit */
+int type, mx, my;
+int l = left->color[0]; /* colors are coded as differences from the left neighbour */
+int cb= left->color[1];
+int cr= left->color[2];
+int ref = 0;
+int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); /* the tr term is multiplied by 0, i.e. it does not contribute */
+int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
+type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
+if(type){ /* intra: mx/my come from the predictor with ref 0, colors get a coded delta */
+pred_mv(s, &mx, &my, 0, left, top, tr);
+l += get_symbol(&s->c, &s->block_state[32], 1);
+cb+= get_symbol(&s->c, &s->block_state[64], 1);
+cr+= get_symbol(&s->c, &s->block_state[96], 1);
+}else{ /* inter: ref is coded only when more than one reference frame is in use, mx/my are predictor plus coded delta */
+if(s->ref_frames > 1)
+ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
+pred_mv(s, &mx, &my, ref, left, top, tr);
+mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
+my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
+}
+set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
+}else{ /* split: decode the four children */
+decode_q_branch(s, level+1, 2*x+0, 2*y+0);
+decode_q_branch(s, level+1, 2*x+1, 2*y+0);
+decode_q_branch(s, level+1, 2*x+0, 2*y+1);
+decode_q_branch(s, level+1, 2*x+1, 2*y+1);
+}
+}
+static void decode_blocks(SnowContext *s){ /*
+ * Decodes the block tree of every level-0 block, row by row, over the
+ * s->b_width x s->b_height grid.
+ */
+int x, y;
+int w= s->b_width;
+int h= s->b_height;
+for(y=0; y<h; y++){
+for(x=0; x<w; x++){
+decode_q_branch(s, 0, x, y);
+}
+}
+}
+static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ /*
+ * Sub-pel interpolation of a b_w x b_h block from src into dst.
+ * dx and dy are the fractional offsets and must be below 16 (asserted).
+ * p selects the filter: with p == NULL or p->fast_mc the fixed 6-tap
+ * (20,-5,1) filter is used, otherwise the taps come from p->hcoeff[0..3].
+ * Up to three filtered planes are built (horizontal, vertical and
+ * horizontal+vertical) and dst is a weighted blend of two or four planes.
+ * The tmp parameter is not referenced in this body; local arrays are used
+ * as scratch space instead.
+ */
+static const uint8_t weight[64]={ /* weight (out of 8) of the first plane in the two-plane blend, indexed by (dx&7) + 8*(dy&7) */
+8,7,6,5,4,3,2,1,
+7,7,0,0,0,0,0,1,
+6,0,6,0,0,0,2,0,
+5,0,0,5,0,3,0,0,
+4,0,0,0,4,0,0,0,
+3,0,0,5,0,3,0,0,
+2,0,6,0,0,0,2,0,
+1,7,0,0,0,0,0,1,
+};
+static const uint8_t brane[256]={ /* indexed by dx + 16*dy: high nibble and low nibble are the hpel[] indices of the two planes to blend */
+0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
+0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
+0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
+0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
+0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
+0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
+0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
+0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
+0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
+0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
+0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
+0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
+0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
+0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
+0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
+};
+static const uint8_t needs[16]={ /* per hpel[] index: bit mask of the filter passes below that must run; only 13 entries are listed, the rest are zero */
+0,1,0,0,
+2,4,2,0,
+0,1,0,0,
+15
+};
+int x, y, b, r, l;
+int16_t tmpIt   [64*(32+HTAPS_MAX)]; /* unrounded horizontal filter output, 64 values per row */
+uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; /* the three filtered planes: [0] horizontal, [1] vertical, [2] both */
+int16_t *tmpI= tmpIt;
+uint8_t *tmp2= tmp2t[0];
+const uint8_t *hpel[11]; /* entries 3 and 7 are never assigned */
+assert(dx<16 && dy<16);
+r= brane[dx + 16*dy]&15;
+l= brane[dx + 16*dy]>>4;
+b= needs[l] | needs[r];
+if(p && !p->diag_mc)
+b= 15; /* without diag_mc every plane is computed and the four-plane blend below is used */
+if(b&5){ /* horizontal pass: also needed as input of the combined pass (bit 4) */
+for(y=0; y < b_h+HTAPS_MAX-1; y++){
+for(x=0; x < b_w; x++){
+int a_1=src[x + HTAPS_MAX/2-4];
+int a0= src[x + HTAPS_MAX/2-3];
+int a1= src[x + HTAPS_MAX/2-2];
+int a2= src[x + HTAPS_MAX/2-1];
+int a3= src[x + HTAPS_MAX/2+0];
+int a4= src[x + HTAPS_MAX/2+1];
+int a5= src[x + HTAPS_MAX/2+2];
+int a6= src[x + HTAPS_MAX/2+3];
+int am=0;
+if(!p || p->fast_mc){
+am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+tmpI[x]= am; /* keep the unrounded sum for the second (vertical) filtering */
+am= (am+16)>>5;
+}else{
+am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
+tmpI[x]= am;
+am= (am+32)>>6;
+}
+if(am&(~255)) am= ~(am>>31); /* out of 0..255: negative becomes 0, too large becomes -1 which is stored as 255 in the uint8_t */
+tmp2[x]= am;
+}
+tmpI+= 64;
+tmp2+= stride;
+src += stride;
+}
+src -= stride*y; /* rewind src to where it was before the loop */
+}
+src += HTAPS_MAX/2 - 1;
+tmp2= tmp2t[1];
+if(b&2){ /* vertical pass, one extra column */
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w+1; x++){
+int a_1=src[x + (HTAPS_MAX/2-4)*stride];
+int a0= src[x + (HTAPS_MAX/2-3)*stride];
+int a1= src[x + (HTAPS_MAX/2-2)*stride];
+int a2= src[x + (HTAPS_MAX/2-1)*stride];
+int a3= src[x + (HTAPS_MAX/2+0)*stride];
+int a4= src[x + (HTAPS_MAX/2+1)*stride];
+int a5= src[x + (HTAPS_MAX/2+2)*stride];
+int a6= src[x + (HTAPS_MAX/2+3)*stride];
+int am=0;
+if(!p || p->fast_mc)
+am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
+else
+am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
+if(am&(~255)) am= ~(am>>31);
+tmp2[x]= am;
+}
+src += stride;
+tmp2+= stride;
+}
+src -= stride*y;
+}
+src += stride*(HTAPS_MAX/2 - 1);
+tmp2= tmp2t[2];
+tmpI= tmpIt;
+if(b&4){ /* combined pass: vertical filter over the unrounded horizontal results, hence the larger rounding shift */
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w; x++){
+int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
+int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
+int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
+int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
+int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
+int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
+int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
+int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
+int am=0;
+if(!p || p->fast_mc)
+am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
+else
+am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
+if(am&(~255)) am= ~(am>>31);
+tmp2[x]= am;
+}
+tmpI+= 64;
+tmp2+= stride;
+}
+}
+hpel[ 0]= src; /* 3x3 arrangement of planes stored with a row pitch of 4 */
+hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
+hpel[ 2]= src + 1;
+hpel[ 4]= tmp2t[1];
+hpel[ 5]= tmp2t[2];
+hpel[ 6]= tmp2t[1] + 1;
+hpel[ 8]= src + stride;
+hpel[ 9]= hpel[1] + stride;
+hpel[10]= hpel[8] + 1;
+if(b==15){ /* bilinear blend of the four planes surrounding the position */
+const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
+const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
+const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
+const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
+dx&=7;
+dy&=7;
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w; x++){
+dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
+(8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
+}
+src1+=stride;
+src2+=stride;
+src3+=stride;
+src4+=stride;
+dst +=stride;
+}
+}else{ /* weighted blend of the two planes selected through brane[] */
+const uint8_t *src1= hpel[l];
+const uint8_t *src2= hpel[r];
+int a= weight[((dx&7) + (8*(dy&7)))];
+int b= 8-a; /* note: shadows the outer b inside this branch */
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w; x++){
+dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
+}
+src1+=stride;
+src2+=stride;
+dst +=stride;
+}
+}
+}
+#define mca(dx,dy,b_w)\
+static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
+uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
+assert(h==b_w);\
+mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
+} /* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(): a square b_w block wrapper around mc_block() with p == NULL and fixed offsets dx, dy; src is moved back by HTAPS_MAX/2-1 columns and rows before the call */
+mca( 0, 0,16) /* the four 0/8 offset combinations for 16x16 blocks ... */
+mca( 8, 0,16)
+mca( 0, 8,16)
+mca( 8, 8,16)
+mca( 0, 0,8) /* ... and for 8x8 blocks */
+mca( 8, 0,8)
+mca( 0, 8,8)
+mca( 8, 8,8)
+static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ /*
+ * Writes the prediction of one b_w x b_h block into dst.
+ * Intra blocks are filled with the block's color for this plane; all other
+ * blocks are motion compensated from s->last_picture[block->ref] at
+ * position (sx, sy) displaced by the block's motion vector. w and h are the
+ * plane dimensions used for the edge check.
+ */
+if(block->type & BLOCK_INTRA){
+int x, y;
+const int color = block->color[plane_index];
+const int color4= color*0x01010101; /* the color repeated in all four bytes (presumably color fits in 8 bits - TODO confirm) */
+if(b_w==32){ /* common widths are filled four bytes at a time through uint32_t stores */
+for(y=0; y < b_h; y++){
+*(uint32_t*)&dst[0 + y*stride]= color4;
+*(uint32_t*)&dst[4 + y*stride]= color4;
+*(uint32_t*)&dst[8 + y*stride]= color4;
+*(uint32_t*)&dst[12+ y*stride]= color4;
+*(uint32_t*)&dst[16+ y*stride]= color4;
+*(uint32_t*)&dst[20+ y*stride]= color4;
+*(uint32_t*)&dst[24+ y*stride]= color4;
+*(uint32_t*)&dst[28+ y*stride]= color4;
+}
+}else if(b_w==16){
+for(y=0; y < b_h; y++){
+*(uint32_t*)&dst[0 + y*stride]= color4;
+*(uint32_t*)&dst[4 + y*stride]= color4;
+*(uint32_t*)&dst[8 + y*stride]= color4;
+*(uint32_t*)&dst[12+ y*stride]= color4;
+}
+}else if(b_w==8){
+for(y=0; y < b_h; y++){
+*(uint32_t*)&dst[0 + y*stride]= color4;
+*(uint32_t*)&dst[4 + y*stride]= color4;
+}
+}else if(b_w==4){
+for(y=0; y < b_h; y++){
+*(uint32_t*)&dst[0 + y*stride]= color4;
+}
+}else{ /* any other width: plain byte fill */
+for(y=0; y < b_h; y++){
+for(x=0; x < b_w; x++){
+dst[x + y*stride]= color;
+}
+}
+}
+}else{
+uint8_t *src= s->last_picture[block->ref].data[plane_index];
+const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale; /* plane 0 uses twice the scale of the other planes */
+int mx= block->mx*scale;
+int my= block->my*scale;
+const int dx= mx&15; /* low 4 bits: fractional offset; the rest is the whole-sample displacement */
+const int dy= my&15;
+const int tab_index= 3 - (b_w>>2) + (b_w>>4);
+sx += (mx>>4) - (HTAPS_MAX/2-1);
+sy += (my>>4) - (HTAPS_MAX/2-1);
+src += sx + sy*stride;
+if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
+|| (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ /* the unsigned casts make negative positions fail the check too */
+ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
+src= tmp + MB_SIZE; /* continue from the copy that ff_emulated_edge_mc() built in tmp */
+}
+//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
+//        assert(!(b_w&(b_w-1)));
+assert(b_w>1 && b_h>1);
+assert((tab_index>=0 && tab_index<4) || b_w==32);
+if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) /* generic path unless the offsets are multiples of 4, the shape is 1:1, 1:2 or 2:1 with power-of-two width, and fast_mc is set */
+mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
+else if(b_w==32){ /* width 32: two 16-wide calls per 16 rows */
+int y;
+for(y=0; y<b_h; y+=16){
+s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
+s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
+}
+}else if(b_w==b_h)
+s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
+else if(b_w==2*b_h){ /* wide block: two calls side by side */
+s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
+s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
+}else{ /* tall block: two calls stacked vertically */
+assert(2*b_w==b_h);
+s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
+s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
+}
+}
+}
+void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ /*
+ * Blends the four predictions block[0..3] with the four quadrants of the
+ * obmc weight table and applies the result to the slice buffer lines
+ * src_y .. src_y+b_h-1 at column offset src_x.
+ * add != 0: the blend is added to the line, rounded, clamped to 0..255 and
+ *           written to dst8.
+ * add == 0: the blend is subtracted from the line in place.
+ */
+int y, x;
+IDWTELEM * dst;
+for(y=0; y<b_h; y++){
+//FIXME ugly misuse of obmc_stride
+const uint8_t *obmc1= obmc + y*obmc_stride; /* obmc_stride doubles as the table size: the other quadrants start half a row / half the rows further */
+const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+dst = slice_buffer_get_line(sb, src_y + y);
+for(x=0; x<b_w; x++){
+int v=   obmc1[x] * block[3][x + y*src_stride]
++obmc2[x] * block[2][x + y*src_stride]
++obmc3[x] * block[1][x + y*src_stride]
++obmc4[x] * block[0][x + y*src_stride];
+v <<= 8 - LOG2_OBMC_MAX;
+if(FRAC_BITS != 8){
+v >>= 8 - FRAC_BITS;
+}
+if(add){
+v += dst[x + src_x];
+v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; /* round and drop the fractional bits */
+if(v&(~255)) v= ~(v>>31); /* out of 0..255: negative becomes 0, too large becomes -1 which is stored as 255 */
+dst8[x + y*src_stride] = v;
+}else{
+dst[x + src_x] -= v;
+}
+}
+}
+}
+//FIXME name cleanup (b_w, block_w, b_width stuff)
+static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ /*
+ * Applies the overlapped prediction of one block position to a plane.
+ * The four blocks around grid position (b_x, b_y) are predicted with
+ * pred_block() into scratch memory (identical blocks share one buffer) and
+ * blended with the obmc weights. With sliced != 0 the blend goes through
+ * s->dsp.inner_add_yblock() and the slice buffer sb, otherwise it is done
+ * here on the flat buffer dst. add selects adding the prediction (result
+ * written to dst8) or subtracting it from dst.
+ */
+const int b_width = s->b_width  << s->block_max_depth;
+const int b_height= s->b_height << s->block_max_depth;
+const int b_stride= b_width;
+BlockNode *lt= &s->block[b_x + b_y*b_stride];
+BlockNode *rt= lt+1;
+BlockNode *lb= lt+b_stride;
+BlockNode *rb= lb+1;
+uint8_t *block[4];
+int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
+uint8_t *tmp = s->scratchbuf;
+uint8_t *ptmp;
+int x,y;
+if(b_x<0){ /* at the borders of the block grid the missing neighbours are replaced by the existing ones */
+lt= rt;
+lb= rb;
+}else if(b_x + 1 >= b_width){
+rt= lt;
+rb= lb;
+}
+if(b_y<0){
+lt= lb;
+rt= rb;
+}else if(b_y + 1 >= b_height){
+lb= lt;
+rb= rt;
+}
+if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
+obmc -= src_x; /* clip on the left: skip the weights and samples that fall outside the plane */
+b_w += src_x;
+if(!sliced && !offset_dst)
+dst -= src_x;
+src_x=0;
+}else if(src_x + b_w > w){
+b_w = w - src_x;
+}
+if(src_y<0){
+obmc -= src_y*obmc_stride;
+b_h += src_y;
+if(!sliced && !offset_dst)
+dst -= src_y*dst_stride;
+src_y=0;
+}else if(src_y + b_h> h){
+b_h = h - src_y;
+}
+if(b_w<=0 || b_h<=0) return; /* nothing left after clipping */
+assert(src_stride > 2*MB_SIZE + 5);
+if(!sliced && offset_dst)
+dst += src_x + src_y*dst_stride;
+dst8+= src_x + src_y*src_stride;
+//    src += src_x + src_y*src_stride;
+ptmp= tmp + 3*tmp_step; /* prediction buffers start after the first three steps of the scratch buffer, which is also passed to pred_block() as tmp */
+block[0]= ptmp;
+ptmp+=tmp_step;
+pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
+if(same_block(lt, rt)){ /* identical neighbours reuse an already computed prediction */
+block[1]= block[0];
+}else{
+block[1]= ptmp;
+ptmp+=tmp_step;
+pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
+}
+if(same_block(lt, lb)){
+block[2]= block[0];
+}else if(same_block(rt, lb)){
+block[2]= block[1];
+}else{
+block[2]= ptmp;
+ptmp+=tmp_step;
+pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
+}
+if(same_block(lt, rb) ){
+block[3]= block[0];
+}else if(same_block(rt, rb)){
+block[3]= block[1];
+}else if(same_block(lb, rb)){
+block[3]= block[2];
+}else{
+block[3]= ptmp;
+pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
+}
+#if 0
+for(y=0; y<b_h; y++){
+for(x=0; x<b_w; x++){
+int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
+if(add) dst[x + y*dst_stride] += v;
+else    dst[x + y*dst_stride] -= v;
+}
+}
+for(y=0; y<b_h; y++){
+uint8_t *obmc2= obmc + (obmc_stride>>1);
+for(x=0; x<b_w; x++){
+int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
+if(add) dst[x + y*dst_stride] += v;
+else    dst[x + y*dst_stride] -= v;
+}
+}
+for(y=0; y<b_h; y++){
+uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
+for(x=0; x<b_w; x++){
+int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
+if(add) dst[x + y*dst_stride] += v;
+else    dst[x + y*dst_stride] -= v;
+}
+}
+for(y=0; y<b_h; y++){
+uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
+uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+for(x=0; x<b_w; x++){
+int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
+if(add) dst[x + y*dst_stride] += v;
+else    dst[x + y*dst_stride] -= v;
+}
+}
+#else
+if(sliced){
+s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+}else{ /* flat-buffer variant of the same blend, addressing dst by dst_stride */
+for(y=0; y<b_h; y++){
+//FIXME ugly misuse of obmc_stride
+const uint8_t *obmc1= obmc + y*obmc_stride;
+const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+for(x=0; x<b_w; x++){
+int v=   obmc1[x] * block[3][x + y*src_stride]
++obmc2[x] * block[2][x + y*src_stride]
++obmc3[x] * block[1][x + y*src_stride]
++obmc4[x] * block[0][x + y*src_stride];
+v <<= 8 - LOG2_OBMC_MAX;
+if(FRAC_BITS != 8){
+v >>= 8 - FRAC_BITS;
+}
+if(add){
+v += dst[x + y*dst_stride];
+v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
+if(v&(~255)) v= ~(v>>31); /* out of 0..255: negative becomes 0, too large becomes -1 which is stored as 255 */
+dst8[x + y*src_stride] = v;
+}else{
+dst[x + y*dst_stride] -= v;
+}
+}
+}
+}
+#endif /* 0 */
+}
+static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ /*
+ * Applies the prediction for block row mb_y of one plane, working on the
+ * slice buffer sb. On keyframes (or when debug bit 512 is set) there is no
+ * motion prediction: the constant 128 is added (add != 0, result clamped and
+ * written to the current picture) or subtracted from the lines instead.
+ * Otherwise add_yblock() is called in sliced mode for every block column.
+ */
+Plane *p= &s->plane[plane_index];
+const int mb_w= s->b_width  << s->block_max_depth;
+const int mb_h= s->b_height << s->block_max_depth;
+int x, y, mb_x;
+int block_size = MB_SIZE >> s->block_max_depth;
+int block_w    = plane_index ? block_size/2 : block_size; /* planes other than 0 use half the block size */
+const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+int obmc_stride= plane_index ? block_size : 2*block_size;
+int ref_stride= s->current_picture.linesize[plane_index];
+uint8_t *dst8= s->current_picture.data[plane_index];
+int w= p->width;
+int h= p->height;
+if(s->keyframe || (s->avctx->debug&512)){
+if(mb_y==mb_h) /* callers iterate one row past the grid; that row has nothing to do here */
+return;
+if(add){
+for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
+//                DWTELEM * line = slice_buffer_get_line(sb, y);
+IDWTELEM * line = sb->line[y]; /* read directly, without the load-on-demand of slice_buffer_get_line() */
+for(x=0; x<w; x++){
+//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
+int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
+v >>= FRAC_BITS;
+if(v&(~255)) v= ~(v>>31);
+dst8[x + y*ref_stride]= v;
+}
+}
+}else{
+for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
+//                DWTELEM * line = slice_buffer_get_line(sb, y);
+IDWTELEM * line = sb->line[y];
+for(x=0; x<w; x++){
+line[x] -= 128 << FRAC_BITS;
+//                    buf[x + y*w]-= 128<<FRAC_BITS;
+}
+}
+}
+return;
+}
+for(mb_x=0; mb_x<=mb_w; mb_x++){ /* mb_w+1 positions, each shifted back by half a block and using grid position (mb_x-1, mb_y-1) */
+add_yblock(s, 1, sb, old_buffer, dst8, obmc,
+block_w*mb_x - block_w/2,
+block_w*mb_y - block_w/2,
+block_w, block_w,
+w, h,
+w, ref_stride, obmc_stride,
+mb_x - 1, mb_y - 1,
+add, 0, plane_index);
+}
+}
+static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ /*
+ * Applies the prediction for block row mb_y of one plane on the flat
+ * buffer buf (row pitch w). On keyframes (or when debug bit 512 is set) the
+ * constant 128 is added (add != 0, result clamped and written to the current
+ * picture) or subtracted from buf; otherwise add_yblock() is called in
+ * non-sliced mode for every block column.
+ */
+Plane *p= &s->plane[plane_index];
+const int mb_w= s->b_width  << s->block_max_depth;
+const int mb_h= s->b_height << s->block_max_depth;
+int x, y, mb_x;
+int block_size = MB_SIZE >> s->block_max_depth;
+int block_w    = plane_index ? block_size/2 : block_size; /* planes other than 0 use half the block size */
+const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+const int obmc_stride= plane_index ? block_size : 2*block_size;
+int ref_stride= s->current_picture.linesize[plane_index];
+uint8_t *dst8= s->current_picture.data[plane_index];
+int w= p->width;
+int h= p->height;
+if(s->keyframe || (s->avctx->debug&512)){
+if(mb_y==mb_h) /* callers iterate one row past the grid; that row has nothing to do here */
+return;
+if(add){
+for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
+for(x=0; x<w; x++){
+int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
+v >>= FRAC_BITS;
+if(v&(~255)) v= ~(v>>31); /* out of 0..255: negative becomes 0, too large becomes -1 which is stored as 255 */
+dst8[x + y*ref_stride]= v;
+}
+}
+}else{
+for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
+for(x=0; x<w; x++){
+buf[x + y*w]-= 128<<FRAC_BITS;
+}
+}
+}
+return;
+}
+for(mb_x=0; mb_x<=mb_w; mb_x++){ /* mb_w+1 positions, each shifted back by half a block and using grid position (mb_x-1, mb_y-1) */
+add_yblock(s, 0, NULL, buf, dst8, obmc,
+block_w*mb_x - block_w/2,
+block_w*mb_y - block_w/2,
+block_w, block_w,
+w, h,
+w, ref_stride, obmc_stride,
+mb_x - 1, mb_y - 1,
+add, 1, plane_index);
+}
+}
+static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ /*
+ * Runs predict_slice() over a whole plane: block rows 0 .. mb_h inclusive,
+ * i.e. one more row than the block grid has.
+ */
+const int mb_h= s->b_height << s->block_max_depth;
+int mb_y;
+for(mb_y=0; mb_y<=mb_h; mb_y++)
+predict_slice(s, buf, plane_index, add, mb_y);
+}
+static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ /*
+ * Dequantizes rows start_y .. end_y-1 of subband b in place in the slice
+ * buffer. Nothing is done when s->qlog equals LOSSLESS_QLOG. The src and
+ * stride parameters are only referenced by the commented-out line below.
+ */
+const int w= b->width;
+const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
+const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); /* table value for the low bits of qlog, shifted left by the remaining high part */
+const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
+int x,y;
+if(s->qlog == LOSSLESS_QLOG) return;
+for(y=start_y; y<end_y; y++){
+//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
+IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+for(x=0; x<w; x++){
+int i= line[x];
+if(i<0){ /* negative values are scaled by magnitude and negated back; zero is left untouched */
+line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
+}else if(i>0){
+line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
+}
+}
+}
+}
+static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ /*
+ * Adds a spatial predictor to every sample of rows start_y .. end_y-1 of
+ * subband b, in place in the slice buffer. The predictor is built from the
+ * already processed left sample and the previous row; use_median selects
+ * which three candidates go into mid_pred(). The src, stride and inverse
+ * parameters are not used by this body (src and stride appear only in the
+ * commented-out line).
+ */
+const int w= b->width;
+int x,y;
+IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
+IDWTELEM * prev;
+if (start_y != 0) /* fetch the row above the first one so that it can serve as prev */
+line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+for(y=start_y; y<end_y; y++){
+prev = line;
+//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
+line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+for(x=0; x<w; x++){
+if(x){
+if(use_median){
+if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
+else  line[x] += line[x - 1]; /* first row or last column: left neighbour only */
+}else{
+if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
+else  line[x] += line[x - 1];
+}
+}else{
+if(y) line[x] += prev[x]; /* first column: sample above; the very first sample is left as is */
+}
+}
+}
+}
+static void decode_qlogs(SnowContext *s){ /*
+ * Fills the qlog of every subband of the three planes. Only part of the
+ * values are read from the range coder; the others are copies, see below.
+ */
+int plane_index, level, orientation;
+for(plane_index=0; plane_index<3; plane_index++){
+for(level=0; level<s->spatial_decomposition_count; level++){
+for(orientation=level ? 1:0; orientation<4; orientation++){ /* level 0 has orientations 0..3, every other level 1..3 */
+int q;
+if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog; /* plane 2 copies plane 1 */
+else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; /* orientation 2 copies orientation 1 */
+else                    q= get_symbol(&s->c, s->header_state, 1);
+s->plane[plane_index].band[level][orientation].qlog= q;
+}
+}
+}
+}
+#define GET_S(dst, check) \
+tmp= get_symbol(&s->c, s->header_state, 0);\
+if(!(check)){\
+av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
+return -1;\
+}\
+dst= tmp; /* GET_S(dst, check): reads one symbol into tmp, makes the enclosing function return -1 with an error log if check (an expression on tmp) fails, else stores tmp in dst. Needs s and tmp in scope; expands to several statements, so it must not be used as the unbraced body of an if/else/loop */
+static int decode_header(SnowContext *s){ /*
+ * Parses the frame header from the range coder s->c into s.
+ * Keyframes carry the stream-wide parameters; other frames may update the
+ * interpolation filter and the decomposition count. The remaining fields
+ * (decomposition type, qlog, mv_scale, qbias, block_max_depth) are coded as
+ * differences to their previous values.
+ * Returns 0 on success and -1 when a decoded field is out of range.
+ */
+int plane_index, tmp;
+uint8_t kstate[32];
+memset(kstate, MID_STATE, sizeof(kstate));
+s->keyframe= get_rac(&s->c, kstate);
+if(s->keyframe || s->always_reset){ /* restart the contexts and the delta-coded fields from zero */
+reset_contexts(s);
+s->spatial_decomposition_type=
+s->qlog=
+s->qbias=
+s->mv_scale=
+s->block_max_depth= 0;
+}
+if(s->keyframe){
+GET_S(s->version, tmp <= 0U) /* unsigned comparison: only the value 0 passes */
+s->always_reset= get_rac(&s->c, s->header_state);
+s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
+s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
+GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
+s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
+s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); /* NOTE(review): the chroma shifts are used as shift counts below without a range check here - confirm they are validated elsewhere */
+s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
+s->spatial_scalability= get_rac(&s->c, s->header_state);
+//        s->rate_scalability= get_rac(&s->c, s->header_state);
+GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
+s->max_ref_frames++; /* coded as count-1 */
+decode_qlogs(s);
+}
+if(!s->keyframe){
+if(get_rac(&s->c, s->header_state)){ /* flag: new interpolation filter for planes 0 and 1; plane 2 copies plane 1 */
+for(plane_index=0; plane_index<2; plane_index++){
+int htaps, i, sum=0;
+Plane *p= &s->plane[plane_index];
+p->diag_mc= get_rac(&s->c, s->header_state);
+htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
+if((unsigned)htaps > HTAPS_MAX || htaps==0) /* NOTE(review): htaps == HTAPS_MAX passes and the loop below then writes hcoeff[HTAPS_MAX/2] - confirm hcoeff has room for that index */
+return -1;
+p->htaps= htaps;
+for(i= htaps/2; i; i--){
+p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); /* coded as magnitudes; odd indices are negated */
+sum += p->hcoeff[i];
+}
+p->hcoeff[0]= 32-sum; /* not coded: chosen so that hcoeff[0] plus the coded taps equals 32 */
+}
+s->plane[2].diag_mc= s->plane[1].diag_mc;
+s->plane[2].htaps  = s->plane[1].htaps;
+memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
+}
+if(get_rac(&s->c, s->header_state)){ /* flag: new decomposition count and qlogs */
+GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
+decode_qlogs(s);
+}
+}
+s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
+if(s->spatial_decomposition_type > 1U){
+av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported\n", s->spatial_decomposition_type); /* newline added so the message does not run into the next log line */
+return -1;
+}
+if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
+s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){ /* the smaller chroma dimension must stay positive at the coarsest level */
+av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size\n", s->spatial_decomposition_count);
+return -1;
+}
+s->qlog           += get_symbol(&s->c, s->header_state, 1);
+s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
+s->qbias          += get_symbol(&s->c, s->header_state, 1);
+s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
+if(s->block_max_depth > 1 || s->block_max_depth < 0){
+av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", s->block_max_depth);
+s->block_max_depth= 0; /* leave a valid value behind for later frames */
+return -1;
+}
+return 0;
+}
+static void init_qexp(void){ /*
+ * Fills qexp[0 .. QROOT-1] with a geometric progression that starts at 128
+ * and doubles every QROOT entries, each value rounded with lrintf().
+ */
+int i;
+double v=128;
+for(i=0; i<QROOT; i++){
+qexp[i]= lrintf(v);
+v *= pow(2, 1.0 / QROOT);
+}
+}
+static av_cold int common_init(AVCodecContext *avctx){ /*
+ * Initialization shared by the Snow encoder and decoder: installs the motion
+ * compensation functions in s->dsp, fills the qexp and scale_mv_ref tables
+ * and allocates the working buffers. Always returns 0.
+ */
+SnowContext *s = avctx->priv_data;
+int width, height;
+int i, j;
+s->avctx= avctx;
+s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
+dsputil_init(&s->dsp, avctx);
+#define mcf(dx,dy)\
+s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
+s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
+s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
+s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
+s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
+s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
+mcf( 0, 0) /* point the qpel tables [0] and [1] at the h264 qpel functions for all 16 offset combinations */
+mcf( 4, 0)
+mcf( 8, 0)
+mcf(12, 0)
+mcf( 0, 4)
+mcf( 4, 4)
+mcf( 8, 4)
+mcf(12, 4)
+mcf( 0, 8)
+mcf( 4, 8)
+mcf( 8, 8)
+mcf(12, 8)
+mcf( 0,12)
+mcf( 4,12)
+mcf( 8,12)
+mcf(12,12)
+#define mcfh(dx,dy)\
+s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
+s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
+mc_block_hpel ## dx ## dy ## 16;\
+s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
+s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
+mc_block_hpel ## dx ## dy ## 8;
+mcfh(0, 0) /* point the pixel tables [0] and [1] at the mc_block_hpel* wrappers for 16 and 8 wide blocks */
+mcfh(8, 0)
+mcfh(0, 8)
+mcfh(8, 8)
+if(!qexp[0]) /* the table is filled only once */
+init_qexp();
+//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
+width= s->avctx->width;
+height= s->avctx->height;
+s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); /* NOTE(review): neither allocation result is checked in this function */
+s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
+for(i=0; i<MAX_REF_FRAMES; i++)
+for(j=0; j<MAX_REF_FRAMES; j++)
+scale_mv_ref[i][j] = 256*(i+1)/(j+1); /* ratio (i+1)/(j+1) in 8-bit fixed point */
+s->avctx->get_buffer(s->avctx, &s->mconly_picture); /* NOTE(review): return value ignored; the linesize read on the next line depends on this call */
+s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
+return 0;
+}
+static int common_init_after_header(AVCodecContext *avctx){ /* Derive the plane and subband geometry from the
+ * picture size, the chroma shifts and s->spatial_decomposition_count, and (re)allocate every
+ * subband's x_coeff array. Always returns 0; the av_mallocz() result is not checked here. */
+SnowContext *s = avctx->priv_data;
+int plane_index, level, orientation;
+for(plane_index=0; plane_index<3; plane_index++){
+int w= s->avctx->width;
+int h= s->avctx->height;
+if(plane_index){ // planes 1 and 2 are reduced by the chroma shifts
+w>>= s->chroma_h_shift;
+h>>= s->chroma_v_shift;
+}
+s->plane[plane_index].width = w;
+s->plane[plane_index].height= h;
+for(level=s->spatial_decomposition_count-1; level>=0; level--){
+for(orientation=level ? 1 : 0; orientation<4; orientation++){ // orientation 0 is only set up at level 0
+SubBand *b= &s->plane[plane_index].band[level][orientation];
+b->buf= s->spatial_dwt_buffer;
+b->level= level;
+b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
+b->width = (w + !(orientation&1))>>1;
+b->height= (h + !(orientation>1))>>1;
+b->stride_line = 1 << (s->spatial_decomposition_count - level);
+b->buf_x_offset = 0;
+b->buf_y_offset = 0;
+if(orientation&1){ // odd orientations start (w+1)>>1 samples into the row
+b->buf += (w+1)>>1;
+b->buf_x_offset = (w+1)>>1;
+}
+if(orientation>1){ // orientations 2 and 3 start half a band stride further on
+b->buf += b->stride>>1;
+b->buf_y_offset = b->stride_line >> 1;
+}
+b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); // same element offset, inside the IDWT buffer
+if(level)
+b->parent= &s->plane[plane_index].band[level-1][orientation];
+//FIXME avoid this realloc
+av_freep(&b->x_coeff);
+b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
+}
+w= (w+1)>>1; // halve the working size, rounding up, before the next pass of the level loop
+h= (h+1)>>1;
+}
+}
+return 0;
+}
+#define QUANTIZE2 0
+#if QUANTIZE2==1
+#define Q2_STEP 8
+static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ /* Accumulate
+ * per-cell squared differences between r0 and r1. score[] is cleared first; then every sample of the
+ * plane adds its squared, rounded difference to the cell selected by its position, the subband step
+ * for this level and Q2_STEP. */
+SubBand *b= &p->band[level][orientation]; // not referenced again in this function
+int x, y;
+int xo=0;
+int yo=0;
+int step= 1 << (s->spatial_decomposition_count - level);
+if(orientation&1)
+xo= step>>1;
+if(orientation&2)
+yo= step>>1;
+//FIXME bias for nonzero ?
+//FIXME optimize
+memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
+for(y=0; y<p->height; y++){
+for(x=0; x<p->width; x++){
+int sx= (x-xo + step/2) / step / Q2_STEP; // score cell column for this sample
+int sy= (y-yo + step/2) / step / Q2_STEP; // score cell row for this sample
+int v= r0[x + y*p->width] - r1[x + y*p->width];
+assert(sx>=0 && sy>=0 && sx < score_stride);
+v= ((v+8)>>4)<<4; // round the difference to a multiple of 16
+score[sx + sy*score_stride] += v*v;
+assert(score[sx + sy*score_stride] >= 0); // the accumulated score is expected to stay non-negative
+}
+}
+}
+/**
+ * Run dequantize() on every subband of plane p, using the copy of the
+ * coefficients that lives in buffer at the same offset the subband has
+ * inside s->spatial_idwt_buffer. width and height are not used here.
+ */
+static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
+    int lvl;
+    for(lvl=0; lvl<s->spatial_decomposition_count; lvl++){
+        int orient= lvl ? 1 : 0; /* orientation 0 is only visited at level 0 */
+        while(orient<4){
+            SubBand *band= &p->band[lvl][orient];
+            dequantize(s, band, buffer + (band->ibuf - s->spatial_idwt_buffer), band->stride);
+            orient++;
+        }
+    }
+}
+static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ /* Forward-transform
+ * buffer, quantize every subband into best_dequant, run one refinement pass that tries moving
+ * coefficients one step toward zero (skipped when s->qbias is 0), then copy best_dequant into
+ * s->spatial_idwt_buffer. The work buffers are variable-length arrays sized from height, stride and width. */
+int level, orientation, ys, xs, x, y, pass;
+IDWTELEM best_dequant[height * stride];
+IDWTELEM idwt2_buffer[height * stride];
+const int score_stride= (width + 10)/Q2_STEP;
+int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
+int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
+int threshold= (s->m.lambda * s->m.lambda) >> 6;
+//FIXME pass the copy cleanly ?
+//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
+ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
+for(level=0; level<s->spatial_decomposition_count; level++){
+for(orientation=level ? 1 : 0; orientation<4; orientation++){
+SubBand *b= &p->band[level][orientation];
+IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
+DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
+assert(src == b->buf); // code does not depend on this but it is true currently
+quantize(s, b, dst, src, b->stride, s->qbias);
+}
+}
+for(pass=0; pass<1; pass++){
+if(s->qbias == 0) //keyframe
+continue;
+for(level=0; level<s->spatial_decomposition_count; level++){
+for(orientation=level ? 1 : 0; orientation<4; orientation++){
+SubBand *b= &p->band[level][orientation];
+IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
+IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
+for(ys= 0; ys<Q2_STEP; ys++){
+for(xs= 0; xs<Q2_STEP; xs++){ // one trial per (xs, ys) phase of the Q2_STEP grid
+memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
+dequantize_all(s, p, idwt2_buffer, width, height);
+ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
+find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); // error of the current best
+memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
+for(y=ys; y<b->height; y+= Q2_STEP){
+for(x=xs; x<b->width; x+= Q2_STEP){
+if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
+if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
+//FIXME try more than just --
+}
+}
+dequantize_all(s, p, idwt2_buffer, width, height);
+ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
+find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); // error of the trial
+for(y=ys; y<b->height; y+= Q2_STEP){
+for(x=xs; x<b->width; x+= Q2_STEP){
+int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
+if(score[score_idx] <= best_score[score_idx] + threshold){ // keep the change if it is not worse by more than threshold
+best_score[score_idx]= score[score_idx];
+if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
+if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
+//FIXME copy instead
+}
+}
+}
+}
+}
+}
+}
+}
+memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
+}
+#endif /* QUANTIZE2==1 */
+#define USE_HALFPEL_PLANE 0
+/**
+ * Build the three interpolated companions of every plane of frame.
+ * halfpel[0][p] aliases the frame data itself; halfpel[1][p] is filtered
+ * along the row, halfpel[2][p] along the column, and halfpel[3][p] is the
+ * column filter applied to halfpel[1][p]. Each companion is freshly
+ * allocated with room for EDGE_WIDTH extra rows above and below, and the
+ * stored pointer is offset by EDGE_WIDTH*(1+linesize) into the allocation.
+ */
+static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
+    int plane;
+    assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
+    for(plane=0; plane<3; plane++){
+        const int shift= plane ? 1 : 0; /* planes 1 and 2 are half size */
+        const int w= s->avctx->width  >> shift;
+        const int h= s->avctx->height >> shift;
+        const int ls= frame->linesize[plane];
+        uint8_t *full= frame->data[plane];
+        uint8_t *hor, *ver, *diag;
+        int row, col;
+        hor = (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
+        ver = (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
+        diag= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
+        halfpel[0][plane]= full;
+        halfpel[1][plane]= hor;
+        halfpel[2][plane]= ver;
+        halfpel[3][plane]= diag;
+        /* both of these only read the full-pel plane, so they share one pass */
+        for(row=0; row<h; row++){
+            for(col=0; col<w; col++){
+                const int i= row*ls + col;
+                hor[i]= (20*(full[i] + full[i+1]) - 5*(full[i-1] + full[i+2]) + (full[i-2] + full[i+3]) + 16 )>>5;
+                ver[i]= (20*(full[i] + full[i+ls]) - 5*(full[i-ls] + full[i+2*ls]) + (full[i-2*ls] + full[i+3*ls]) + 16 )>>5;
+            }
+        }
+        /* needs the complete row-filtered plane, hence a separate pass */
+        for(row=0; row<h; row++){
+            for(col=0; col<w; col++){
+                const int i= row*ls + col;
+                diag[i]= (20*(hor[i] + hor[i+ls]) - 5*(hor[i-ls] + hor[i+2*ls]) + (hor[i-2*ls] + hor[i+3*ls]) + 16 )>>5;
+            }
+        }
+        //FIXME border!
+    }
+}
+/**
+ * Give back the oldest reference picture (index max_ref_frames-1), if it
+ * holds data, together with any half-pel planes stored for that slot.
+ */
+static void release_buffer(AVCodecContext *avctx){
+    SnowContext *s = avctx->priv_data;
+    const int oldest= s->max_ref_frames-1;
+    int kind, plane;
+    if(!s->last_picture[oldest].data[0])
+        return;
+    avctx->release_buffer(avctx, &s->last_picture[oldest]);
+    for(kind=1; kind<=3; kind++){
+        for(plane=0; plane<3; plane++){
+            /* undo the offset that was added to the allocation base */
+            if(s->halfpel_plane[oldest][kind][plane])
+                av_free(s->halfpel_plane[oldest][kind][plane] - EDGE_WIDTH*(1+s->current_picture.linesize[plane]));
+        }
+    }
+}
+static int frame_start(SnowContext *s){ /* Prepare the picture buffers for a new frame: pad the edges of the
+ * current picture, release the oldest reference, shift the reference list by one slot, make the
+ * current picture reference 0 and obtain a fresh current picture. Returns 0 on success, -1 when a
+ * non-keyframe has no usable reference or get_buffer() fails. */
+AVFrame tmp;
+int w= s->avctx->width; //FIXME round up to x16 ?
+int h= s->avctx->height;
+if(s->current_picture.data[0]){ // nothing to pad before the first picture exists
+s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
+s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
+s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
+}
+release_buffer(s->avctx);
+tmp= s->last_picture[s->max_ref_frames-1]; // the slot just released is reused for the new current picture
+memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
+memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
+if(USE_HALFPEL_PLANE && s->current_picture.data[0])
+halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
+s->last_picture[0]= s->current_picture;
+s->current_picture= tmp;
+if(s->keyframe){
+s->ref_frames= 0;
+}else{
+int i;
+for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) // count references, stopping after the most recent keyframe
+if(i && s->last_picture[i-1].key_frame)
+break;
+s->ref_frames= i;
+if(s->ref_frames==0){
+av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
+return -1;
+}
+}
+s->current_picture.reference= 1;
+if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
+av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+return -1;
+}
+s->current_picture.key_frame= s->keyframe;
+return 0;
+}
+/**
+ * Release everything the SnowContext owns that is set up by the shared
+ * initialization code: the DWT buffers, the motion-estimation scratch
+ * memory, the block array, the per-reference arrays and pictures, the
+ * motion-compensation-only picture and every subband's x_coeff array.
+ * All pointers freed with av_freep() are left NULL.
+ */
+static av_cold void common_end(SnowContext *s){
+    int plane_index, level, orientation, i;
+    av_freep(&s->spatial_dwt_buffer);
+    av_freep(&s->spatial_idwt_buffer);
+    s->m.me.temp= NULL; /* encode_init() makes this an alias of me.scratchpad, so it is not freed separately */
+    av_freep(&s->m.me.scratchpad);
+    av_freep(&s->m.me.map);
+    av_freep(&s->m.me.score_map);
+    av_freep(&s->m.obmc_scratchpad);
+    av_freep(&s->block);
+    av_freep(&s->scratchbuf);
+    for(i=0; i<MAX_REF_FRAMES; i++){
+        av_freep(&s->ref_mvs[i]);
+        av_freep(&s->ref_scores[i]);
+        if(s->last_picture[i].data[0])
+            s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
+    }
+    /* common_init() obtains this picture with get_buffer(); give it back
+     * here so it is not leaked when the codec is closed */
+    if(s->mconly_picture.data[0])
+        s->avctx->release_buffer(s->avctx, &s->mconly_picture);
+    for(plane_index=0; plane_index<3; plane_index++){
+        for(level=s->spatial_decomposition_count-1; level>=0; level--){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                SubBand *b= &s->plane[plane_index].band[level][orientation];
+                av_freep(&b->x_coeff);
+            }
+        }
+    }
+}
+/**
+ * Decoder init callback: selects the YUV 4:2:0 planar pixel format and runs
+ * the initialization shared with the encoder.
+ *
+ * @return the result of common_init(), so a failure there is reported to the
+ *         caller instead of being discarded
+ */
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    avctx->pix_fmt= PIX_FMT_YUV420P;
+    return common_init(avctx);
+}
+static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ /* Decode one packet into a picture.
+ * The header is parsed first, then the block tree, then each of the three planes is reconstructed
+ * slice by slice (coefficient unpacking, inverse DWT, prediction). The picture is stored through
+ * data and *data_size is set to sizeof(AVFrame). Returns the number of bytes the range decoder
+ * consumed, or -1 if decode_header() or frame_start() fails. */
+const uint8_t *buf = avpkt->data;
+int buf_size = avpkt->size;
+SnowContext *s = avctx->priv_data;
+RangeCoder * const c= &s->c;
+int bytes_read;
+AVFrame *picture = data;
+int level, orientation, plane_index;
+ff_init_range_decoder(c, buf, buf_size);
+ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
+if(decode_header(s)<0)
+return -1;
+common_init_after_header(avctx); // return value is not checked
+// realloc slice buffer for the case that spatial_decomposition_count changed
+slice_buffer_destroy(&s->sb);
+slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
+for(plane_index=0; plane_index<3; plane_index++){
+Plane *p= &s->plane[plane_index];
+p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
+&& p->hcoeff[1]==-10
+&& p->hcoeff[2]==2; // fast_mc only for diagonal MC with the 6-tap 40/-10/2 filter
+}
+alloc_blocks(s);
+if(frame_start(s) < 0)
+return -1;
+//keyframe flag duplication mess FIXME
+if(avctx->debug&FF_DEBUG_PICT_INFO)
+av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
+decode_blocks(s);
+for(plane_index=0; plane_index<3; plane_index++){
+Plane *p= &s->plane[plane_index];
+int w= p->width;
+int h= p->height;
+int x, y;
+int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
+if(s->avctx->debug&2048){ // debug bit 2048: keep a prediction-only copy of the plane in mconly_picture
+memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
+predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
+for(y=0; y<h; y++){
+for(x=0; x<w; x++){
+int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
+s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
+}
+}
+}
+{
+for(level=0; level<s->spatial_decomposition_count; level++){
+for(orientation=level ? 1 : 0; orientation<4; orientation++){
+SubBand *b= &p->band[level][orientation];
+unpack_coeffs(s, b, b->parent, orientation);
+}
+}
+}
+{
+const int mb_h= s->b_height << s->block_max_depth;
+const int block_size = MB_SIZE >> s->block_max_depth;
+const int block_w    = plane_index ? block_size/2 : block_size;
+int mb_y;
+DWTCompose cs[MAX_DECOMPOSITIONS];
+int yd=0, yq=0; // yd: next row handed to the inverse DWT; yq: next row rescaled in lossless mode
+int y;
+int end_y;
+ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
+for(mb_y=0; mb_y<=mb_h; mb_y++){ // note the inclusive bound: one pass more than there are block rows
+int slice_starty = block_w*mb_y;
+int slice_h = block_w*(mb_y+1);
+if (!(s->keyframe || s->avctx->debug&512)){ // shift the slice up by half a block in this case
+slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
+slice_h -= (block_w >> 1);
+}
+for(level=0; level<s->spatial_decomposition_count; level++){
+for(orientation=level ? 1 : 0; orientation<4; orientation++){
+SubBand *b= &p->band[level][orientation];
+int start_y;
+int end_y; // shadows the end_y declared in the enclosing block
+int our_mb_start = mb_y;
+int our_mb_end = (mb_y + 1);
+const int extra= 3;
+start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
+end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
+if (!(s->keyframe || s->avctx->debug&512)){
+start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
+end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
+}
+start_y = FFMIN(b->height, start_y);
+end_y = FFMIN(b->height, end_y);
+if (start_y != end_y){
+if (orientation == 0){ // band [0][0] is additionally passed through correlate and dequantize
+SubBand * correlate_band = &p->band[0][0];
+int correlate_end_y = FFMIN(b->height, end_y + 1);
+int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
+decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
+correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
+dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
+}
+else
+decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
+}
+}
+}
+for(; yd<slice_h; yd+=4){
+ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
+}
+if(s->qlog == LOSSLESS_QLOG){ // lossless: scale the lines up by FRAC_BITS before prediction is added
+for(; yq<slice_h && yq<h; yq++){
+IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
+for(x=0; x<w; x++){
+line[x] <<= FRAC_BITS;
+}
+}
+}
+predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
+y = FFMIN(p->height, slice_starty);
+end_y = FFMIN(p->height, slice_h);
+while(y < end_y) // give the finished lines back to the slice buffer
+slice_buffer_release(&s->sb, y++);
+}
+slice_buffer_flush(&s->sb);
+}
+}
+emms_c();
+release_buffer(avctx);
+if(!(s->avctx->debug&2048))
+*picture= s->current_picture;
+else
+*picture= s->mconly_picture;
+*data_size = sizeof(AVFrame);
+bytes_read= c->bytestream - c->bytestream_start;
+if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
+return bytes_read;
+}
+/**
+ * Decoder close callback: tears down the slice buffer and then everything
+ * released by common_end(). Always returns 0.
+ */
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    SnowContext * const ctx = avctx->priv_data;
+
+    slice_buffer_destroy(&ctx->sb);
+    common_end(ctx);
+
+    return 0;
+}
+AVCodec snow_decoder = { /* Codec descriptor for the Snow decoder. The leading entries are positional,
+ * so their meaning depends on the field order of AVCodec, which is declared outside this file. */
+"snow",
+CODEC_TYPE_VIDEO,
+CODEC_ID_SNOW,
+sizeof(SnowContext), // presumably the private context size; the callbacks read avctx->priv_data as a SnowContext
+decode_init,
+NULL, // presumably the encode callback slot, unused by a decoder -- confirm against AVCodec
+decode_end,
+decode_frame,
+CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+NULL,
+.long_name = NULL_IF_CONFIG_SMALL("Snow"),
+};
+#if CONFIG_SNOW_ENCODER
+static av_cold int encode_init(AVCodecContext *avctx) /* Encoder init callback: validates the options, sets the
+ * default motion-compensation filter, runs common_init(), allocates the motion-estimation buffers
+ * and sets up rate control. Returns 0 on success and -1 on an unsupported configuration or when
+ * ff_rate_control_init() fails. The allocations made here are not checked. */
+{
+SnowContext *s = avctx->priv_data;
+int plane_index;
+if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ // refuse to run unless the user opted in to experimental codecs
+av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
+"Use vstrict=-2 / -strict -2 to use it anyway.\n");
+return -1;
+}
+if(avctx->prediction_method == DWT_97
+&& (avctx->flags & CODEC_FLAG_QSCALE)
+&& avctx->global_quality == 0){
+av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
+return -1;
+}
+s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
+s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
+s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
+for(plane_index=0; plane_index<3; plane_index++){ // same 6-tap 40/-10/2 filter that decode_frame() treats as fast_mc
+s->plane[plane_index].diag_mc= 1;
+s->plane[plane_index].htaps= 6;
+s->plane[plane_index].hcoeff[0]=  40;
+s->plane[plane_index].hcoeff[1]= -10;
+s->plane[plane_index].hcoeff[2]=   2;
+s->plane[plane_index].fast_mc= 1;
+}
+common_init(avctx); // return value is not checked
+alloc_blocks(s);
+s->version=0;
+s->m.avctx   = avctx;
+s->m.flags   = avctx->flags;
+s->m.bit_rate= avctx->bit_rate;
+s->m.me.temp      =
+s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); // temp and scratchpad share one allocation
+s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
+h263_encode_init(&s->m); //mv_penalty
+s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); // clamp to 1..MAX_REF_FRAMES
+if(avctx->flags&CODEC_FLAG_PASS1){
+if(!avctx->stats_out)
+avctx->stats_out = av_mallocz(256);
+}
+if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
+if(ff_rate_control_init(&s->m) < 0)
+return -1;
+}
+s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
+avctx->coded_frame= &s->current_picture;
+switch(avctx->pix_fmt){
+//    case PIX_FMT_YUV444P:
+//    case PIX_FMT_YUV422P:
+case PIX_FMT_YUV420P:
+case PIX_FMT_GRAY8:
+//    case PIX_FMT_YUV411P:
+//    case PIX_FMT_YUV410P:
+s->colorspace_type= 0;
+break;
+/*    case PIX_FMT_RGB32:
+s->colorspace= 1;
+break;*/
+default:
+av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
+return -1;
+}
+//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+s->chroma_h_shift= 1;
+s->chroma_v_shift= 1;
+ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
+ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
+s->avctx->get_buffer(s->avctx, &s->input_picture); // return value is not checked
+if(s->avctx->me_method == ME_ITER){ // per-reference arrays are only allocated for ME_ITER
+int i;
+int size= s->b_width * s->b_height << 2*s->block_max_depth;
+for(i=0; i<s->max_ref_frames; i++){
+s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
+s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
+}
+}
+return 0;
+}
+//near copy & paste from dsputil, FIXME
+/**
+ * Add up all pixel values of a w x w block.
+ *
+ * @param pix       first pixel of the block
+ * @param line_size distance between the starts of two consecutive rows
+ * @param w         width and height of the block
+ * @return the sum of the w*w pixel values
+ */
+static int pix_sum(uint8_t * pix, int line_size, int w)
+{
+    int total = 0;
+    int row, col;
+    for (row = 0; row < w; row++) {
+        const uint8_t *line = pix + row * line_size;
+        for (col = 0; col < w; col++)
+            total += line[col];
+    }
+    return total;
+}
+//near copy & paste from dsputil, FIXME
+/**
+ * Add up ff_squareTbl[256 + pixel] over a w x w block (judging by the table
+ * name this is presumably the sum of squared pixel values -- confirm).
+ *
+ * @param pix       first pixel of the block
+ * @param line_size distance between the starts of two consecutive rows
+ * @param w         width and height of the block
+ */
+static int pix_norm1(uint8_t * pix, int line_size, int w)
+{
+    uint32_t *squares = ff_squareTbl + 256;
+    int acc = 0;
+    int row, col;
+    for (row = 0; row < w; row++) {
+        const uint8_t *line = pix + row * line_size;
+        for (col = 0; col < w; col++)
+            acc += squares[line[col]];
+    }
+    return acc;
 }
 //FIXME copy&paste
 #define P_LEFT P[1]
 #define P_TOP P[2]
 memcpy(s->block_state, p_state, sizeof(s->block_state));
 return score;
 }
 }
-/**
- * Tell whether two blocks are interchangeable: two intra blocks match when
- * all three colors agree; otherwise motion vector, reference index and the
- * intra flag must all agree.
- * @return 1 if the blocks match, 0 otherwise
- */
-static av_always_inline int same_block(BlockNode *a, BlockNode *b){
-    const int a_intra= a->type & BLOCK_INTRA;
-    const int b_intra= b->type & BLOCK_INTRA;
-    if(a_intra && b_intra)
-        return a->color[0] == b->color[0]
-            && a->color[1] == b->color[1]
-            && a->color[2] == b->color[2];
-    return a->mx == b->mx && a->my == b->my && a->ref == b->ref && a_intra == b_intra;
-}
 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
 const int w= s->b_width  << s->block_max_depth;
 const int rem_depth= s->block_max_depth - level;
 const int index= (x + y*w) << rem_depth;
 int trx= (x+1)<<rem_depth;
 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
 }
-}
-static void decode_q_branch(SnowContext *s, int level, int x, int y){ /* Decode one node of the block tree at
- * (x, y) on the given level: either read a leaf (intra color deltas, or reference index plus motion
- * vector deltas) and store it with set_blocks(), or recurse into the four children one level down. */
-const int w= s->b_width << s->block_max_depth;
-const int rem_depth= s->block_max_depth - level;
-const int index= (x + y*w) << rem_depth;
-int trx= (x+1)<<rem_depth;
-const BlockNode *left  = x ? &s->block[index-1] : &null_block; // neighbours outside the picture fall back to null_block
-const BlockNode *top   = y ? &s->block[index-w] : &null_block;
-const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
-const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
-int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
-if(s->keyframe){ // keyframes carry no block data: every block becomes an intra copy of null_block
-set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
-return;
-}
-if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ // leaf: deepest level reached, or the coded flag is set
-int type, mx, my;
-int l = left->color[0];
-int cb= left->color[1];
-int cr= left->color[2];
-int ref = 0;
-int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
-int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
-int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
-type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
-if(type){ // intra: colors are coded as deltas against the left neighbour
-pred_mv(s, &mx, &my, 0, left, top, tr);
-l += get_symbol(&s->c, &s->block_state[32], 1);
-cb+= get_symbol(&s->c, &s->block_state[64], 1);
-cr+= get_symbol(&s->c, &s->block_state[96], 1);
-}else{ // inter: the motion vector is coded as a delta against the pred_mv() prediction
-if(s->ref_frames > 1)
-ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
-pred_mv(s, &mx, &my, ref, left, top, tr);
-mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
-my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
-}
-set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
-}else{
-decode_q_branch(s, level+1, 2*x+0, 2*y+0);
-decode_q_branch(s, level+1, 2*x+1, 2*y+0);
-decode_q_branch(s, level+1, 2*x+0, 2*y+1);
-decode_q_branch(s, level+1, 2*x+1, 2*y+1);
-}
-}
-/**
- * Write the block tree of every top-level block in raster order.
- * When search is set, the method is ME_ITER and the frame is not a keyframe,
- * iterative_me() runs first. Encoding stops with an error message as soon as
- * the remaining output space drops below the per-row limit.
- */
-static void encode_blocks(SnowContext *s, int search){
-    const int cols= s->b_width;
-    const int rows= s->b_height;
-    int bx, by;
-    if(search && !s->keyframe && s->avctx->me_method == ME_ITER)
-        iterative_me(s);
-    for(by=0; by<rows; by++){
-        if(s->c.bytestream_end - s->c.bytestream < cols*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
-            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
-            return;
-        }
-        for(bx=0; bx<cols; bx++){
-            /* without a search, or with ME_ITER, the blocks are written as they already are */
-            if(!search || s->avctx->me_method == ME_ITER)
-                encode_q_branch2(s, 0, bx, by);
-            else
-                encode_q_branch (s, 0, bx, by);
-        }
-    }
-}
-/**
- * Decode the block tree of every top-level block, row by row and left to
- * right within each row.
- */
-static void decode_blocks(SnowContext *s){
-    const int cols= s->b_width;
-    const int rows= s->b_height;
-    int by= 0;
-    while(by < rows){
-        int bx= 0;
-        while(bx < cols){
-            decode_q_branch(s, 0, bx, by);
-            bx++;
-        }
-        by++;
-    }
-}
-static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ /* Sub-pel motion
- * compensation of a b_w x b_h block. Up to three half-pel planes are built from src (row filter,
- * column filter, column filter of the row-filtered data) and dst is a blend of the planes that
- * brane[] selects for (dx, dy). With p == NULL or p->fast_mc the fixed 20/-5/1 filter is used,
- * otherwise p->hcoeff[]. The tmp parameter is not referenced in this body. */
-static const uint8_t weight[64]={
-8,7,6,5,4,3,2,1,
-7,7,0,0,0,0,0,1,
-6,0,6,0,0,0,2,0,
-5,0,0,5,0,3,0,0,
-4,0,0,0,4,0,0,0,
-3,0,0,5,0,3,0,0,
-2,0,6,0,0,0,2,0,
-1,7,0,0,0,0,0,1,
-};
-static const uint8_t brane[256]={
-0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
-0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
-0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
-0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
-0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
-0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
-0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
-0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
-0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
-0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
-0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
-0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
-0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
-0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
-0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
-0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
-};
-static const uint8_t needs[16]={
-0,1,0,0,
-2,4,2,0,
-0,1,0,0,
-15
-};
-int x, y, b, r, l;
-int16_t tmpIt   [64*(32+HTAPS_MAX)];
-uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
-int16_t *tmpI= tmpIt;
-uint8_t *tmp2= tmp2t[0];
-const uint8_t *hpel[11];
-assert(dx<16 && dy<16);
-r= brane[dx + 16*dy]&15; // low nibble: index into hpel[] of one source plane
-l= brane[dx + 16*dy]>>4; // high nibble: index into hpel[] of the other source plane
-b= needs[l] | needs[r]; // bit mask of the filter stages that must run
-if(p && !p->diag_mc)
-b= 15;
-if(b&5){ // stage 1: filter along the row into tmp2t[0], keeping unrounded values in tmpIt
-for(y=0; y < b_h+HTAPS_MAX-1; y++){
-for(x=0; x < b_w; x++){
-int a_1=src[x + HTAPS_MAX/2-4];
-int a0= src[x + HTAPS_MAX/2-3];
-int a1= src[x + HTAPS_MAX/2-2];
-int a2= src[x + HTAPS_MAX/2-1];
-int a3= src[x + HTAPS_MAX/2+0];
-int a4= src[x + HTAPS_MAX/2+1];
-int a5= src[x + HTAPS_MAX/2+2];
-int a6= src[x + HTAPS_MAX/2+3];
-int am=0;
-if(!p || p->fast_mc){
-am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
-tmpI[x]= am;
-am= (am+16)>>5;
-}else{
-am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
-tmpI[x]= am;
-am= (am+32)>>6;
-}
-if(am&(~255)) am= ~(am>>31); // clamp to 0..255
-tmp2[x]= am;
-}
-tmpI+= 64;
-tmp2+= stride;
-src += stride;
-}
-src -= stride*y; // rewind src to the first row
-}
-src += HTAPS_MAX/2 - 1;
-tmp2= tmp2t[1];
-if(b&2){ // stage 2: filter along the column into tmp2t[1]
-for(y=0; y < b_h; y++){
-for(x=0; x < b_w+1; x++){
-int a_1=src[x + (HTAPS_MAX/2-4)*stride];
-int a0= src[x + (HTAPS_MAX/2-3)*stride];
-int a1= src[x + (HTAPS_MAX/2-2)*stride];
-int a2= src[x + (HTAPS_MAX/2-1)*stride];
-int a3= src[x + (HTAPS_MAX/2+0)*stride];
-int a4= src[x + (HTAPS_MAX/2+1)*stride];
-int a5= src[x + (HTAPS_MAX/2+2)*stride];
-int a6= src[x + (HTAPS_MAX/2+3)*stride];
-int am=0;
-if(!p || p->fast_mc)
-am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
-else
-am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
-if(am&(~255)) am= ~(am>>31);
-tmp2[x]= am;
-}
-src += stride;
-tmp2+= stride;
-}
-src -= stride*y;
-}
-src += stride*(HTAPS_MAX/2 - 1);
-tmp2= tmp2t[2];
-tmpI= tmpIt;
-if(b&4){ // stage 3: column filter over the unrounded stage 1 output, into tmp2t[2]
-for(y=0; y < b_h; y++){
-for(x=0; x < b_w; x++){
-int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
-int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
-int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
-int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
-int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
-int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
-int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
-int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
-int am=0;
-if(!p || p->fast_mc)
-am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
-else
-am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
-if(am&(~255)) am= ~(am>>31);
-tmp2[x]= am;
-}
-tmpI+= 64;
-tmp2+= stride;
-}
-}
-hpel[ 0]= src; // entries 3 and 7 of hpel[] are never assigned
-hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
-hpel[ 2]= src + 1;
-hpel[ 4]= tmp2t[1];
-hpel[ 5]= tmp2t[2];
-hpel[ 6]= tmp2t[1] + 1;
-hpel[ 8]= src + stride;
-hpel[ 9]= hpel[1] + stride;
-hpel[10]= hpel[8] + 1;
-if(b==15){ // all stages ran: bilinear blend of the four surrounding planes
-const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
-const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
-const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
-const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
-dx&=7;
-dy&=7;
-for(y=0; y < b_h; y++){
-for(x=0; x < b_w; x++){
-dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
-(8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
-}
-src1+=stride;
-src2+=stride;
-src3+=stride;
-src4+=stride;
-dst +=stride;
-}
-}else{ // otherwise: weighted average of the two planes picked by brane[]
-const uint8_t *src1= hpel[l];
-const uint8_t *src2= hpel[r];
-int a= weight[((dx&7) + (8*(dy&7)))];
-int b= 8-a; // shadows the stage mask b declared above
-for(y=0; y < b_h; y++){
-for(x=0; x < b_w; x++){
-dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
-}
-src1+=stride;
-src2+=stride;
-dst +=stride;
-}
-}
-}
-#define mca(dx,dy,b_w)\
-static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
-uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
-assert(h==b_w);\
-mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
-} /* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(): a square b_w block run through mc_block() with no Plane and a fixed (dx, dy) */
-mca( 0, 0,16) // the 16x16 variants
-mca( 8, 0,16)
-mca( 0, 8,16)
-mca( 8, 8,16)
-mca( 0, 0,8) // the 8x8 variants
-mca( 8, 0,8)
-mca( 0, 8,8)
-mca( 8, 8,8)
-static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ /* Write
- * the prediction for one b_w x b_h block into dst: a flat fill with the block's color for intra
- * blocks, otherwise motion compensation from the reference picture selected by block->ref, through
- * mc_block() or the put_h264_qpel_pixels_tab functions. */
-if(block->type & BLOCK_INTRA){
-int x, y;
-const int color = block->color[plane_index];
-const int color4= color*0x01010101; // the color byte replicated into all four bytes of a 32-bit word
-if(b_w==32){
-for(y=0; y < b_h; y++){
-*(uint32_t*)&dst[0 + y*stride]= color4;
-*(uint32_t*)&dst[4 + y*stride]= color4;
-*(uint32_t*)&dst[8 + y*stride]= color4;
-*(uint32_t*)&dst[12+ y*stride]= color4;
-*(uint32_t*)&dst[16+ y*stride]= color4;
-*(uint32_t*)&dst[20+ y*stride]= color4;
-*(uint32_t*)&dst[24+ y*stride]= color4;
-*(uint32_t*)&dst[28+ y*stride]= color4;
-}
-}else if(b_w==16){
-for(y=0; y < b_h; y++){
-*(uint32_t*)&dst[0 + y*stride]= color4;
-*(uint32_t*)&dst[4 + y*stride]= color4;
-*(uint32_t*)&dst[8 + y*stride]= color4;
-*(uint32_t*)&dst[12+ y*stride]= color4;
-}
-}else if(b_w==8){
-for(y=0; y < b_h; y++){
-*(uint32_t*)&dst[0 + y*stride]= color4;
-*(uint32_t*)&dst[4 + y*stride]= color4;
-}
-}else if(b_w==4){
-for(y=0; y < b_h; y++){
-*(uint32_t*)&dst[0 + y*stride]= color4;
-}
-}else{ // any other width: plain byte-wise fill
-for(y=0; y < b_h; y++){
-for(x=0; x < b_w; x++){
-dst[x + y*stride]= color;
-}
-}
-}
-}else{
-uint8_t *src= s->last_picture[block->ref].data[plane_index];
-const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale; // plane 0 uses twice the scale of planes 1 and 2
-int mx= block->mx*scale;
-int my= block->my*scale;
-const int dx= mx&15; // fractional part of the vector, in 1/16 units
-const int dy= my&15;
-const int tab_index= 3 - (b_w>>2) + (b_w>>4);
-sx += (mx>>4) - (HTAPS_MAX/2-1);
-sy += (my>>4) - (HTAPS_MAX/2-1);
-src += sx + sy*stride;
-if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
-|| (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ // source area is not fully inside the picture: build a padded copy in tmp
-ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
-src= tmp + MB_SIZE;
-}
-//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
-//        assert(!(b_w&(b_w-1)));
-assert(b_w>1 && b_h>1);
-assert((tab_index>=0 && tab_index<4) || b_w==32);
-if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) // generic path
-mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
-else if(b_w==32){ // 32 wide: two 16-wide calls for every 16 rows
-int y;
-for(y=0; y<b_h; y+=16){
-s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
-s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
-}
-}else if(b_w==b_h)
-s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
-else if(b_w==2*b_h){ // wide block: two square halves side by side
-s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
-s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
-}else{ // tall block: two square halves stacked
-assert(2*b_w==b_h);
-s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
-s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
-}
-}
-}
/*
 * ff_snow_inner_add_yblock: blend four predicted blocks with OBMC weights and
 * apply the result to b_h lines of a slice buffer.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * obmc, obmc_stride  weight table and its row stride; the four per-row weight
 *                    pointers are derived from obmc_stride alone.
 * block              four prediction buffers, each read at [x + y*src_stride].
 * b_w, b_h           number of columns / rows processed.
 * src_x, src_y       column offset inside a slice line / first slice line.
 * sb                 slice buffer that supplies the IDWTELEM lines.
 * add                non-zero: add the prediction, round, clamp and store
 *                    8-bit samples in dst8; zero: subtract the prediction
 *                    from the slice line in place.
 * dst8               8-bit output, written at [x + y*src_stride] when add is set.
 */
-void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
-int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
-int y, x;
-IDWTELEM * dst;
-for(y=0; y<b_h; y++){
-//FIXME ugly misuse of obmc_stride
/* obmc1..obmc4: row y of the weight table at offsets of 0 or (obmc_stride>>1)
   columns and 0 or (obmc_stride>>1) rows. */
-const uint8_t *obmc1= obmc + y*obmc_stride;
-const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
-const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
-const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
-dst = slice_buffer_get_line(sb, src_y + y);
-for(x=0; x<b_w; x++){
/* weights and blocks are paired in opposite order: obmc1 with block[3] ... obmc4 with block[0] */
-int v=   obmc1[x] * block[3][x + y*src_stride]
-+obmc2[x] * block[2][x + y*src_stride]
-+obmc3[x] * block[1][x + y*src_stride]
-+obmc4[x] * block[0][x + y*src_stride];
/* rescale the weighted sum: first << (8 - LOG2_OBMC_MAX), then >> (8 - FRAC_BITS) when FRAC_BITS != 8 */
-v <<= 8 - LOG2_OBMC_MAX;
-if(FRAC_BITS != 8){
-v >>= 8 - FRAC_BITS;
-}
-if(add){
-v += dst[x + src_x];
-v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
/* clamp to 0..255: a negative v becomes 0, a v above 255 becomes ~0, which stores as 255
   in the uint8_t (assumes a 32-bit int with arithmetic right shift) */
-if(v&(~255)) v= ~(v>>31);
-dst8[x + y*src_stride] = v;
-}else{
-dst[x + src_x] -= v;
-}
-}
-}
-}
/*
 * add_yblock: predict one overlapped block and blend it into the residual
 * plane (add == 0) or into the 8-bit output picture (add != 0).
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Steps, in order:
 *  1. take the four BlockNodes around (b_x, b_y), named lt, rt, lb, rb, and
 *     replace a neighbour by its in-range partner when b_x or b_y is negative
 *     or the +1 neighbour would fall outside the b_width x b_height grid;
 *  2. clip the b_w x b_h rectangle at (src_x, src_y) against the w x h plane,
 *     advancing obmc (and dst, when !sliced && !offset_dst) past the clipped
 *     part; return early when nothing is left;
 *  3. call pred_block() into s->scratchbuf once per distinct node, sharing a
 *     buffer whenever same_block() reports two nodes as equal;
 *  4. blend: through s->dsp.inner_add_yblock() when sliced is set, otherwise
 *     with the inline loop, which addresses dst with dst_stride.
 *
 * dst8 is always addressed with src_stride. The "#if 0" branch is compiled out.
 */
-//FIXME name cleanup (b_w, block_w, b_width stuff)
-static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
-const int b_width = s->b_width  << s->block_max_depth;
-const int b_height= s->b_height << s->block_max_depth;
-const int b_stride= b_width;
-BlockNode *lt= &s->block[b_x + b_y*b_stride];
-BlockNode *rt= lt+1;
-BlockNode *lb= lt+b_stride;
-BlockNode *rb= lb+1;
-uint8_t *block[4];
-int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
-uint8_t *tmp = s->scratchbuf;
-uint8_t *ptmp;
-int x,y;
/* grid edges: reuse the neighbour that lies inside the block grid */
-if(b_x<0){
-lt= rt;
-lb= rb;
-}else if(b_x + 1 >= b_width){
-rt= lt;
-rb= lb;
-}
-if(b_y<0){
-lt= lb;
-rt= rb;
-}else if(b_y + 1 >= b_height){
-lb= lt;
-rb= rt;
-}
/* horizontal clip against [0, w) */
-if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
-obmc -= src_x;
-b_w += src_x;
-if(!sliced && !offset_dst)
-dst -= src_x;
-src_x=0;
-}else if(src_x + b_w > w){
-b_w = w - src_x;
-}
/* vertical clip against [0, h) */
-if(src_y<0){
-obmc -= src_y*obmc_stride;
-b_h += src_y;
-if(!sliced && !offset_dst)
-dst -= src_y*dst_stride;
-src_y=0;
-}else if(src_y + b_h> h){
-b_h = h - src_y;
-}
-if(b_w<=0 || b_h<=0) return;
-assert(src_stride > 2*MB_SIZE + 5);
-if(!sliced && offset_dst)
-dst += src_x + src_y*dst_stride;
-dst8+= src_x + src_y*src_stride;
-//    src += src_x + src_y*src_stride;
/* prediction buffers are carved out of the scratch buffer starting at
   tmp + 3*tmp_step, one tmp_step apart; tmp itself is handed to pred_block() */
-ptmp= tmp + 3*tmp_step;
-block[0]= ptmp;
-ptmp+=tmp_step;
-pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
-if(same_block(lt, rt)){
-block[1]= block[0];
-}else{
-block[1]= ptmp;
-ptmp+=tmp_step;
-pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
-}
-if(same_block(lt, lb)){
-block[2]= block[0];
-}else if(same_block(rt, lb)){
-block[2]= block[1];
-}else{
-block[2]= ptmp;
-ptmp+=tmp_step;
-pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
-}
-if(same_block(lt, rb) ){
-block[3]= block[0];
-}else if(same_block(rt, rb)){
-block[3]= block[1];
-}else if(same_block(lb, rb)){
-block[3]= block[2];
-}else{
-block[3]= ptmp;
-pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
-}
-#if 0
-for(y=0; y<b_h; y++){
-for(x=0; x<b_w; x++){
-int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
-if(add) dst[x + y*dst_stride] += v;
-else    dst[x + y*dst_stride] -= v;
-}
-}
-for(y=0; y<b_h; y++){
-uint8_t *obmc2= obmc + (obmc_stride>>1);
-for(x=0; x<b_w; x++){
-int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
-if(add) dst[x + y*dst_stride] += v;
-else    dst[x + y*dst_stride] -= v;
-}
-}
-for(y=0; y<b_h; y++){
-uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
-for(x=0; x<b_w; x++){
-int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
-if(add) dst[x + y*dst_stride] += v;
-else    dst[x + y*dst_stride] -= v;
-}
-}
-for(y=0; y<b_h; y++){
-uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
-uint8_t *obmc4= obmc3+ (obmc_stride>>1);
-for(x=0; x<b_w; x++){
-int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
-if(add) dst[x + y*dst_stride] += v;
-else    dst[x + y*dst_stride] -= v;
-}
-}
-#else
-if(sliced){
-s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
-}else{
/* non-sliced blend: same arithmetic as the loop in ff_snow_inner_add_yblock,
   but dst is a flat plane addressed as [x + y*dst_stride] */
-for(y=0; y<b_h; y++){
-//FIXME ugly misuse of obmc_stride
-const uint8_t *obmc1= obmc + y*obmc_stride;
-const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
-const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
-const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
-for(x=0; x<b_w; x++){
-int v=   obmc1[x] * block[3][x + y*src_stride]
-+obmc2[x] * block[2][x + y*src_stride]
-+obmc3[x] * block[1][x + y*src_stride]
-+obmc4[x] * block[0][x + y*src_stride];
-v <<= 8 - LOG2_OBMC_MAX;
-if(FRAC_BITS != 8){
-v >>= 8 - FRAC_BITS;
-}
-if(add){
-v += dst[x + y*dst_stride];
-v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
/* clamp to 0..255 (negative becomes 0, above 255 stores as 255) */
-if(v&(~255)) v= ~(v>>31);
-dst8[x + y*src_stride] = v;
-}else{
-dst[x + y*dst_stride] -= v;
-}
-}
-}
-}
-#endif /* 0 */
-}
/*
 * predict_slice_buffered: apply (add != 0) or remove (add == 0) the
 * prediction for block row mb_y of one plane, working on a slice buffer.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Keyframes, or avctx->debug & 512: no motion prediction is used. The rows
 * block_w*mb_y .. min(h, block_w*(mb_y+1))-1 get a constant 128 offset:
 * with add they are rounded, clamped to 0..255 and written to the current
 * picture, without add 128<<FRAC_BITS is subtracted in place. mb_y == mb_h
 * does nothing in this mode.
 *
 * Otherwise add_yblock() is called in sliced mode for mb_x = 0..mb_w
 * (inclusive), each block being shifted up/left by block_w/2 and using block
 * index (mb_x-1, mb_y-1).
 */
-static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
-Plane *p= &s->plane[plane_index];
-const int mb_w= s->b_width  << s->block_max_depth;
-const int mb_h= s->b_height << s->block_max_depth;
-int x, y, mb_x;
-int block_size = MB_SIZE >> s->block_max_depth;
/* planes other than 0 use half the block size and the next obmc_tab entry */
-int block_w    = plane_index ? block_size/2 : block_size;
-const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
-int obmc_stride= plane_index ? block_size : 2*block_size;
-int ref_stride= s->current_picture.linesize[plane_index];
-uint8_t *dst8= s->current_picture.data[plane_index];
-int w= p->width;
-int h= p->height;
-if(s->keyframe || (s->avctx->debug&512)){
-if(mb_y==mb_h)
-return;
-if(add){
-for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
-//                DWTELEM * line = slice_buffer_get_line(sb, y);
/* reads sb->line[y] directly, without the load-on-demand macro; presumably the
   line is already present at this point -- TODO confirm against the caller */
-IDWTELEM * line = sb->line[y];
-for(x=0; x<w; x++){
-//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
-int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
-v >>= FRAC_BITS;
/* clamp to 0..255 (negative becomes 0, above 255 stores as 255) */
-if(v&(~255)) v= ~(v>>31);
-dst8[x + y*ref_stride]= v;
-}
-}
-}else{
-for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
-//                DWTELEM * line = slice_buffer_get_line(sb, y);
-IDWTELEM * line = sb->line[y];
-for(x=0; x<w; x++){
-line[x] -= 128 << FRAC_BITS;
-//                    buf[x + y*w]-= 128<<FRAC_BITS;
-}
-}
-}
-return;
-}
-for(mb_x=0; mb_x<=mb_w; mb_x++){
-add_yblock(s, 1, sb, old_buffer, dst8, obmc,
-block_w*mb_x - block_w/2,
-block_w*mb_y - block_w/2,
-block_w, block_w,
-w, h,
-w, ref_stride, obmc_stride,
-mb_x - 1, mb_y - 1,
-add, 0, plane_index);
-}
-}
/*
 * predict_slice: same job as the slice-buffer variant, but on a flat
 * IDWTELEM plane buf that is addressed as buf[x + y*w].
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Keyframes, or avctx->debug & 512: only the constant 128 offset is applied to
 * rows block_w*mb_y .. min(h, block_w*(mb_y+1))-1 (added, rounded and clamped
 * into the current picture when add is set, subtracted from buf otherwise);
 * mb_y == mb_h does nothing in this mode.
 *
 * Otherwise add_yblock() is called with sliced = 0, sb = NULL and
 * offset_dst = 1 for mb_x = 0..mb_w (inclusive).
 */
-static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
-Plane *p= &s->plane[plane_index];
-const int mb_w= s->b_width  << s->block_max_depth;
-const int mb_h= s->b_height << s->block_max_depth;
-int x, y, mb_x;
-int block_size = MB_SIZE >> s->block_max_depth;
/* planes other than 0 use half the block size and the next obmc_tab entry */
-int block_w    = plane_index ? block_size/2 : block_size;
-const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
-const int obmc_stride= plane_index ? block_size : 2*block_size;
-int ref_stride= s->current_picture.linesize[plane_index];
-uint8_t *dst8= s->current_picture.data[plane_index];
-int w= p->width;
-int h= p->height;
-if(s->keyframe || (s->avctx->debug&512)){
-if(mb_y==mb_h)
-return;
-if(add){
-for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
-for(x=0; x<w; x++){
-int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
-v >>= FRAC_BITS;
/* clamp to 0..255 (negative becomes 0, above 255 stores as 255) */
-if(v&(~255)) v= ~(v>>31);
-dst8[x + y*ref_stride]= v;
-}
-}
-}else{
-for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
-for(x=0; x<w; x++){
-buf[x + y*w]-= 128<<FRAC_BITS;
-}
-}
-}
-return;
-}
-for(mb_x=0; mb_x<=mb_w; mb_x++){
-add_yblock(s, 0, NULL, buf, dst8, obmc,
-block_w*mb_x - block_w/2,
-block_w*mb_y - block_w/2,
-block_w, block_w,
-w, h,
-w, ref_stride, obmc_stride,
-mb_x - 1, mb_y - 1,
-add, 1, plane_index);
-}
-}
/*
 * predict_plane: run predict_slice() over a whole plane, one block row at a
 * time, for mb_y = 0..mb_h inclusive (one more row than the block grid has).
 * ('-' lines are deleted at this position; the closing brace is shared context.)
 */
-static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
-const int mb_h= s->b_height << s->block_max_depth;
-int mb_y;
-for(mb_y=0; mb_y<=mb_h; mb_y++)
-predict_slice(s, buf, plane_index, add, mb_y);
 }
 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
 int i, x2, y2;
 Plane *p= &s->plane[plane_index];
 }
 }
 return distortion + rate*penalty_factor;
 }
/*
 * ff_spatial_idwt_init: prepare one DWTCompose state per decomposition level
 * for an inverse wavelet transform on a flat buffer.
 * (All lines carry '+': inserted at this position by the changeset.)
 *
 * Levels are visited from decomposition_count-1 down to 0; level L gets
 * height>>L and stride<<L. Only DWT_97 and DWT_53 are handled; any other type
 * leaves cs untouched. The width parameter is not used here.
 */
+static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+int level;
+for(level=decomposition_count-1; level>=0; level--){
+switch(type){
+case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
+case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
+}
+}
+}
/*
 * ff_spatial_idwt_slice: advance the inverse wavelet transform far enough
 * to cover output row y.
 * (All lines carry '+': inserted at this position by the changeset.)
 *
 * For every level, from decomposition_count-1 down to 0, the per-level compose
 * step is repeated while cs[level].y <= min((y>>level) + support, height>>level).
 * type == 2 returns without doing anything.
 */
+static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
/* look-ahead in rows: 3 for type 1, 5 otherwise; the literal 1 presumably equals DWT_53 -- TODO confirm */
+const int support = type==1 ? 3 : 5;
+int level;
+if(type==2) return;
+for(level=decomposition_count-1; level>=0; level--){
/* NOTE(review): nothing visible in this loop changes cs[level].y; the *_dy helpers presumably
   advance it. For a type matching neither case label the loop body is empty -- confirm callers
   never pass such a type. */
+while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
+switch(type){
+case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
+break;
+case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
+break;
+}
+}
+}
+}
/*
 * ff_spatial_idwt: whole-buffer inverse wavelet transform. Initialises the
 * per-level compose states in a local array, then drives the slice routine
 * for y = 0, 4, 8, ... while y < height.
 * (All lines carry '+': inserted at this position by the changeset.)
 */
+static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
/* NOTE(review): cs holds MAX_DECOMPOSITIONS entries; decomposition_count is not checked against it here */
+DWTCompose cs[MAX_DECOMPOSITIONS];
+int y;
+ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
+for(y=0; y<height; y+=4)
+ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
+}
/*
 * encode_subband_c0run: range-code one subband using zero-run coding for
 * coefficients whose neighbourhood is entirely zero.
 * (All lines carry '+': inserted at this position by the changeset.)
 *
 * The neighbourhood of src[x + y*stride] is l (left), lt, t, rt (previous row)
 * and p (parent band sample at (x>>1, y>>1), read with stride 2*stride when
 * parent is non-NULL and the position lies inside b->parent).
 *
 * Pass 1 walks the band and, for positions whose neighbourhood is all zero,
 * records the lengths of the zero runs between non-zero coefficients in runs[].
 * Pass 2 writes the number of runs, then per coefficient either a coded
 * zero/non-zero flag (non-zero neighbourhood) or consumes the run list
 * (all-zero neighbourhood), followed by magnitude-1 and sign for non-zero values.
 *
 * Returns 0 on success, -1 when fewer than w*40 bytes remain in the range
 * coder's output buffer at the start of a row. The orientation parameter is
 * only referenced from commented-out code.
 */
+static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
+const int w= b->width;
+const int h= b->height;
+int x, y;
+if(1){
+int run=0;
/* NOTE(review): variable-length array of w*h ints on the stack */
+int runs[w*h];
+int run_index=0;
+int max_index;
/* pass 1: collect zero-run lengths */
+for(y=0; y<h; y++){
+for(x=0; x<w; x++){
+int v, p=0;
+int /*ll=0, */l=0, lt=0, t=0, rt=0;
+v= src[x + y*stride];
+if(y){
+t= src[x + (y-1)*stride];
+if(x){
+lt= src[x - 1 + (y-1)*stride];
+}
+if(x + 1 < w){
+rt= src[x + 1 + (y-1)*stride];
+}
+}
+if(x){
+l= src[x - 1 + y*stride];
+/*if(x > 1){
+if(orientation==1) ll= src[y + (x-2)*stride];
+else               ll= src[x - 2 + y*stride];
+}*/
+}
+if(parent){
+int px= x>>1;
+int py= y>>1;
+if(px<b->parent->width && py<b->parent->height)
+p= parent[px + py*2*stride];
+}
+if(!(/*ll|*/l|lt|t|rt|p)){
+if(v){
+runs[run_index++]= run;
+run=0;
+}else{
+run++;
+}
+}
+}
+}
/* max_index = number of runs ended by a non-zero value; the trailing run is appended
   after it and is never written to the stream (see the run_index <= max_index tests) */
+max_index= run_index;
+runs[run_index++]= run;
+run_index=0;
+run= runs[run_index++];
+put_symbol2(&s->c, b->state[30], max_index, 0);
+if(run_index <= max_index)
+put_symbol2(&s->c, b->state[1], run, 3);
/* pass 2: emit the symbols */
+for(y=0; y<h; y++){
+if(s->c.bytestream_end - s->c.bytestream < w*40){
+av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+return -1;
+}
+for(x=0; x<w; x++){
+int v, p=0;
+int /*ll=0, */l=0, lt=0, t=0, rt=0;
+v= src[x + y*stride];
+if(y){
+t= src[x + (y-1)*stride];
+if(x){
+lt= src[x - 1 + (y-1)*stride];
+}
+if(x + 1 < w){
+rt= src[x + 1 + (y-1)*stride];
+}
+}
+if(x){
+l= src[x - 1 + y*stride];
+/*if(x > 1){
+if(orientation==1) ll= src[y + (x-2)*stride];
+else               ll= src[x - 2 + y*stride];
+}*/
+}
+if(parent){
+int px= x>>1;
+int py= y>>1;
+if(px<b->parent->width && py<b->parent->height)
+p= parent[px + py*2*stride];
+}
+if(/*ll|*/l|lt|t|rt|p){
/* non-zero neighbourhood: code the zero/non-zero flag with a context taken from
   log2 of the weighted neighbour magnitudes */
+int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
+put_rac(&s->c, &b->state[0][context], !!v);
+}else{
/* all-zero neighbourhood: the run list says whether this value is zero */
+if(!run){
+run= runs[run_index++];
+if(run_index <= max_index)
+put_symbol2(&s->c, b->state[1], run, 3);
+assert(v);
+}else{
+run--;
+assert(!v);
+}
+}
+if(v){
/* magnitude minus one, then the sign with a context built from the signs of l and t */
+int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
+int l2= 2*FFABS(l) + (l<0);
+int t2= 2*FFABS(t) + (t<0);
+put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
+put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
+}
+}
+}
+}
+return 0;
+}
/*
 * encode_subband: entry point for coding one subband. Forwards all arguments
 * to the c0run coder and returns its result; the other coder names survive
 * only as commented-out calls.
 * (All lines carry '+': inserted at this position by the changeset.)
 */
+static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
+//    encode_subband_qtree(s, b, src, parent, stride, orientation);
+//    encode_subband_z0run(s, b, src, parent, stride, orientation);
+return encode_subband_c0run(s, b, src, parent, stride, orientation);
+//    encode_subband_dzr(s, b, src, parent, stride, orientation);
+}
 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
 const int b_stride= s->b_width << s->block_max_depth;
 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
 BlockNode backup= *block;
 int rd, index, value;
 }
 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
 }
 }
/*
 * encode_blocks: code the block tree of the current frame, row by row over
 * the s->b_width x s->b_height grid.
 * (All lines carry '+': inserted at this position by the changeset.)
 *
 * With me_method == ME_ITER on a non-keyframe and search set, iterative_me()
 * runs first. Each position is coded with encode_q_branch2() when ME_ITER is
 * selected or search is 0, otherwise with encode_q_branch().
 *
 * NOTE(review): when the output buffer runs short the function logs and
 * returns early; being void, it cannot report that to its caller.
 */
+static void encode_blocks(SnowContext *s, int search){
+int x, y;
+int w= s->b_width;
+int h= s->b_height;
+if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
+iterative_me(s);
+for(y=0; y<h; y++){
+if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
+av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+return;
+}
+for(x=0; x<w; x++){
+if(s->avctx->me_method == ME_ITER || !search)
+encode_q_branch2(s, 0, x, y);
+else
+encode_q_branch (s, 0, x, y);
+}
+}
+}
 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
 const int w= b->width;
 const int h= b->height;
 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
 }
 }
 }
 }
/*
 * dequantize_slice_buffered: scale the quantised coefficients of subband b
 * back up, for band rows start_y .. end_y-1, directly in the slice buffer.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Band row y lives in slice line y*b->stride_line + b->buf_y_offset, starting
 * at column b->buf_x_offset. Non-zero values become
 * sign(i) * ((|i|*qmul + qadd) >> QEXPSHIFT); zeros stay zero. Nothing is done
 * when s->qlog == LOSSLESS_QLOG. src and stride appear only in a commented-out line.
 */
-static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
-const int w= b->width;
/* effective quantiser: frame qlog plus band qlog, clipped to 0..QROOT*16 */
-const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
-const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
-const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
-int x,y;
-if(s->qlog == LOSSLESS_QLOG) return;
-for(y=start_y; y<end_y; y++){
-//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
-IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
-for(x=0; x<w; x++){
-int i= line[x];
-if(i<0){
-line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
-}else if(i>0){
-line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
-}
-}
-}
-}
 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
 const int w= b->width;
 const int h= b->height;
 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
 else  src[i] -= src[i - 1];
 }
 }else{
 if(y) src[i] -= src[i - stride];
-}
-}
-}
-}
/*
 * correlate_slice_buffered: undo spatial prediction of subband b for band
 * rows start_y .. end_y-1 inside the slice buffer, by adding a predictor built
 * from already reconstructed neighbours to every sample.
 * ('-' lines are deleted at this position; the four closing braces are shared context.)
 *
 * Predictor for x > 0:
 *   use_median set:  mid_pred(left, top, top-right) when y > 0 and x+1 < w,
 *                    otherwise left;
 *   use_median zero: mid_pred(left, top, left + top - top-left) when y > 0,
 *                    otherwise left.
 * Predictor for x == 0: top when y > 0, nothing for the very first sample.
 * The src, stride and inverse parameters are not used by live code.
 */
-static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
-const int w= b->width;
-int x,y;
-IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
-IDWTELEM * prev;
/* when the slice does not start at row 0, fetch the row above it as the first "prev" */
-if (start_y != 0)
-line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
-for(y=start_y; y<end_y; y++){
-prev = line;
-//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
-line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
-for(x=0; x<w; x++){
-if(x){
-if(use_median){
-if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
-else  line[x] += line[x - 1];
-}else{
-if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
-else  line[x] += line[x - 1];
-}
-}else{
-if(y) line[x] += prev[x];
 }
 }
 }
 }
 s->last_qlog                        = s->qlog;
 s->last_qbias                       = s->qbias;
 s->last_mv_scale                    = s->mv_scale;
 s->last_block_max_depth             = s->block_max_depth;
 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
-}
/*
 * decode_qlogs: fill in the per-band qlog values for all three planes.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Level 0 covers orientations 0..3, higher levels 1..3. A value is read from
 * the stream (signed symbol, header_state context) except that plane 2 copies
 * plane 1 and orientation 2 copies orientation 1 of the same plane and level.
 */
-static void decode_qlogs(SnowContext *s){
-int plane_index, level, orientation;
-for(plane_index=0; plane_index<3; plane_index++){
-for(level=0; level<s->spatial_decomposition_count; level++){
-for(orientation=level ? 1:0; orientation<4; orientation++){
-int q;
-if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
-else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
-else                    q= get_symbol(&s->c, s->header_state, 1);
-s->plane[plane_index].band[level][orientation].qlog= q;
-}
-}
-}
-}
/*
 * GET_S(dst, check): read one unsigned header symbol into the local variable
 * tmp, evaluate check (which may refer to tmp), and on failure log the value
 * and make the ENCLOSING function return -1; on success store tmp in dst.
 * Requires s and an int tmp in scope. Expands to several statements, so it is
 * not safe as the unbraced body of an if/else.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 */
-#define GET_S(dst, check) \
-tmp= get_symbol(&s->c, s->header_state, 0);\
-if(!(check)){\
-av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
-return -1;\
-}\
-dst= tmp;
/*
 * decode_header: parse a frame header from the range coder into s.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Layout as read here:
 *  - keyframe flag (coded with a private, freshly initialised state);
 *  - on keyframes: version, always_reset, temporal decomposition type/count,
 *    spatial decomposition count, colorspace type, chroma shifts, spatial
 *    scalability flag, max_ref_frames (stored value + 1), band qlogs;
 *  - on other frames: optional interpolation filter update for planes 0 and 1
 *    (plane 2 copies plane 1), optional decomposition count + qlogs update;
 *  - signed deltas for spatial_decomposition_type, qlog, mv_scale, qbias and
 *    block_max_depth, which were reset to 0 beforehand on keyframes or when
 *    always_reset is set.
 *
 * Returns 0 on success and -1 when a validated field is out of range.
 */
-static int decode_header(SnowContext *s){
-int plane_index, tmp;
-uint8_t kstate[32];
-memset(kstate, MID_STATE, sizeof(kstate));
-s->keyframe= get_rac(&s->c, kstate);
-if(s->keyframe || s->always_reset){
-reset_contexts(s);
-s->spatial_decomposition_type=
-s->qlog=
-s->qbias=
-s->mv_scale=
-s->block_max_depth= 0;
-}
-if(s->keyframe){
/* tmp is int and 0U is unsigned, so the comparison is unsigned: only version 0 passes */
-GET_S(s->version, tmp <= 0U)
-s->always_reset= get_rac(&s->c, s->header_state);
-s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
-s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
-GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
-s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
-s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
-s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
-s->spatial_scalability= get_rac(&s->c, s->header_state);
-//        s->rate_scalability= get_rac(&s->c, s->header_state);
-GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
-s->max_ref_frames++;
-decode_qlogs(s);
-}
-if(!s->keyframe){
-if(get_rac(&s->c, s->header_state)){
-for(plane_index=0; plane_index<2; plane_index++){
-int htaps, i, sum=0;
-Plane *p= &s->plane[plane_index];
-p->diag_mc= get_rac(&s->c, s->header_state);
/* tap count is coded as (htaps-2)/2, so it is always even and at least 2 */
-htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
-if((unsigned)htaps > HTAPS_MAX || htaps==0)
-return -1;
-p->htaps= htaps;
/* coefficients are read from index htaps/2 down to 1, odd indices negated;
   hcoeff[0] is chosen so that hcoeff[0] plus the sum of the others is 32 */
-for(i= htaps/2; i; i--){
-p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
-sum += p->hcoeff[i];
-}
-p->hcoeff[0]= 32-sum;
-}
-s->plane[2].diag_mc= s->plane[1].diag_mc;
-s->plane[2].htaps  = s->plane[1].htaps;
-memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
-}
-if(get_rac(&s->c, s->header_state)){
-GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
-decode_qlogs(s);
-}
-}
-s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
/* NOTE(review): this and the next two error messages have no trailing newline, unlike GET_S */
-if(s->spatial_decomposition_type > 1U){
-av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
-return -1;
-}
/* the smaller chroma dimension must still be at least 1 after count-1 halvings */
-if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
-s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
-av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
-return -1;
-}
-s->qlog           += get_symbol(&s->c, s->header_state, 1);
-s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
-s->qbias          += get_symbol(&s->c, s->header_state, 1);
-s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
/* only 0 and 1 are accepted; the field is reset to 0 before failing */
-if(s->block_max_depth > 1 || s->block_max_depth < 0){
-av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
-s->block_max_depth= 0;
-return -1;
-}
-return 0;
-}
/*
 * init_qexp: fill qexp[0..QROOT-1] with 128 * 2^(i/QROOT), rounded to an
 * integer by lrintf(); v is multiplied by 2^(1/QROOT) after each entry.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 */
-static void init_qexp(void){
-int i;
-double v=128;
-for(i=0; i<QROOT; i++){
-qexp[i]= lrintf(v);
-v *= pow(2, 1.0 / QROOT);
-}
-}
/*
 * common_init: initialisation that does not depend on the frame header.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Sets up the DSP context and overrides several of its function tables,
 * builds qexp[] once, allocates the two full-size wavelet buffers, fills
 * scale_mv_ref[][], requests the mconly picture and sizes the scratch buffer
 * from that picture's luma linesize. Always returns 0.
 *
 * NOTE(review): none of the av_mallocz()/av_malloc() results nor the
 * get_buffer() return value are checked in this function.
 */
-static av_cold int common_init(AVCodecContext *avctx){
-SnowContext *s = avctx->priv_data;
-int width, height;
-int i, j;
-s->avctx= avctx;
-s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
-dsputil_init(&s->dsp, avctx);
/* mcf(dx,dy): make the qpel and no-rounding qpel table entries [0] and [1] at index
   dy+dx/4 point at the corresponding h264 qpel functions */
-#define mcf(dx,dy)\
-s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
-s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
-s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
-s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
-s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
-s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
-mcf( 0, 0)
-mcf( 4, 0)
-mcf( 8, 0)
-mcf(12, 0)
-mcf( 0, 4)
-mcf( 4, 4)
-mcf( 8, 4)
-mcf(12, 4)
-mcf( 0, 8)
-mcf( 4, 8)
-mcf( 8, 8)
-mcf(12, 8)
-mcf( 0,12)
-mcf( 4,12)
-mcf( 8,12)
-mcf(12,12)
/* mcfh(dx,dy): point the put_pixels / put_no_rnd_pixels entries [0] and [1] at index
   dy/4+dx/8 at the mc_block_hpel<dx><dy>16 and ...8 functions */
-#define mcfh(dx,dy)\
-s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
-s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
-mc_block_hpel ## dx ## dy ## 16;\
-s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
-s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
-mc_block_hpel ## dx ## dy ## 8;
-mcfh(0, 0)
-mcfh(8, 0)
-mcfh(0, 8)
-mcfh(8, 8)
/* qexp[] is shared; it is built only while its first entry is still zero */
-if(!qexp[0])
-init_qexp();
-//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
-width= s->avctx->width;
-height= s->avctx->height;
-s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
-s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
/* scale_mv_ref[i][j] = 256*(i+1)/(j+1), integer division */
-for(i=0; i<MAX_REF_FRAMES; i++)
-for(j=0; j<MAX_REF_FRAMES; j++)
-scale_mv_ref[i][j] = 256*(i+1)/(j+1);
-s->avctx->get_buffer(s->avctx, &s->mconly_picture);
-s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
-return 0;
-}
/*
 * common_init_after_header: set plane sizes and describe every subband
 * once the chroma shifts and decomposition count are known. Always returns 0.
 * ('-' lines are deleted at this position; the closing brace is shared context.)
 *
 * Planes 1 and 2 use the picture size shifted right by the chroma shifts.
 * Levels are walked from spatial_decomposition_count-1 down to 0 while w and h
 * are halved (rounding up) after each level. For each band the code records
 * its level, strides, size, position inside the shared wavelet buffers
 * (buf/ibuf and the slice-buffer offsets) and its parent band, and
 * reallocates x_coeff.
 */
-static int common_init_after_header(AVCodecContext *avctx){
-SnowContext *s = avctx->priv_data;
-int plane_index, level, orientation;
-for(plane_index=0; plane_index<3; plane_index++){
-int w= s->avctx->width;
-int h= s->avctx->height;
-if(plane_index){
-w>>= s->chroma_h_shift;
-h>>= s->chroma_v_shift;
-}
-s->plane[plane_index].width = w;
-s->plane[plane_index].height= h;
-for(level=s->spatial_decomposition_count-1; level>=0; level--){
-for(orientation=level ? 1 : 0; orientation<4; orientation++){
-SubBand *b= &s->plane[plane_index].band[level][orientation];
-b->buf= s->spatial_dwt_buffer;
-b->level= level;
-b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
/* bit 0 of orientation selects the right half, orientation > 1 the lower half;
   the left/upper halves get the extra sample when w or h is odd */
-b->width = (w + !(orientation&1))>>1;
-b->height= (h + !(orientation>1))>>1;
-b->stride_line = 1 << (s->spatial_decomposition_count - level);
-b->buf_x_offset = 0;
-b->buf_y_offset = 0;
-if(orientation&1){
-b->buf += (w+1)>>1;
-b->buf_x_offset = (w+1)>>1;
-}
-if(orientation>1){
-b->buf += b->stride>>1;
-b->buf_y_offset = b->stride_line >> 1;
-}
/* ibuf: same element offset as buf, but inside the IDWTELEM buffer */
-b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
-if(level)
-b->parent= &s->plane[plane_index].band[level-1][orientation];
-//FIXME avoid this realloc
/* NOTE(review): the av_mallocz() result is not checked here */
-av_freep(&b->x_coeff);
-b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
-}
-w= (w+1)>>1;
-h= (h+1)>>1;
-}
-}
-return 0;
 }
 /*
  * qscale2qlog: convert a qscale value to the qlog scale:
  * rint(QROOT * log2(qscale / FF_QP2LAMBDA)) + 61*QROOT/8.
  * (Unchanged context lines in this comparison.)
  */
 static int qscale2qlog(int qscale){
 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
 + 61*QROOT/8; //<64 >60
 }
 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
 }
 }
-}
-#define QUANTIZE2 0
-#if QUANTIZE2==1
-#define Q2_STEP 8
/*
 * find_sse: accumulate squared differences between planes r0 and r1 into a
 * coarse score grid for one subband position.
 * (All lines carry '-': deleted at this position by the changeset; the code sits
 * inside "#if QUANTIZE2==1" while QUANTIZE2 is defined as 0.)
 *
 * step = 1 << (spatial_decomposition_count - level); xo/yo are half a step for
 * bands with orientation bit 0 / bit 1 set. Each pixel (x, y) is mapped to
 * cell ((x-xo+step/2)/step/Q2_STEP, (y-yo+step/2)/step/Q2_STEP) and adds
 * v*v, where v is the difference rounded to a multiple of 16.
 * score is cleared first; it must hold score_stride * ceil(height/Q2_STEP) ints.
 * The local b is not used.
 */
-static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
-SubBand *b= &p->band[level][orientation];
-int x, y;
-int xo=0;
-int yo=0;
-int step= 1 << (s->spatial_decomposition_count - level);
-if(orientation&1)
-xo= step>>1;
-if(orientation&2)
-yo= step>>1;
-//FIXME bias for nonzero ?
-//FIXME optimize
-memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
-for(y=0; y<p->height; y++){
-for(x=0; x<p->width; x++){
-int sx= (x-xo + step/2) / step / Q2_STEP;
-int sy= (y-yo + step/2) / step / Q2_STEP;
-int v= r0[x + y*p->width] - r1[x + y*p->width];
-assert(sx>=0 && sy>=0 && sx < score_stride);
-v= ((v+8)>>4)<<4;
-score[sx + sy*score_stride] += v*v;
-assert(score[sx + sy*score_stride] >= 0);
-}
-}
-}
/*
 * dequantize_all: run dequantize() on every subband of plane p inside
 * buffer. Each band is located at the same element offset it has in
 * s->spatial_idwt_buffer. width and height are not used.
 * (All lines carry '-': deleted at this position by the changeset; the code sits
 * inside "#if QUANTIZE2==1" while QUANTIZE2 is defined as 0.)
 */
-static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
-int level, orientation;
-for(level=0; level<s->spatial_decomposition_count; level++){
-for(orientation=level ? 1 : 0; orientation<4; orientation++){
-SubBand *b= &p->band[level][orientation];
-IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
-dequantize(s, b, dst, b->stride);
-}
-}
-}
/*
 * dwt_quantize: forward transform, quantise, then try to lower coefficient
 * magnitudes by one wherever the reconstruction error stays within a threshold.
 * (All lines carry '-': deleted at this position by the changeset; the code sits
 * inside "#if QUANTIZE2==1" while QUANTIZE2 is defined as 0.)
 *
 * 1. ff_spatial_dwt() on buffer, then quantize() every band into best_dequant.
 * 2. Single refinement pass, skipped when s->qbias == 0: for every band and
 *    every (xs, ys) phase of a Q2_STEP x Q2_STEP grid, reconstruct the current
 *    best and a trial in which the coefficients at that phase are moved one
 *    step towards zero, score both with find_sse(), and keep the change per
 *    cell when score <= best_score + threshold.
 * 3. Copy best_dequant into s->spatial_idwt_buffer.
 *
 * NOTE(review): best_dequant, idwt2_buffer, best_score and score are
 * variable-length arrays on the stack, two of them height*stride elements.
 */
-static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
-int level, orientation, ys, xs, x, y, pass;
-IDWTELEM best_dequant[height * stride];
-IDWTELEM idwt2_buffer[height * stride];
-const int score_stride= (width + 10)/Q2_STEP;
-int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
-int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
-int threshold= (s->m.lambda * s->m.lambda) >> 6;
-//FIXME pass the copy cleanly ?
-//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
-ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
-for(level=0; level<s->spatial_decomposition_count; level++){
-for(orientation=level ? 1 : 0; orientation<4; orientation++){
-SubBand *b= &p->band[level][orientation];
-IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
-DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
-assert(src == b->buf); // code does not depend on this but it is true currently
-quantize(s, b, dst, src, b->stride, s->qbias);
-}
-}
-for(pass=0; pass<1; pass++){
-if(s->qbias == 0) //keyframe
-continue;
-for(level=0; level<s->spatial_decomposition_count; level++){
-for(orientation=level ? 1 : 0; orientation<4; orientation++){
-SubBand *b= &p->band[level][orientation];
-IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
-IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
-for(ys= 0; ys<Q2_STEP; ys++){
-for(xs= 0; xs<Q2_STEP; xs++){
/* baseline: reconstruct the current best and score it */
-memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
-dequantize_all(s, p, idwt2_buffer, width, height);
-ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
-find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
/* trial: move the coefficients at this grid phase one step towards zero */
-memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
-for(y=ys; y<b->height; y+= Q2_STEP){
-for(x=xs; x<b->width; x+= Q2_STEP){
-if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
-if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
-//FIXME try more than just --
-}
-}
-dequantize_all(s, p, idwt2_buffer, width, height);
-ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
-find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
/* accept the trial per score cell when it is no worse than baseline + threshold */
-for(y=ys; y<b->height; y+= Q2_STEP){
-for(x=xs; x<b->width; x+= Q2_STEP){
-int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
-if(score[score_idx] <= best_score[score_idx] + threshold){
-best_score[score_idx]= score[score_idx];
-if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
-if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
-//FIXME copy instead
-}
-}
-}
-}
-}
-}
-}
-}
-memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
-}
-#endif /* QUANTIZE2==1 */
/*
 * encode_init: encoder set-up.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 * Fails with -1 when:
 *  - strict_std_compliance is above FF_COMPLIANCE_EXPERIMENTAL;
 *  - prediction_method is DWT_97 together with CODEC_FLAG_QSCALE and
 *    global_quality == 0 (the lossless setting, per the logged message);
 *  - rate-control initialisation fails;
 *  - the pixel format is neither PIX_FMT_YUV420P nor PIX_FMT_GRAY8.
 * Returns 0 otherwise.
 *
 * NOTE(review): the return value of common_init(), all av_mallocz() results
 * and the get_buffer() call for the input picture are not checked; the early
 * error returns after the allocations do not release what was allocated.
 */
-static av_cold int encode_init(AVCodecContext *avctx)
-{
-SnowContext *s = avctx->priv_data;
-int plane_index;
-if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
-av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
-"Use vstrict=-2 / -strict -2 to use it anyway.\n");
-return -1;
-}
-if(avctx->prediction_method == DWT_97
-&& (avctx->flags & CODEC_FLAG_QSCALE)
-&& avctx->global_quality == 0){
-av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
-return -1;
-}
-s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
-s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
-s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
/* default interpolation filter for all planes: 6 taps with coefficients 40, -10, 2 */
-for(plane_index=0; plane_index<3; plane_index++){
-s->plane[plane_index].diag_mc= 1;
-s->plane[plane_index].htaps= 6;
-s->plane[plane_index].hcoeff[0]=  40;
-s->plane[plane_index].hcoeff[1]= -10;
-s->plane[plane_index].hcoeff[2]=   2;
-s->plane[plane_index].fast_mc= 1;
-}
-common_init(avctx);
-alloc_blocks(s);
-s->version=0;
-s->m.avctx   = avctx;
-s->m.flags   = avctx->flags;
-s->m.bit_rate= avctx->bit_rate;
/* me.temp and me.scratchpad share one allocation */
-s->m.me.temp      =
-s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
-s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
-s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
-s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
-h263_encode_init(&s->m); //mv_penalty
/* clamp the requested reference count to 1..MAX_REF_FRAMES */
-s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
-if(avctx->flags&CODEC_FLAG_PASS1){
-if(!avctx->stats_out)
-avctx->stats_out = av_mallocz(256);
-}
-if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
-if(ff_rate_control_init(&s->m) < 0)
-return -1;
-}
-s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
-avctx->coded_frame= &s->current_picture;
-switch(avctx->pix_fmt){
-//    case PIX_FMT_YUV444P:
-//    case PIX_FMT_YUV422P:
-case PIX_FMT_YUV420P:
-case PIX_FMT_GRAY8:
-//    case PIX_FMT_YUV411P:
-//    case PIX_FMT_YUV410P:
-s->colorspace_type= 0;
-break;
-/*    case PIX_FMT_RGB32:
-s->colorspace= 1;
-break;*/
-default:
-av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
-return -1;
-}
-//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
/* chroma shifts are fixed at 1 regardless of pix_fmt */
-s->chroma_h_shift= 1;
-s->chroma_v_shift= 1;
-ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
-ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
-s->avctx->get_buffer(s->avctx, &s->input_picture);
/* iterative ME keeps per-reference motion vectors and scores for every smallest block */
-if(s->avctx->me_method == ME_ITER){
-int i;
-int size= s->b_width * s->b_height << 2*s->block_max_depth;
-for(i=0; i<s->max_ref_frames; i++){
-s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
-s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
-}
-}
-return 0;
-}
-#define USE_HALFPEL_PLANE 0
/*
 * halfpel_interpol: build three interpolated copies of every plane of frame
 * with the 6-tap filter (1, -5, 20, 20, -5, 1), rounding +16 and >> 5.
 * (All lines carry '-': this copy is deleted at this position by the changeset.)
 *
 *   halfpel[0][p]  the source plane itself (not copied)
 *   halfpel[1][p]  filtered horizontally
 *   halfpel[2][p]  filtered vertically
 *   halfpel[3][p]  vertical filter applied to halfpel[1][p]
 *
 * Each new plane is allocated as ls*(h + 2*EDGE_WIDTH) bytes and the stored
 * pointer is advanced by EDGE_WIDTH*(1+ls). Planes 1 and 2 use width and
 * height shifted right by one.
 *
 * NOTE(review): the av_malloc() results are not checked, and the filter
 * output is stored into uint8_t without clamping.
 */
-static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
-int p,x,y;
-assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
-for(p=0; p<3; p++){
-int is_chroma= !!p;
-int w= s->avctx->width  >>is_chroma;
-int h= s->avctx->height >>is_chroma;
-int ls= frame->linesize[p];
-uint8_t *src= frame->data[p];
-halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
-halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
-halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
-halfpel[0][p]= src;
/* horizontal pass: reads src[i-2 .. i+3] */
-for(y=0; y<h; y++){
-for(x=0; x<w; x++){
-int i= y*ls + x;
-halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
-}
-}
/* vertical pass: reads rows y-2 .. y+3 of the source */
-for(y=0; y<h; y++){
-for(x=0; x<w; x++){
-int i= y*ls + x;
-halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
-}
-}
/* vertical pass over the horizontally filtered plane */
-src= halfpel[1][p];
-for(y=0; y<h; y++){
-for(x=0; x<w; x++){
-int i= y*ls + x;
-halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
-}
-}
-//FIXME border!
-}
-}
-/**
- * Release the oldest reference picture, last_picture[max_ref_frames-1],
- * together with the interpolated half-pel planes stored for that slot.
- * Does nothing when that slot holds no picture data.
- */
-static void release_buffer(AVCodecContext *avctx){
-    SnowContext *s = avctx->priv_data;
-    const int oldest= s->max_ref_frames-1;
-    int row, plane;
-
-    if(!s->last_picture[oldest].data[0])
-        return;
-
-    avctx->release_buffer(avctx, &s->last_picture[oldest]);
-
-    /* rows 1..3 are the interpolated planes; each stored pointer is offset
-     * by EDGE_WIDTH*(1+linesize) from the start of its allocation */
-    for(row=1; row<=3; row++){
-        for(plane=0; plane<3; plane++){
-            uint8_t *hp= s->halfpel_plane[oldest][row][plane];
-            if(hp)
-                av_free(hp - EDGE_WIDTH*(1+s->current_picture.linesize[plane]));
-        }
-    }
-}
-static int frame_start(SnowContext *s){ /*
- * Rotate the reference picture list and obtain a buffer for the picture
- * that is about to be coded.
- *
- * Returns 0 on success, -1 when a non-keyframe has no usable reference
- * frame or when get_buffer() fails.
- */
-AVFrame tmp;
-int w= s->avctx->width; //FIXME round up to x16 ?
-int h= s->avctx->height;
-if(s->current_picture.data[0]){ // pad the previous picture: plane 0 at full size, planes 1 and 2 at half size with half the edge width
-s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
-s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
-s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
-}
-release_buffer(s->avctx); // must run before the list is shifted below
-tmp= s->last_picture[s->max_ref_frames-1]; // keep the last slot's AVFrame struct; it becomes the new current_picture
-memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); // shift every reference one slot towards the end
-memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); // same shift for the 4x4 half-pel pointer tables
-if(USE_HALFPEL_PLANE && s->current_picture.data[0]) // NOTE(review): USE_HALFPEL_PLANE is #defined to 0 in this file, so this call is never made
-halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
-s->last_picture[0]= s->current_picture; // previous picture becomes the newest reference
-s->current_picture= tmp;
-if(s->keyframe){
-s->ref_frames= 0;
-}else{
-int i;
-for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) // count references: stop at the first empty slot ...
-if(i && s->last_picture[i-1].key_frame) // ... or right after a slot flagged as keyframe
-break;
-s->ref_frames= i;
-if(s->ref_frames==0){
-av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
-return -1;
-}
-}
-s->current_picture.reference= 1;
-if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
-av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-return -1;
-}
-s->current_picture.key_frame= s->keyframe;
-return 0;
 }
 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
 SnowContext *s = avctx->priv_data;
 RangeCoder * const c= &s->c;
 emms_c();
 return ff_rac_terminate(c);
 }
-/**
- * Free the buffers and reference pictures held by a SnowContext.
- *
- * Releases the DWT work buffers, the motion-estimation scratch memory, the
- * block array, the per-reference motion vector / score tables, every
- * reference picture that still holds data, and the x_coeff array of each
- * subband of the three planes.
- */
-static av_cold void common_end(SnowContext *s){
-    int ref, pl, lvl, ori;
-
-    av_freep(&s->spatial_dwt_buffer);
-    av_freep(&s->spatial_idwt_buffer);
-
-    s->m.me.temp= NULL;
-    av_freep(&s->m.me.scratchpad);
-    av_freep(&s->m.me.map);
-    av_freep(&s->m.me.score_map);
-    av_freep(&s->m.obmc_scratchpad);
-
-    av_freep(&s->block);
-    av_freep(&s->scratchbuf);
-
-    for(ref=0; ref<MAX_REF_FRAMES; ref++){
-        AVFrame *pic= &s->last_picture[ref];
-
-        av_freep(&s->ref_mvs[ref]);
-        av_freep(&s->ref_scores[ref]);
-        if(pic->data[0])
-            s->avctx->release_buffer(s->avctx, pic);
-    }
-
-    for(pl=0; pl<3; pl++){
-        Plane *plane= &s->plane[pl];
-
-        /* walk the decomposition levels from the highest index down to 0 */
-        lvl= s->spatial_decomposition_count;
-        while(lvl-- > 0){
-            /* only level 0 has an orientation-0 band */
-            for(ori= lvl ? 1 : 0; ori<4; ori++)
-                av_freep(&plane->band[lvl][ori].x_coeff);
-        }
-    }
-}
 /**
  * Encoder close callback: frees the shared codec state via common_end()
  * and the stats_out buffer attached to the codec context.
  *
  * @return always 0
  */
 static av_cold int encode_end(AVCodecContext *avctx)
 {
     SnowContext *s = avctx->priv_data;

     common_end(s);
     /* av_freep() also resets the pointer, so the context does not keep a
      * dangling stats_out after close */
     av_freep(&avctx->stats_out);

     return 0;
 }
-static av_cold int decode_init(AVCodecContext *avctx) /* Decoder init callback: sets the output pixel format and runs the shared initialisation. Always returns 0. */
-{
-avctx->pix_fmt= PIX_FMT_YUV420P; // the decoder always reports YUV 4:2:0 output
-common_init(avctx); // NOTE(review): any result of common_init() is ignored here - confirm it cannot fail
-return 0;
-}
-static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ /*
- * Decode one packet into a picture.
- *
- * avctx      codec context; its priv_data is the SnowContext
- * data       output, written as an AVFrame
- * data_size  set to sizeof(AVFrame) once the picture has been produced
- * avpkt      input packet; avpkt->data / avpkt->size feed the range decoder
- *
- * Returns -1 if decode_header() or frame_start() fails, otherwise the number
- * of bytes consumed by the range decoder (which may be 0, see the end of
- * the function).
- */
-const uint8_t *buf = avpkt->data;
-int buf_size = avpkt->size;
-SnowContext *s = avctx->priv_data;
-RangeCoder * const c= &s->c;
-int bytes_read;
-AVFrame *picture = data;
-int level, orientation, plane_index;
-ff_init_range_decoder(c, buf, buf_size);
-ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
-s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
-if(decode_header(s)<0)
-return -1;
-common_init_after_header(avctx); // NOTE(review): any result of this call is not checked here
-// realloc slice buffer for the case that spatial_decomposition_count changed
-slice_buffer_destroy(&s->sb);
-slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
-for(plane_index=0; plane_index<3; plane_index++){
-Plane *p= &s->plane[plane_index];
-p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 // fast_mc only for diag_mc with the 6-tap filter whose coefficients are 40,-10,2
-&& p->hcoeff[1]==-10
-&& p->hcoeff[2]==2;
-}
-alloc_blocks(s); // NOTE(review): any result of alloc_blocks() is not checked here
-if(frame_start(s) < 0)
-return -1;
-//keyframe flag duplication mess FIXME
-if(avctx->debug&FF_DEBUG_PICT_INFO)
-av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
-decode_blocks(s);
-for(plane_index=0; plane_index<3; plane_index++){ // everything below is done once per plane
-Plane *p= &s->plane[plane_index];
-int w= p->width;
-int h= p->height;
-int x, y;
-int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
-if(s->avctx->debug&2048){ // debug bit 2048: after predict_plane(), copy this plane of current_picture into mconly_picture
-memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
-predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
-for(y=0; y<h; y++){
-for(x=0; x<w; x++){
-int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
-s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
-}
-}
-}
-{
-for(level=0; level<s->spatial_decomposition_count; level++){ // unpack every subband; only level 0 has an orientation-0 band
-for(orientation=level ? 1 : 0; orientation<4; orientation++){
-SubBand *b= &p->band[level][orientation];
-unpack_coeffs(s, b, b->parent, orientation);
-}
-}
-}
-{
-const int mb_h= s->b_height << s->block_max_depth;
-const int block_size = MB_SIZE >> s->block_max_depth;
-const int block_w    = plane_index ? block_size/2 : block_size; // planes 1 and 2 use half the block size of plane 0
-int mb_y;
-DWTCompose cs[MAX_DECOMPOSITIONS];
-int yd=0, yq=0; // progress counters: yd for the IDWT loop, yq for the lossless rescale loop
-int y; // shadows the plane-level y declared above
-int end_y;
-ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
-for(mb_y=0; mb_y<=mb_h; mb_y++){ // note the <=: mb_h+1 iterations
-int slice_starty = block_w*mb_y;
-int slice_h = block_w*(mb_y+1);
-if (!(s->keyframe || s->avctx->debug&512)){ // non-keyframes (unless debug bit 512 is set): move the slice window up by half a block
-slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
-slice_h -= (block_w >> 1);
-}
-for(level=0; level<s->spatial_decomposition_count; level++){
-for(orientation=level ? 1 : 0; orientation<4; orientation++){
-SubBand *b= &p->band[level][orientation];
-int start_y;
-int end_y; // shadows the end_y declared in the enclosing block
-int our_mb_start = mb_y;
-int our_mb_end = (mb_y + 1);
-const int extra= 3;
-start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
-end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
-if (!(s->keyframe || s->avctx->debug&512)){ // same half-block shift as above, scaled to this band
-start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
-end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
-}
-start_y = FFMIN(b->height, start_y); // clamp the row range to the band height
-end_y = FFMIN(b->height, end_y);
-if (start_y != end_y){ // nothing to decode for an empty row range
-if (orientation == 0){ // orientation 0: decode p->band[0][0], then correlate and dequantize it
-SubBand * correlate_band = &p->band[0][0];
-int correlate_end_y = FFMIN(b->height, end_y + 1);
-int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
-decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
-correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
-dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
-}
-else
-decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
-}
-}
-}
-for(; yd<slice_h; yd+=4){ // advance the buffered IDWT in steps of 4 up to slice_h
-ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
-}
-if(s->qlog == LOSSLESS_QLOG){ // lossless: shift each newly available line left by FRAC_BITS
-for(; yq<slice_h && yq<h; yq++){
-IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
-for(x=0; x<w; x++){
-line[x] <<= FRAC_BITS;
-}
-}
-}
-predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
-y = FFMIN(p->height, slice_starty);
-end_y = FFMIN(p->height, slice_h);
-while(y < end_y) // hand the lines [y, end_y) back to the slice buffer
-slice_buffer_release(&s->sb, y++);
-}
-slice_buffer_flush(&s->sb);
-}
-}
-emms_c();
-release_buffer(avctx);
-if(!(s->avctx->debug&2048)) // debug bit 2048 selects mconly_picture as the output instead of current_picture
-*picture= s->current_picture;
-else
-*picture= s->mconly_picture;
-*data_size = sizeof(AVFrame);
-bytes_read= c->bytestream - c->bytestream_start;
-if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
-return bytes_read;
-}
-static av_cold int decode_end(AVCodecContext *avctx) /* Decoder close callback: destroys the slice buffer, then frees the shared codec state. Always returns 0. */
-{
-SnowContext *s = avctx->priv_data;
-slice_buffer_destroy(&s->sb); // the slice buffer is handled here, not in common_end()
-common_end(s);
-return 0;
-}
-AVCodec snow_decoder = { /* Codec table entry for the Snow video decoder; fields are given positionally, so their order must match the AVCodec declaration. */
-"snow",
-CODEC_TYPE_VIDEO,
-CODEC_ID_SNOW,
-sizeof(SnowContext), // size of the private context reached through avctx->priv_data
-decode_init,
-NULL, // NOTE(review): presumably the encode callback slot, unused by a decoder - confirm against the AVCodec declaration
-decode_end,
-decode_frame,
-CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
-NULL, // NOTE(review): purpose of this slot is not visible here - confirm against the AVCodec declaration
-.long_name = NULL_IF_CONFIG_SMALL("Snow"),
-};
-#if CONFIG_SNOW_ENCODER
 AVCodec snow_encoder = {
 "snow",
 CODEC_TYPE_VIDEO,
 CODEC_ID_SNOW,
 sizeof(SnowContext),

Mercurial > libavcodec.hg

comparison snow.c @ 10188:404026d9adb5 libavcodec