comparison snow.c @ 10188:404026d9adb5 libavcodec

Reorder functions so that encoding functions are disabled by the preprocessor. Fixes compilation with disabled optimizations and enabled Snow decoder.
author diego
date Fri, 18 Sep 2009 19:45:09 +0000
parents b911dbff811c
children 328e2a3171d2
comparison
equal deleted inserted replaced
10187:b14d646fe719 10188:404026d9adb5
502 } DWTCompose; 502 } DWTCompose;
503 503
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) 504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) 505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506 506
507 static void iterative_me(SnowContext *s);
508
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) 507 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510 { 508 {
511 int i; 509 int i;
512 510
513 buf->base_buffer = base_buffer; 511 buf->base_buffer = base_buffer;
1277 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; 1275 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1278 } 1276 }
1279 } 1277 }
1280 } 1278 }
1281 1279
1282 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1283 int level;
1284 for(level=decomposition_count-1; level>=0; level--){
1285 switch(type){
1286 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1287 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1288 }
1289 }
1290 }
1291
1292 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1293 const int support = type==1 ? 3 : 5;
1294 int level;
1295 if(type==2) return;
1296
1297 for(level=decomposition_count-1; level>=0; level--){
1298 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1299 switch(type){
1300 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1301 break;
1302 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1303 break;
1304 }
1305 }
1306 }
1307 }
1308
1309 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ 1280 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1310 const int support = type==1 ? 3 : 5; 1281 const int support = type==1 ? 3 : 5;
1311 int level; 1282 int level;
1312 if(type==2) return; 1283 if(type==2) return;
1313 1284
1319 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); 1290 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1320 break; 1291 break;
1321 } 1292 }
1322 } 1293 }
1323 } 1294 }
1324 }
1325
1326 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1327 DWTCompose cs[MAX_DECOMPOSITIONS];
1328 int y;
1329 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1330 for(y=0; y<height; y+=4)
1331 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1332 }
1333
/**
 * Write the coefficients of one subband to the range coder s->c.
 *
 * The function makes two passes over the w x h coefficients in src:
 *  1. collect, into runs[], the lengths of the zero runs found among the
 *     positions whose left, top-left, top, top-right and parent
 *     coefficients are all zero;
 *  2. emit the symbols: a significance bit for positions with a non-zero
 *     context, run lengths for the others, then magnitude and sign for
 *     every non-zero coefficient.
 *
 * parent may be NULL, in which case the parent coefficient counts as 0.
 * orientation is only referenced from commented-out code.
 *
 * @return 0 on success, -1 (after logging "encoded frame too large") when
 *         fewer than w*40 bytes remain in the output buffer at the start
 *         of a row.
 */
1334 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1335 const int w= b->width;
1336 const int h= b->height;
1337 int x, y;
1338
1339 if(1){
1340 int run=0;
// NOTE(review): variable-length array sized by the subband area; lives on the stack.
1341 int runs[w*h];
1342 int run_index=0;
1343 int max_index;
1344
// Pass 1: gather run lengths without writing anything.
1345 for(y=0; y<h; y++){
1346 for(x=0; x<w; x++){
1347 int v, p=0;
1348 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1349 v= src[x + y*stride];
1350
// Neighbours outside the subband keep their initial value 0.
1351 if(y){
1352 t= src[x + (y-1)*stride];
1353 if(x){
1354 lt= src[x - 1 + (y-1)*stride];
1355 }
1356 if(x + 1 < w){
1357 rt= src[x + 1 + (y-1)*stride];
1358 }
1359 }
1360 if(x){
1361 l= src[x - 1 + y*stride];
1362 /*if(x > 1){
1363 if(orientation==1) ll= src[y + (x-2)*stride];
1364 else ll= src[x - 2 + y*stride];
1365 }*/
1366 }
1367 if(parent){
1368 int px= x>>1;
1369 int py= y>>1;
1370 if(px<b->parent->width && py<b->parent->height)
1371 p= parent[px + py*2*stride];
1372 }
// Only positions with an all-zero context take part in run counting:
// a non-zero value closes the current run, a zero value extends it.
1373 if(!(/*ll|*/l|lt|t|rt|p)){
1374 if(v){
1375 runs[run_index++]= run;
1376 run=0;
1377 }else{
1378 run++;
1379 }
1380 }
1381 }
1382 }
// max_index is the number of closed runs; the trailing open run is stored
// after it, then the read position is rewound to the first entry.
1383 max_index= run_index;
1384 runs[run_index++]= run;
1385 run_index=0;
1386 run= runs[run_index++];
1387
1388 put_symbol2(&s->c, b->state[30], max_index, 0);
1389 if(run_index <= max_index)
1390 put_symbol2(&s->c, b->state[1], run, 3);
1391
// Pass 2: emit symbols, recomputing the same context as pass 1.
1392 for(y=0; y<h; y++){
// Refuse to start a row when fewer than w*40 output bytes remain.
1393 if(s->c.bytestream_end - s->c.bytestream < w*40){
1394 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1395 return -1;
1396 }
1397 for(x=0; x<w; x++){
1398 int v, p=0;
1399 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1400 v= src[x + y*stride];
1401
1402 if(y){
1403 t= src[x + (y-1)*stride];
1404 if(x){
1405 lt= src[x - 1 + (y-1)*stride];
1406 }
1407 if(x + 1 < w){
1408 rt= src[x + 1 + (y-1)*stride];
1409 }
1410 }
1411 if(x){
1412 l= src[x - 1 + y*stride];
1413 /*if(x > 1){
1414 if(orientation==1) ll= src[y + (x-2)*stride];
1415 else ll= src[x - 2 + y*stride];
1416 }*/
1417 }
1418 if(parent){
1419 int px= x>>1;
1420 int py= y>>1;
1421 if(px<b->parent->width && py<b->parent->height)
1422 p= parent[px + py*2*stride];
1423 }
// Non-zero context: code whether v is non-zero, with a context taken from
// the log2 of a weighted sum of the neighbour magnitudes.
1424 if(/*ll|*/l|lt|t|rt|p){
1425 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1426
1427 put_rac(&s->c, &b->state[0][context], !!v);
1428 }else{
// Zero context: the position is covered by the run lengths from pass 1.
// When the current run is used up, fetch the next one (and write it unless
// it is the trailing entry); v is asserted non-zero there.
1429 if(!run){
1430 run= runs[run_index++];
1431
1432 if(run_index <= max_index)
1433 put_symbol2(&s->c, b->state[1], run, 3);
1434 assert(v);
1435 }else{
1436 run--;
1437 assert(!v);
1438 }
1439 }
// Non-zero coefficient: write |v|-1, then the sign bit (v<0) using a state
// selected through quant3bA from the left and top neighbours.
1440 if(v){
1441 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1442 int l2= 2*FFABS(l) + (l<0);
1443 int t2= 2*FFABS(t) + (t<0);
1444
1445 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1446 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1447 }
1448 }
1449 }
1450 }
1451 return 0;
1452 }
1453
1454 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1455 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1456 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1457 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1458 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1459 } 1295 }
1460 1296
1461 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ 1297 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1462 const int w= b->width; 1298 const int w= b->width;
1463 const int h= b->height; 1299 const int h= b->height;
1631 *d= *s; 1467 *d= *s;
1632 d->bytestream= bytestream; 1468 d->bytestream= bytestream;
1633 d->bytestream_start= bytestream_start; 1469 d->bytestream_start= bytestream_start;
1634 } 1470 }
1635 1471
1636 //near copy & paste from dsputil, FIXME
/**
 * Sum the bytes of a w x w square.
 *
 * @param pix       first byte of the top row
 * @param line_size distance in bytes between the starts of two rows
 * @param w         side length of the square; values <= 0 give 0
 * @return the sum of all w*w bytes
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
1651
1652 //near copy & paste from dsputil, FIXME
1653 static int pix_norm1(uint8_t * pix, int line_size, int w)
1654 {
1655 int s, i, j;
1656 uint32_t *sq = ff_squareTbl + 256;
1657
1658 s = 0;
1659 for (i = 0; i < w; i++) {
1660 for (j = 0; j < w; j ++) {
1661 s += sq[pix[0]];
1662 pix ++;
1663 }
1664 pix += line_size - w;
1665 }
1666 return s;
1667 }
1668
1669 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ 1472 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1670 const int w= s->b_width << s->block_max_depth; 1473 const int w= s->b_width << s->block_max_depth;
1671 const int rem_depth= s->block_max_depth - level; 1474 const int rem_depth= s->block_max_depth - level;
1672 const int index= (x + y*w) << rem_depth; 1475 const int index= (x + y*w) << rem_depth;
1673 const int block_w= 1<<rem_depth; 1476 const int block_w= 1<<rem_depth;
1716 (tr ->mx * scale[tr ->ref] + 128) >>8); 1519 (tr ->mx * scale[tr ->ref] + 128) >>8);
1717 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, 1520 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1718 (top ->my * scale[top ->ref] + 128) >>8, 1521 (top ->my * scale[top ->ref] + 128) >>8,
1719 (tr ->my * scale[tr ->ref] + 128) >>8); 1522 (tr ->my * scale[tr ->ref] + 128) >>8);
1720 } 1523 }
1524 }
1525
1526 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1527 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1528 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1529 }else{
1530 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1531 }
1532 }
1533
1534 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1535 const int w= s->b_width << s->block_max_depth;
1536 const int rem_depth= s->block_max_depth - level;
1537 const int index= (x + y*w) << rem_depth;
1538 int trx= (x+1)<<rem_depth;
1539 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1540 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1541 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1542 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1543 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1544
1545 if(s->keyframe){
1546 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
1547 return;
1548 }
1549
1550 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1551 int type, mx, my;
1552 int l = left->color[0];
1553 int cb= left->color[1];
1554 int cr= left->color[2];
1555 int ref = 0;
1556 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1557 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
1558 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
1559
1560 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1561
1562 if(type){
1563 pred_mv(s, &mx, &my, 0, left, top, tr);
1564 l += get_symbol(&s->c, &s->block_state[32], 1);
1565 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1566 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1567 }else{
1568 if(s->ref_frames > 1)
1569 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
1570 pred_mv(s, &mx, &my, ref, left, top, tr);
1571 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
1572 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
1573 }
1574 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
1575 }else{
1576 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1577 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1578 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1579 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1580 }
1581 }
1582
1583 static void decode_blocks(SnowContext *s){
1584 int x, y;
1585 int w= s->b_width;
1586 int h= s->b_height;
1587
1588 for(y=0; y<h; y++){
1589 for(x=0; x<w; x++){
1590 decode_q_branch(s, 0, x, y);
1591 }
1592 }
1593 }
1594
/**
 * Produce a b_w x b_h block in dst from src at the sub-position (dx, dy).
 *
 * dx and dy are asserted to be below 16. Up to three filtered planes are
 * built in local buffers (a horizontally filtered one, a vertically filtered
 * one and one filtered in both directions), then dst is written either as a
 * four-plane blend (b==15) or as a two-plane blend weighted by weight[].
 *
 * p may be NULL; in that case, or when p->fast_mc is set, the fixed
 * 20/-5/1 taps are used instead of p->hcoeff[]. When p is non-NULL and
 * p->diag_mc is clear, all planes are built and the four-plane blend is used.
 *
 * The tmp parameter is not referenced in the body.
 * NOTE(review): tmp2t is a variable-length array whose size depends on stride.
 */
1595 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// Blend weight of the first source plane (out of 8), indexed by (dx&7) + 8*(dy&7).
1596 static const uint8_t weight[64]={
1597 8,7,6,5,4,3,2,1,
1598 7,7,0,0,0,0,0,1,
1599 6,0,6,0,0,0,2,0,
1600 5,0,0,5,0,3,0,0,
1601 4,0,0,0,4,0,0,0,
1602 3,0,0,5,0,3,0,0,
1603 2,0,6,0,0,0,2,0,
1604 1,7,0,0,0,0,0,1,
1605 };
1606
// Indexed by dx + 16*dy. The high nibble becomes l and the low nibble r, both
// used as indices into needs[] and hpel[]. Entries 0xcc select needs[12]==15,
// which forces the four-plane blend below.
1607 static const uint8_t brane[256]={
1608 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
1609 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
1610 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
1611 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
1612 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
1613 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
1614 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
1615 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
1616 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
1617 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
1618 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
1619 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
1620 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
1621 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
1622 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
1623 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
1624 };
1625
// Bit mask of the filter passes required for each hpel[] index:
// tested below as b&5 (horizontal pass), b&2 (vertical pass on src) and
// b&4 (vertical pass on the horizontal intermediates).
1626 static const uint8_t needs[16]={
1627 0,1,0,0,
1628 2,4,2,0,
1629 0,1,0,0,
1630 15
1631 };
1632
1633 int x, y, b, r, l;
1634 int16_t tmpIt [64*(32+HTAPS_MAX)];
1635 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
1636 int16_t *tmpI= tmpIt;
1637 uint8_t *tmp2= tmp2t[0];
1638 const uint8_t *hpel[11];
1639 assert(dx<16 && dy<16);
1640 r= brane[dx + 16*dy]&15;
1641 l= brane[dx + 16*dy]>>4;
1642
1643 b= needs[l] | needs[r];
1644 if(p && !p->diag_mc)
1645 b= 15;
1646
// Horizontal pass over b_h+HTAPS_MAX-1 rows: tmpI keeps the unrounded filter
// sum (row pitch 64), tmp2t[0] the rounded value limited to 0..255.
1647 if(b&5){
1648 for(y=0; y < b_h+HTAPS_MAX-1; y++){
1649 for(x=0; x < b_w; x++){
1650 int a_1=src[x + HTAPS_MAX/2-4];
1651 int a0= src[x + HTAPS_MAX/2-3];
1652 int a1= src[x + HTAPS_MAX/2-2];
1653 int a2= src[x + HTAPS_MAX/2-1];
1654 int a3= src[x + HTAPS_MAX/2+0];
1655 int a4= src[x + HTAPS_MAX/2+1];
1656 int a5= src[x + HTAPS_MAX/2+2];
1657 int a6= src[x + HTAPS_MAX/2+3];
1658 int am=0;
1659 if(!p || p->fast_mc){
1660 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1661 tmpI[x]= am;
1662 am= (am+16)>>5;
1663 }else{
1664 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
1665 tmpI[x]= am;
1666 am= (am+32)>>6;
1667 }
1668
// Out of 0..255: negative values become 0, larger ones store as 255.
1669 if(am&(~255)) am= ~(am>>31);
1670 tmp2[x]= am;
1671 }
1672 tmpI+= 64;
1673 tmp2+= stride;
1674 src += stride;
1675 }
// Rewind src to the row it pointed at before the loop.
1676 src -= stride*y;
1677 }
1678 src += HTAPS_MAX/2 - 1;
1679 tmp2= tmp2t[1];
1680
// Vertical pass straight on src, b_w+1 columns wide, into tmp2t[1].
1681 if(b&2){
1682 for(y=0; y < b_h; y++){
1683 for(x=0; x < b_w+1; x++){
1684 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1685 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1686 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1687 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1688 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1689 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1690 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1691 int a6= src[x + (HTAPS_MAX/2+3)*stride];
1692 int am=0;
1693 if(!p || p->fast_mc)
1694 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1695 else
1696 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1697
1698 if(am&(~255)) am= ~(am>>31);
1699 tmp2[x]= am;
1700 }
1701 src += stride;
1702 tmp2+= stride;
1703 }
1704 src -= stride*y;
1705 }
1706 src += stride*(HTAPS_MAX/2 - 1);
1707 tmp2= tmp2t[2];
1708 tmpI= tmpIt;
// Vertical pass over the unrounded horizontal sums in tmpI, into tmp2t[2];
// the rounding shift (10 or 12) accounts for both filter stages.
1709 if(b&4){
1710 for(y=0; y < b_h; y++){
1711 for(x=0; x < b_w; x++){
1712 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1713 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1714 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1715 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1716 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1717 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1718 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1719 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1720 int am=0;
1721 if(!p || p->fast_mc)
1722 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1723 else
1724 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1725 if(am&(~255)) am= ~(am>>31);
1726 tmp2[x]= am;
1727 }
1728 tmpI+= 64;
1729 tmp2+= stride;
1730 }
1731 }
1732
// Source planes laid out as a 3x3 grid with a row pitch of 4 entries;
// hpel[3] and hpel[7] are never assigned and never indexed below.
1733 hpel[ 0]= src;
1734 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1735 hpel[ 2]= src + 1;
1736
1737 hpel[ 4]= tmp2t[1];
1738 hpel[ 5]= tmp2t[2];
1739 hpel[ 6]= tmp2t[1] + 1;
1740
1741 hpel[ 8]= src + stride;
1742 hpel[ 9]= hpel[1] + stride;
1743 hpel[10]= hpel[8] + 1;
1744
// b==15: blend the four planes surrounding the position, weighted by the
// low three bits of dx and dy.
1745 if(b==15){
1746 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1747 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1748 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1749 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1750 dx&=7;
1751 dy&=7;
1752 for(y=0; y < b_h; y++){
1753 for(x=0; x < b_w; x++){
1754 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1755 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
1756 }
1757 src1+=stride;
1758 src2+=stride;
1759 src3+=stride;
1760 src4+=stride;
1761 dst +=stride;
1762 }
1763 }else{
// Otherwise blend the two planes selected by brane[] with weights a and 8-a.
// The local b declared here shadows the outer pass mask.
1764 const uint8_t *src1= hpel[l];
1765 const uint8_t *src2= hpel[r];
1766 int a= weight[((dx&7) + (8*(dy&7)))];
1767 int b= 8-a;
1768 for(y=0; y < b_h; y++){
1769 for(x=0; x < b_w; x++){
1770 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1771 }
1772 src1+=stride;
1773 src2+=stride;
1774 dst +=stride;
1775 }
1776 }
1777 }
1778
/*
 * mca(dx,dy,b_w) defines a static function mc_block_hpel<dx><dy><b_w>(dst, src,
 * stride, h) that forwards to mc_block() with p == NULL and a square
 * b_w x b_w block at the fixed sub-position (dx, dy). src is moved back by
 * HTAPS_MAX/2-1 columns and rows before the call, and h is asserted to equal b_w.
 * NOTE(review): tmp is a variable-length array sized from stride.
 * The eight instantiations below cover dx, dy in {0, 8} for widths 16 and 8.
 */
1779 #define mca(dx,dy,b_w)\
1780 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1781 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1782 assert(h==b_w);\
1783 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
1784 }
1785
1786 mca( 0, 0,16)
1787 mca( 8, 0,16)
1788 mca( 0, 8,16)
1789 mca( 8, 8,16)
1790 mca( 0, 0,8)
1791 mca( 8, 0,8)
1792 mca( 0, 8,8)
1793 mca( 8, 8,8)
1794
/**
 * Write the prediction of one b_w x b_h block into dst.
 *
 * Blocks with BLOCK_INTRA set are filled with block->color[plane_index].
 * Other blocks are fetched from s->last_picture[block->ref] at the position
 * (sx, sy) displaced by the block's scaled motion vector, using either
 * mc_block() or the put_h264_qpel_pixels_tab functions from s->dsp.
 *
 * tmp is used as scratch space: as the destination of ff_emulated_edge_mc()
 * at offset MB_SIZE when the source area is not fully inside the w x h
 * picture, and as the tmp argument of mc_block().
 */
1795 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1796 if(block->type & BLOCK_INTRA){
1797 int x, y;
1798 const int color = block->color[plane_index];
// The colour replicated into all four bytes of a 32-bit word.
1799 const int color4= color*0x01010101;
// Widths 32, 16, 8 and 4 are filled with 32-bit stores; any other width is
// filled byte by byte.
// NOTE(review): the uint32_t stores go through a cast of a uint8_t pointer —
// confirm alignment and aliasing are acceptable on all targets.
1800 if(b_w==32){
1801 for(y=0; y < b_h; y++){
1802 *(uint32_t*)&dst[0 + y*stride]= color4;
1803 *(uint32_t*)&dst[4 + y*stride]= color4;
1804 *(uint32_t*)&dst[8 + y*stride]= color4;
1805 *(uint32_t*)&dst[12+ y*stride]= color4;
1806 *(uint32_t*)&dst[16+ y*stride]= color4;
1807 *(uint32_t*)&dst[20+ y*stride]= color4;
1808 *(uint32_t*)&dst[24+ y*stride]= color4;
1809 *(uint32_t*)&dst[28+ y*stride]= color4;
1810 }
1811 }else if(b_w==16){
1812 for(y=0; y < b_h; y++){
1813 *(uint32_t*)&dst[0 + y*stride]= color4;
1814 *(uint32_t*)&dst[4 + y*stride]= color4;
1815 *(uint32_t*)&dst[8 + y*stride]= color4;
1816 *(uint32_t*)&dst[12+ y*stride]= color4;
1817 }
1818 }else if(b_w==8){
1819 for(y=0; y < b_h; y++){
1820 *(uint32_t*)&dst[0 + y*stride]= color4;
1821 *(uint32_t*)&dst[4 + y*stride]= color4;
1822 }
1823 }else if(b_w==4){
1824 for(y=0; y < b_h; y++){
1825 *(uint32_t*)&dst[0 + y*stride]= color4;
1826 }
1827 }else{
1828 for(y=0; y < b_h; y++){
1829 for(x=0; x < b_w; x++){
1830 dst[x + y*stride]= color;
1831 }
1832 }
1833 }
1834 }else{
1835 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// Plane 0 uses twice the motion vector scale of the other planes.
1836 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1837 int mx= block->mx*scale;
1838 int my= block->my*scale;
// Low four bits are the sub-position; the rest is the whole-sample offset.
1839 const int dx= mx&15;
1840 const int dy= my&15;
1841 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
// Move to the top-left corner of the filter footprint.
1842 sx += (mx>>4) - (HTAPS_MAX/2-1);
1843 sy += (my>>4) - (HTAPS_MAX/2-1);
1844 src += sx + sy*stride;
// The unsigned casts make negative coordinates fail the range test too.
1845 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1846 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1847 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1848 src= tmp + MB_SIZE;
1849 }
1850 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1851 // assert(!(b_w&(b_w-1)));
1852 assert(b_w>1 && b_h>1);
1853 assert((tab_index>=0 && tab_index<4) || b_w==32);
// mc_block() handles everything the qpel table functions are not used for:
// sub-positions that are not multiples of 4, shapes other than 1:1, 1:2 or
// 2:1, widths that are not a power of two, and planes without fast_mc.
1854 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1855 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1856 else if(b_w==32){
1857 int y;
// Width 32: two calls of the table-0 function per 16 rows, 16 columns apart.
1858 for(y=0; y<b_h; y+=16){
1859 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1860 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1861 }
1862 }else if(b_w==b_h)
1863 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1864 else if(b_w==2*b_h){
// Wide block: two calls side by side, b_h columns apart.
1865 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1866 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1867 }else{
// Tall block: two calls stacked vertically, b_w rows apart.
1868 assert(2*b_w==b_h);
1869 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1870 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1871 }
1872 }
1873 }
1874
1875 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1876 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1877 int y, x;
1878 IDWTELEM * dst;
1879 for(y=0; y<b_h; y++){
1880 //FIXME ugly misuse of obmc_stride
1881 const uint8_t *obmc1= obmc + y*obmc_stride;
1882 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1883 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1884 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1885 dst = slice_buffer_get_line(sb, src_y + y);
1886 for(x=0; x<b_w; x++){
1887 int v= obmc1[x] * block[3][x + y*src_stride]
1888 +obmc2[x] * block[2][x + y*src_stride]
1889 +obmc3[x] * block[1][x + y*src_stride]
1890 +obmc4[x] * block[0][x + y*src_stride];
1891
1892 v <<= 8 - LOG2_OBMC_MAX;
1893 if(FRAC_BITS != 8){
1894 v >>= 8 - FRAC_BITS;
1895 }
1896 if(add){
1897 v += dst[x + src_x];
1898 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1899 if(v&(~255)) v= ~(v>>31);
1900 dst8[x + y*src_stride] = v;
1901 }else{
1902 dst[x + src_x] -= v;
1903 }
1904 }
1905 }
1906 }
1907
1908 //FIXME name cleanup (b_w, block_w, b_width stuff)
1909 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1910 const int b_width = s->b_width << s->block_max_depth;
1911 const int b_height= s->b_height << s->block_max_depth;
1912 const int b_stride= b_width;
1913 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1914 BlockNode *rt= lt+1;
1915 BlockNode *lb= lt+b_stride;
1916 BlockNode *rb= lb+1;
1917 uint8_t *block[4];
1918 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1919 uint8_t *tmp = s->scratchbuf;
1920 uint8_t *ptmp;
1921 int x,y;
1922
1923 if(b_x<0){
1924 lt= rt;
1925 lb= rb;
1926 }else if(b_x + 1 >= b_width){
1927 rt= lt;
1928 rb= lb;
1929 }
1930 if(b_y<0){
1931 lt= lb;
1932 rt= rb;
1933 }else if(b_y + 1 >= b_height){
1934 lb= lt;
1935 rb= rt;
1936 }
1937
1938 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1939 obmc -= src_x;
1940 b_w += src_x;
1941 if(!sliced && !offset_dst)
1942 dst -= src_x;
1943 src_x=0;
1944 }else if(src_x + b_w > w){
1945 b_w = w - src_x;
1946 }
1947 if(src_y<0){
1948 obmc -= src_y*obmc_stride;
1949 b_h += src_y;
1950 if(!sliced && !offset_dst)
1951 dst -= src_y*dst_stride;
1952 src_y=0;
1953 }else if(src_y + b_h> h){
1954 b_h = h - src_y;
1955 }
1956
1957 if(b_w<=0 || b_h<=0) return;
1958
1959 assert(src_stride > 2*MB_SIZE + 5);
1960
1961 if(!sliced && offset_dst)
1962 dst += src_x + src_y*dst_stride;
1963 dst8+= src_x + src_y*src_stride;
1964 // src += src_x + src_y*src_stride;
1965
1966 ptmp= tmp + 3*tmp_step;
1967 block[0]= ptmp;
1968 ptmp+=tmp_step;
1969 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1970
1971 if(same_block(lt, rt)){
1972 block[1]= block[0];
1973 }else{
1974 block[1]= ptmp;
1975 ptmp+=tmp_step;
1976 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1977 }
1978
1979 if(same_block(lt, lb)){
1980 block[2]= block[0];
1981 }else if(same_block(rt, lb)){
1982 block[2]= block[1];
1983 }else{
1984 block[2]= ptmp;
1985 ptmp+=tmp_step;
1986 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1987 }
1988
1989 if(same_block(lt, rb) ){
1990 block[3]= block[0];
1991 }else if(same_block(rt, rb)){
1992 block[3]= block[1];
1993 }else if(same_block(lb, rb)){
1994 block[3]= block[2];
1995 }else{
1996 block[3]= ptmp;
1997 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1998 }
1999 #if 0
2000 for(y=0; y<b_h; y++){
2001 for(x=0; x<b_w; x++){
2002 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2003 if(add) dst[x + y*dst_stride] += v;
2004 else dst[x + y*dst_stride] -= v;
2005 }
2006 }
2007 for(y=0; y<b_h; y++){
2008 uint8_t *obmc2= obmc + (obmc_stride>>1);
2009 for(x=0; x<b_w; x++){
2010 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2011 if(add) dst[x + y*dst_stride] += v;
2012 else dst[x + y*dst_stride] -= v;
2013 }
2014 }
2015 for(y=0; y<b_h; y++){
2016 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2017 for(x=0; x<b_w; x++){
2018 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2019 if(add) dst[x + y*dst_stride] += v;
2020 else dst[x + y*dst_stride] -= v;
2021 }
2022 }
2023 for(y=0; y<b_h; y++){
2024 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2025 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2026 for(x=0; x<b_w; x++){
2027 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2028 if(add) dst[x + y*dst_stride] += v;
2029 else dst[x + y*dst_stride] -= v;
2030 }
2031 }
2032 #else
2033 if(sliced){
2034 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2035 }else{
2036 for(y=0; y<b_h; y++){
2037 //FIXME ugly misuse of obmc_stride
2038 const uint8_t *obmc1= obmc + y*obmc_stride;
2039 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2040 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2041 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2042 for(x=0; x<b_w; x++){
2043 int v= obmc1[x] * block[3][x + y*src_stride]
2044 +obmc2[x] * block[2][x + y*src_stride]
2045 +obmc3[x] * block[1][x + y*src_stride]
2046 +obmc4[x] * block[0][x + y*src_stride];
2047
2048 v <<= 8 - LOG2_OBMC_MAX;
2049 if(FRAC_BITS != 8){
2050 v >>= 8 - FRAC_BITS;
2051 }
2052 if(add){
2053 v += dst[x + y*dst_stride];
2054 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2055 if(v&(~255)) v= ~(v>>31);
2056 dst8[x + y*src_stride] = v;
2057 }else{
2058 dst[x + y*dst_stride] -= v;
2059 }
2060 }
2061 }
2062 }
2063 #endif /* 0 */
2064 }
2065
2066 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2067 Plane *p= &s->plane[plane_index];
2068 const int mb_w= s->b_width << s->block_max_depth;
2069 const int mb_h= s->b_height << s->block_max_depth;
2070 int x, y, mb_x;
2071 int block_size = MB_SIZE >> s->block_max_depth;
2072 int block_w = plane_index ? block_size/2 : block_size;
2073 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2074 int obmc_stride= plane_index ? block_size : 2*block_size;
2075 int ref_stride= s->current_picture.linesize[plane_index];
2076 uint8_t *dst8= s->current_picture.data[plane_index];
2077 int w= p->width;
2078 int h= p->height;
2079
2080 if(s->keyframe || (s->avctx->debug&512)){
2081 if(mb_y==mb_h)
2082 return;
2083
2084 if(add){
2085 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2086 // DWTELEM * line = slice_buffer_get_line(sb, y);
2087 IDWTELEM * line = sb->line[y];
2088 for(x=0; x<w; x++){
2089 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2090 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2091 v >>= FRAC_BITS;
2092 if(v&(~255)) v= ~(v>>31);
2093 dst8[x + y*ref_stride]= v;
2094 }
2095 }
2096 }else{
2097 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2098 // DWTELEM * line = slice_buffer_get_line(sb, y);
2099 IDWTELEM * line = sb->line[y];
2100 for(x=0; x<w; x++){
2101 line[x] -= 128 << FRAC_BITS;
2102 // buf[x + y*w]-= 128<<FRAC_BITS;
2103 }
2104 }
2105 }
2106
2107 return;
2108 }
2109
2110 for(mb_x=0; mb_x<=mb_w; mb_x++){
2111 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2112 block_w*mb_x - block_w/2,
2113 block_w*mb_y - block_w/2,
2114 block_w, block_w,
2115 w, h,
2116 w, ref_stride, obmc_stride,
2117 mb_x - 1, mb_y - 1,
2118 add, 0, plane_index);
2119 }
2120 }
2121
2122 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2123 Plane *p= &s->plane[plane_index];
2124 const int mb_w= s->b_width << s->block_max_depth;
2125 const int mb_h= s->b_height << s->block_max_depth;
2126 int x, y, mb_x;
2127 int block_size = MB_SIZE >> s->block_max_depth;
2128 int block_w = plane_index ? block_size/2 : block_size;
2129 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2130 const int obmc_stride= plane_index ? block_size : 2*block_size;
2131 int ref_stride= s->current_picture.linesize[plane_index];
2132 uint8_t *dst8= s->current_picture.data[plane_index];
2133 int w= p->width;
2134 int h= p->height;
2135
2136 if(s->keyframe || (s->avctx->debug&512)){
2137 if(mb_y==mb_h)
2138 return;
2139
2140 if(add){
2141 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2142 for(x=0; x<w; x++){
2143 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2144 v >>= FRAC_BITS;
2145 if(v&(~255)) v= ~(v>>31);
2146 dst8[x + y*ref_stride]= v;
2147 }
2148 }
2149 }else{
2150 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2151 for(x=0; x<w; x++){
2152 buf[x + y*w]-= 128<<FRAC_BITS;
2153 }
2154 }
2155 }
2156
2157 return;
2158 }
2159
2160 for(mb_x=0; mb_x<=mb_w; mb_x++){
2161 add_yblock(s, 0, NULL, buf, dst8, obmc,
2162 block_w*mb_x - block_w/2,
2163 block_w*mb_y - block_w/2,
2164 block_w, block_w,
2165 w, h,
2166 w, ref_stride, obmc_stride,
2167 mb_x - 1, mb_y - 1,
2168 add, 1, plane_index);
2169 }
2170 }
2171
2172 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2173 const int mb_h= s->b_height << s->block_max_depth;
2174 int mb_y;
2175 for(mb_y=0; mb_y<=mb_h; mb_y++)
2176 predict_slice(s, buf, plane_index, add, mb_y);
2177 }
2178
2179 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
2180 const int w= b->width;
2181 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
2182 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2183 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2184 int x,y;
2185
2186 if(s->qlog == LOSSLESS_QLOG) return;
2187
2188 for(y=start_y; y<end_y; y++){
2189 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2190 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2191 for(x=0; x<w; x++){
2192 int i= line[x];
2193 if(i<0){
2194 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2195 }else if(i>0){
2196 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
2197 }
2198 }
2199 }
2200 }
2201
2202 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
2203 const int w= b->width;
2204 int x,y;
2205
2206 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
2207 IDWTELEM * prev;
2208
2209 if (start_y != 0)
2210 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2211
2212 for(y=start_y; y<end_y; y++){
2213 prev = line;
2214 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2215 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2216 for(x=0; x<w; x++){
2217 if(x){
2218 if(use_median){
2219 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2220 else line[x] += line[x - 1];
2221 }else{
2222 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2223 else line[x] += line[x - 1];
2224 }
2225 }else{
2226 if(y) line[x] += prev[x];
2227 }
2228 }
2229 }
2230 }
2231
2232 static void decode_qlogs(SnowContext *s){
2233 int plane_index, level, orientation;
2234
2235 for(plane_index=0; plane_index<3; plane_index++){
2236 for(level=0; level<s->spatial_decomposition_count; level++){
2237 for(orientation=level ? 1:0; orientation<4; orientation++){
2238 int q;
2239 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2240 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2241 else q= get_symbol(&s->c, s->header_state, 1);
2242 s->plane[plane_index].band[level][orientation].qlog= q;
2243 }
2244 }
2245 }
2246 }
2247
2248 #define GET_S(dst, check) \
2249 tmp= get_symbol(&s->c, s->header_state, 0);\
2250 if(!(check)){\
2251 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
2252 return -1;\
2253 }\
2254 dst= tmp;
2255
2256 static int decode_header(SnowContext *s){
2257 int plane_index, tmp;
2258 uint8_t kstate[32];
2259
2260 memset(kstate, MID_STATE, sizeof(kstate));
2261
2262 s->keyframe= get_rac(&s->c, kstate);
2263 if(s->keyframe || s->always_reset){
2264 reset_contexts(s);
2265 s->spatial_decomposition_type=
2266 s->qlog=
2267 s->qbias=
2268 s->mv_scale=
2269 s->block_max_depth= 0;
2270 }
2271 if(s->keyframe){
2272 GET_S(s->version, tmp <= 0U)
2273 s->always_reset= get_rac(&s->c, s->header_state);
2274 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2275 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2276 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2277 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2278 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2279 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2280 s->spatial_scalability= get_rac(&s->c, s->header_state);
2281 // s->rate_scalability= get_rac(&s->c, s->header_state);
2282 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
2283 s->max_ref_frames++;
2284
2285 decode_qlogs(s);
2286 }
2287
2288 if(!s->keyframe){
2289 if(get_rac(&s->c, s->header_state)){
2290 for(plane_index=0; plane_index<2; plane_index++){
2291 int htaps, i, sum=0;
2292 Plane *p= &s->plane[plane_index];
2293 p->diag_mc= get_rac(&s->c, s->header_state);
2294 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
2295 if((unsigned)htaps > HTAPS_MAX || htaps==0)
2296 return -1;
2297 p->htaps= htaps;
2298 for(i= htaps/2; i; i--){
2299 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
2300 sum += p->hcoeff[i];
2301 }
2302 p->hcoeff[0]= 32-sum;
2303 }
2304 s->plane[2].diag_mc= s->plane[1].diag_mc;
2305 s->plane[2].htaps = s->plane[1].htaps;
2306 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
2307 }
2308 if(get_rac(&s->c, s->header_state)){
2309 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2310 decode_qlogs(s);
2311 }
2312 }
2313
2314 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
2315 if(s->spatial_decomposition_type > 1U){
2316 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2317 return -1;
2318 }
2319 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
2320 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
2321 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
2322 return -1;
2323 }
2324
2325 s->qlog += get_symbol(&s->c, s->header_state, 1);
2326 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
2327 s->qbias += get_symbol(&s->c, s->header_state, 1);
2328 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
2329 if(s->block_max_depth > 1 || s->block_max_depth < 0){
2330 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
2331 s->block_max_depth= 0;
2332 return -1;
2333 }
2334
2335 return 0;
2336 }
2337
2338 static void init_qexp(void){
2339 int i;
2340 double v=128;
2341
2342 for(i=0; i<QROOT; i++){
2343 qexp[i]= lrintf(v);
2344 v *= pow(2, 1.0 / QROOT);
2345 }
2346 }
2347
2348 static av_cold int common_init(AVCodecContext *avctx){
2349 SnowContext *s = avctx->priv_data;
2350 int width, height;
2351 int i, j;
2352
2353 s->avctx= avctx;
2354 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
2355
2356 dsputil_init(&s->dsp, avctx);
2357
2358 #define mcf(dx,dy)\
2359 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2360 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2361 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2362 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
2363 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2364 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
2365
2366 mcf( 0, 0)
2367 mcf( 4, 0)
2368 mcf( 8, 0)
2369 mcf(12, 0)
2370 mcf( 0, 4)
2371 mcf( 4, 4)
2372 mcf( 8, 4)
2373 mcf(12, 4)
2374 mcf( 0, 8)
2375 mcf( 4, 8)
2376 mcf( 8, 8)
2377 mcf(12, 8)
2378 mcf( 0,12)
2379 mcf( 4,12)
2380 mcf( 8,12)
2381 mcf(12,12)
2382
2383 #define mcfh(dx,dy)\
2384 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2385 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2386 mc_block_hpel ## dx ## dy ## 16;\
2387 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
2388 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2389 mc_block_hpel ## dx ## dy ## 8;
2390
2391 mcfh(0, 0)
2392 mcfh(8, 0)
2393 mcfh(0, 8)
2394 mcfh(8, 8)
2395
2396 if(!qexp[0])
2397 init_qexp();
2398
2399 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2400
2401 width= s->avctx->width;
2402 height= s->avctx->height;
2403
2404 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
2405 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
2406
2407 for(i=0; i<MAX_REF_FRAMES; i++)
2408 for(j=0; j<MAX_REF_FRAMES; j++)
2409 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
2410
2411 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2412 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
2413
2414 return 0;
2415 }
2416
2417 static int common_init_after_header(AVCodecContext *avctx){
2418 SnowContext *s = avctx->priv_data;
2419 int plane_index, level, orientation;
2420
2421 for(plane_index=0; plane_index<3; plane_index++){
2422 int w= s->avctx->width;
2423 int h= s->avctx->height;
2424
2425 if(plane_index){
2426 w>>= s->chroma_h_shift;
2427 h>>= s->chroma_v_shift;
2428 }
2429 s->plane[plane_index].width = w;
2430 s->plane[plane_index].height= h;
2431
2432 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2433 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2434 SubBand *b= &s->plane[plane_index].band[level][orientation];
2435
2436 b->buf= s->spatial_dwt_buffer;
2437 b->level= level;
2438 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2439 b->width = (w + !(orientation&1))>>1;
2440 b->height= (h + !(orientation>1))>>1;
2441
2442 b->stride_line = 1 << (s->spatial_decomposition_count - level);
2443 b->buf_x_offset = 0;
2444 b->buf_y_offset = 0;
2445
2446 if(orientation&1){
2447 b->buf += (w+1)>>1;
2448 b->buf_x_offset = (w+1)>>1;
2449 }
2450 if(orientation>1){
2451 b->buf += b->stride>>1;
2452 b->buf_y_offset = b->stride_line >> 1;
2453 }
2454 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
2455
2456 if(level)
2457 b->parent= &s->plane[plane_index].band[level-1][orientation];
2458 //FIXME avoid this realloc
2459 av_freep(&b->x_coeff);
2460 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
2461 }
2462 w= (w+1)>>1;
2463 h= (h+1)>>1;
2464 }
2465 }
2466
2467 return 0;
2468 }
2469
2470 #define QUANTIZE2 0
2471
2472 #if QUANTIZE2==1
2473 #define Q2_STEP 8
2474
2475 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
2476 SubBand *b= &p->band[level][orientation];
2477 int x, y;
2478 int xo=0;
2479 int yo=0;
2480 int step= 1 << (s->spatial_decomposition_count - level);
2481
2482 if(orientation&1)
2483 xo= step>>1;
2484 if(orientation&2)
2485 yo= step>>1;
2486
2487 //FIXME bias for nonzero ?
2488 //FIXME optimize
2489 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
2490 for(y=0; y<p->height; y++){
2491 for(x=0; x<p->width; x++){
2492 int sx= (x-xo + step/2) / step / Q2_STEP;
2493 int sy= (y-yo + step/2) / step / Q2_STEP;
2494 int v= r0[x + y*p->width] - r1[x + y*p->width];
2495 assert(sx>=0 && sy>=0 && sx < score_stride);
2496 v= ((v+8)>>4)<<4;
2497 score[sx + sy*score_stride] += v*v;
2498 assert(score[sx + sy*score_stride] >= 0);
2499 }
2500 }
2501 }
2502
2503 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
2504 int level, orientation;
2505
2506 for(level=0; level<s->spatial_decomposition_count; level++){
2507 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2508 SubBand *b= &p->band[level][orientation];
2509 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
2510
2511 dequantize(s, b, dst, b->stride);
2512 }
2513 }
2514 }
2515
2516 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
2517 int level, orientation, ys, xs, x, y, pass;
2518 IDWTELEM best_dequant[height * stride];
2519 IDWTELEM idwt2_buffer[height * stride];
2520 const int score_stride= (width + 10)/Q2_STEP;
2521 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2522 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2523 int threshold= (s->m.lambda * s->m.lambda) >> 6;
2524
2525 //FIXME pass the copy cleanly ?
2526
2527 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
2528 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
2529
2530 for(level=0; level<s->spatial_decomposition_count; level++){
2531 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2532 SubBand *b= &p->band[level][orientation];
2533 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2534 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
2535 assert(src == b->buf); // code does not depend on this but it is true currently
2536
2537 quantize(s, b, dst, src, b->stride, s->qbias);
2538 }
2539 }
2540 for(pass=0; pass<1; pass++){
2541 if(s->qbias == 0) //keyframe
2542 continue;
2543 for(level=0; level<s->spatial_decomposition_count; level++){
2544 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2545 SubBand *b= &p->band[level][orientation];
2546 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
2547 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2548
2549 for(ys= 0; ys<Q2_STEP; ys++){
2550 for(xs= 0; xs<Q2_STEP; xs++){
2551 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2552 dequantize_all(s, p, idwt2_buffer, width, height);
2553 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2554 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2555 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2556 for(y=ys; y<b->height; y+= Q2_STEP){
2557 for(x=xs; x<b->width; x+= Q2_STEP){
2558 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
2559 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
2560 //FIXME try more than just --
2561 }
2562 }
2563 dequantize_all(s, p, idwt2_buffer, width, height);
2564 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2565 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2566 for(y=ys; y<b->height; y+= Q2_STEP){
2567 for(x=xs; x<b->width; x+= Q2_STEP){
2568 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
2569 if(score[score_idx] <= best_score[score_idx] + threshold){
2570 best_score[score_idx]= score[score_idx];
2571 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
2572 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
2573 //FIXME copy instead
2574 }
2575 }
2576 }
2577 }
2578 }
2579 }
2580 }
2581 }
2582 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
2583 }
2584
2585 #endif /* QUANTIZE2==1 */
2586
2587 #define USE_HALFPEL_PLANE 0
2588
2589 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
2590 int p,x,y;
2591
2592 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
2593
2594 for(p=0; p<3; p++){
2595 int is_chroma= !!p;
2596 int w= s->avctx->width >>is_chroma;
2597 int h= s->avctx->height >>is_chroma;
2598 int ls= frame->linesize[p];
2599 uint8_t *src= frame->data[p];
2600
2601 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2602 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2603 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2604
2605 halfpel[0][p]= src;
2606 for(y=0; y<h; y++){
2607 for(x=0; x<w; x++){
2608 int i= y*ls + x;
2609
2610 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
2611 }
2612 }
2613 for(y=0; y<h; y++){
2614 for(x=0; x<w; x++){
2615 int i= y*ls + x;
2616
2617 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2618 }
2619 }
2620 src= halfpel[1][p];
2621 for(y=0; y<h; y++){
2622 for(x=0; x<w; x++){
2623 int i= y*ls + x;
2624
2625 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2626 }
2627 }
2628
2629 //FIXME border!
2630 }
2631 }
2632
2633 static void release_buffer(AVCodecContext *avctx){
2634 SnowContext *s = avctx->priv_data;
2635 int i;
2636
2637 if(s->last_picture[s->max_ref_frames-1].data[0]){
2638 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
2639 for(i=0; i<9; i++)
2640 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
2641 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
2642 }
2643 }
2644
2645 static int frame_start(SnowContext *s){
2646 AVFrame tmp;
2647 int w= s->avctx->width; //FIXME round up to x16 ?
2648 int h= s->avctx->height;
2649
2650 if(s->current_picture.data[0]){
2651 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2652 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2653 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2654 }
2655
2656 release_buffer(s->avctx);
2657
2658 tmp= s->last_picture[s->max_ref_frames-1];
2659 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
2660 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
2661 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
2662 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
2663 s->last_picture[0]= s->current_picture;
2664 s->current_picture= tmp;
2665
2666 if(s->keyframe){
2667 s->ref_frames= 0;
2668 }else{
2669 int i;
2670 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2671 if(i && s->last_picture[i-1].key_frame)
2672 break;
2673 s->ref_frames= i;
2674 if(s->ref_frames==0){
2675 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2676 return -1;
2677 }
2678 }
2679
2680 s->current_picture.reference= 1;
2681 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2682 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2683 return -1;
2684 }
2685
2686 s->current_picture.key_frame= s->keyframe;
2687
2688 return 0;
2689 }
2690
2691 static av_cold void common_end(SnowContext *s){
2692 int plane_index, level, orientation, i;
2693
2694 av_freep(&s->spatial_dwt_buffer);
2695 av_freep(&s->spatial_idwt_buffer);
2696
2697 s->m.me.temp= NULL;
2698 av_freep(&s->m.me.scratchpad);
2699 av_freep(&s->m.me.map);
2700 av_freep(&s->m.me.score_map);
2701 av_freep(&s->m.obmc_scratchpad);
2702
2703 av_freep(&s->block);
2704 av_freep(&s->scratchbuf);
2705
2706 for(i=0; i<MAX_REF_FRAMES; i++){
2707 av_freep(&s->ref_mvs[i]);
2708 av_freep(&s->ref_scores[i]);
2709 if(s->last_picture[i].data[0])
2710 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2711 }
2712
2713 for(plane_index=0; plane_index<3; plane_index++){
2714 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2715 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2716 SubBand *b= &s->plane[plane_index].band[level][orientation];
2717
2718 av_freep(&b->x_coeff);
2719 }
2720 }
2721 }
2722 }
2723
2724 static av_cold int decode_init(AVCodecContext *avctx)
2725 {
2726 avctx->pix_fmt= PIX_FMT_YUV420P;
2727
2728 common_init(avctx);
2729
2730 return 0;
2731 }
2732
2733 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2734 const uint8_t *buf = avpkt->data;
2735 int buf_size = avpkt->size;
2736 SnowContext *s = avctx->priv_data;
2737 RangeCoder * const c= &s->c;
2738 int bytes_read;
2739 AVFrame *picture = data;
2740 int level, orientation, plane_index;
2741
2742 ff_init_range_decoder(c, buf, buf_size);
2743 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2744
2745 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2746 if(decode_header(s)<0)
2747 return -1;
2748 common_init_after_header(avctx);
2749
2750 // realloc slice buffer for the case that spatial_decomposition_count changed
2751 slice_buffer_destroy(&s->sb);
2752 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2753
2754 for(plane_index=0; plane_index<3; plane_index++){
2755 Plane *p= &s->plane[plane_index];
2756 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2757 && p->hcoeff[1]==-10
2758 && p->hcoeff[2]==2;
2759 }
2760
2761 alloc_blocks(s);
2762
2763 if(frame_start(s) < 0)
2764 return -1;
2765 //keyframe flag duplication mess FIXME
2766 if(avctx->debug&FF_DEBUG_PICT_INFO)
2767 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2768
2769 decode_blocks(s);
2770
2771 for(plane_index=0; plane_index<3; plane_index++){
2772 Plane *p= &s->plane[plane_index];
2773 int w= p->width;
2774 int h= p->height;
2775 int x, y;
2776 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2777
2778 if(s->avctx->debug&2048){
2779 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2780 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2781
2782 for(y=0; y<h; y++){
2783 for(x=0; x<w; x++){
2784 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2785 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2786 }
2787 }
2788 }
2789
2790 {
2791 for(level=0; level<s->spatial_decomposition_count; level++){
2792 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2793 SubBand *b= &p->band[level][orientation];
2794 unpack_coeffs(s, b, b->parent, orientation);
2795 }
2796 }
2797 }
2798
2799 {
2800 const int mb_h= s->b_height << s->block_max_depth;
2801 const int block_size = MB_SIZE >> s->block_max_depth;
2802 const int block_w = plane_index ? block_size/2 : block_size;
2803 int mb_y;
2804 DWTCompose cs[MAX_DECOMPOSITIONS];
2805 int yd=0, yq=0;
2806 int y;
2807 int end_y;
2808
2809 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2810 for(mb_y=0; mb_y<=mb_h; mb_y++){
2811
2812 int slice_starty = block_w*mb_y;
2813 int slice_h = block_w*(mb_y+1);
2814 if (!(s->keyframe || s->avctx->debug&512)){
2815 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2816 slice_h -= (block_w >> 1);
2817 }
2818
2819 for(level=0; level<s->spatial_decomposition_count; level++){
2820 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2821 SubBand *b= &p->band[level][orientation];
2822 int start_y;
2823 int end_y;
2824 int our_mb_start = mb_y;
2825 int our_mb_end = (mb_y + 1);
2826 const int extra= 3;
2827 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2828 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2829 if (!(s->keyframe || s->avctx->debug&512)){
2830 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2831 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2832 }
2833 start_y = FFMIN(b->height, start_y);
2834 end_y = FFMIN(b->height, end_y);
2835
2836 if (start_y != end_y){
2837 if (orientation == 0){
2838 SubBand * correlate_band = &p->band[0][0];
2839 int correlate_end_y = FFMIN(b->height, end_y + 1);
2840 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2841 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2842 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2843 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2844 }
2845 else
2846 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2847 }
2848 }
2849 }
2850
2851 for(; yd<slice_h; yd+=4){
2852 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2853 }
2854
2855 if(s->qlog == LOSSLESS_QLOG){
2856 for(; yq<slice_h && yq<h; yq++){
2857 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2858 for(x=0; x<w; x++){
2859 line[x] <<= FRAC_BITS;
2860 }
2861 }
2862 }
2863
2864 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2865
2866 y = FFMIN(p->height, slice_starty);
2867 end_y = FFMIN(p->height, slice_h);
2868 while(y < end_y)
2869 slice_buffer_release(&s->sb, y++);
2870 }
2871
2872 slice_buffer_flush(&s->sb);
2873 }
2874
2875 }
2876
2877 emms_c();
2878
2879 release_buffer(avctx);
2880
2881 if(!(s->avctx->debug&2048))
2882 *picture= s->current_picture;
2883 else
2884 *picture= s->mconly_picture;
2885
2886 *data_size = sizeof(AVFrame);
2887
2888 bytes_read= c->bytestream - c->bytestream_start;
2889 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2890
2891 return bytes_read;
2892 }
2893
2894 static av_cold int decode_end(AVCodecContext *avctx)
2895 {
2896 SnowContext *s = avctx->priv_data;
2897
2898 slice_buffer_destroy(&s->sb);
2899
2900 common_end(s);
2901
2902 return 0;
2903 }
2904
2905 AVCodec snow_decoder = {
2906 "snow",
2907 CODEC_TYPE_VIDEO,
2908 CODEC_ID_SNOW,
2909 sizeof(SnowContext),
2910 decode_init,
2911 NULL,
2912 decode_end,
2913 decode_frame,
2914 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2915 NULL,
2916 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2917 };
2918
2919 #if CONFIG_SNOW_ENCODER
2920 static av_cold int encode_init(AVCodecContext *avctx)
2921 {
2922 SnowContext *s = avctx->priv_data;
2923 int plane_index;
2924
2925 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2926 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2927 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2928 return -1;
2929 }
2930
2931 if(avctx->prediction_method == DWT_97
2932 && (avctx->flags & CODEC_FLAG_QSCALE)
2933 && avctx->global_quality == 0){
2934 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2935 return -1;
2936 }
2937
2938 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2939
2940 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2941 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2942
2943 for(plane_index=0; plane_index<3; plane_index++){
2944 s->plane[plane_index].diag_mc= 1;
2945 s->plane[plane_index].htaps= 6;
2946 s->plane[plane_index].hcoeff[0]= 40;
2947 s->plane[plane_index].hcoeff[1]= -10;
2948 s->plane[plane_index].hcoeff[2]= 2;
2949 s->plane[plane_index].fast_mc= 1;
2950 }
2951
2952 common_init(avctx);
2953 alloc_blocks(s);
2954
2955 s->version=0;
2956
2957 s->m.avctx = avctx;
2958 s->m.flags = avctx->flags;
2959 s->m.bit_rate= avctx->bit_rate;
2960
2961 s->m.me.temp =
2962 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2963 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2964 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2965 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2966 h263_encode_init(&s->m); //mv_penalty
2967
2968 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2969
2970 if(avctx->flags&CODEC_FLAG_PASS1){
2971 if(!avctx->stats_out)
2972 avctx->stats_out = av_mallocz(256);
2973 }
2974 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2975 if(ff_rate_control_init(&s->m) < 0)
2976 return -1;
2977 }
2978 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2979
2980 avctx->coded_frame= &s->current_picture;
2981 switch(avctx->pix_fmt){
2982 // case PIX_FMT_YUV444P:
2983 // case PIX_FMT_YUV422P:
2984 case PIX_FMT_YUV420P:
2985 case PIX_FMT_GRAY8:
2986 // case PIX_FMT_YUV411P:
2987 // case PIX_FMT_YUV410P:
2988 s->colorspace_type= 0;
2989 break;
2990 /* case PIX_FMT_RGB32:
2991 s->colorspace= 1;
2992 break;*/
2993 default:
2994 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2995 return -1;
2996 }
2997 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2998 s->chroma_h_shift= 1;
2999 s->chroma_v_shift= 1;
3000
3001 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3002 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3003
3004 s->avctx->get_buffer(s->avctx, &s->input_picture);
3005
3006 if(s->avctx->me_method == ME_ITER){
3007 int i;
3008 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3009 for(i=0; i<s->max_ref_frames; i++){
3010 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3011 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
3012 }
3013 }
3014
3015 return 0;
3016 }
3017
//near copy & paste from dsputil, FIXME
/**
 * Sums the pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance in bytes between the starts of two rows
 * @param w         side length of the square
 * @return sum of all w*w pixel values (0 for w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += line[col];
    }
    return total;
}
3033
3034 //near copy & paste from dsputil, FIXME
3035 static int pix_norm1(uint8_t * pix, int line_size, int w)
3036 {
3037 int s, i, j;
3038 uint32_t *sq = ff_squareTbl + 256;
3039
3040 s = 0;
3041 for (i = 0; i < w; i++) {
3042 for (j = 0; j < w; j ++) {
3043 s += sq[pix[0]];
3044 pix ++;
3045 }
3046 pix += line_size - w;
3047 }
3048 return s;
1721 } 3049 }
1722 3050
1723 //FIXME copy&paste 3051 //FIXME copy&paste
1724 #define P_LEFT P[1] 3052 #define P_LEFT P[1]
1725 #define P_TOP P[2] 3053 #define P_TOP P[2]
1950 memcpy(s->block_state, p_state, sizeof(s->block_state)); 3278 memcpy(s->block_state, p_state, sizeof(s->block_state));
1951 return score; 3279 return score;
1952 } 3280 }
1953 } 3281 }
1954 3282
1955 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1956 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1957 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1958 }else{
1959 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1960 }
1961 }
1962
1963 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ 3283 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1964 const int w= s->b_width << s->block_max_depth; 3284 const int w= s->b_width << s->block_max_depth;
1965 const int rem_depth= s->block_max_depth - level; 3285 const int rem_depth= s->block_max_depth - level;
1966 const int index= (x + y*w) << rem_depth; 3286 const int index= (x + y*w) << rem_depth;
1967 int trx= (x+1)<<rem_depth; 3287 int trx= (x+1)<<rem_depth;
2010 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); 3330 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2011 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); 3331 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2012 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); 3332 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2013 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); 3333 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2014 } 3334 }
2015 }
2016
2017 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2018 const int w= s->b_width << s->block_max_depth;
2019 const int rem_depth= s->block_max_depth - level;
2020 const int index= (x + y*w) << rem_depth;
2021 int trx= (x+1)<<rem_depth;
2022 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2023 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2027
2028 if(s->keyframe){
2029 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2030 return;
2031 }
2032
2033 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2034 int type, mx, my;
2035 int l = left->color[0];
2036 int cb= left->color[1];
2037 int cr= left->color[2];
2038 int ref = 0;
2039 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2040 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2041 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2042
2043 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2044
2045 if(type){
2046 pred_mv(s, &mx, &my, 0, left, top, tr);
2047 l += get_symbol(&s->c, &s->block_state[32], 1);
2048 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2049 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2050 }else{
2051 if(s->ref_frames > 1)
2052 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2053 pred_mv(s, &mx, &my, ref, left, top, tr);
2054 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2055 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2056 }
2057 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2058 }else{
2059 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2060 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2063 }
2064 }
2065
2066 static void encode_blocks(SnowContext *s, int search){
2067 int x, y;
2068 int w= s->b_width;
2069 int h= s->b_height;
2070
2071 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2072 iterative_me(s);
2073
2074 for(y=0; y<h; y++){
2075 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2076 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2077 return;
2078 }
2079 for(x=0; x<w; x++){
2080 if(s->avctx->me_method == ME_ITER || !search)
2081 encode_q_branch2(s, 0, x, y);
2082 else
2083 encode_q_branch (s, 0, x, y);
2084 }
2085 }
2086 }
2087
2088 static void decode_blocks(SnowContext *s){
2089 int x, y;
2090 int w= s->b_width;
2091 int h= s->b_height;
2092
2093 for(y=0; y<h; y++){
2094 for(x=0; x<w; x++){
2095 decode_q_branch(s, 0, x, y);
2096 }
2097 }
2098 }
2099
/*
 * Sub-pel motion compensation of a b_w x b_h block from src into dst.
 *
 * dx and dy are the fractional offsets (asserted to be < 16). The function
 * builds up to three filtered copies of the source (horizontal, vertical and
 * horizontal+vertical) and then blends either two of them (weights from the
 * weight table) or four of them bilinearly.
 * p selects the filter: with p == NULL or p->fast_mc the fixed 6-tap
 * (20,-5,1) filter is used, otherwise the taps come from p->hcoeff.
 * src, dst and the temporary planes all use the same stride.
 * NOTE(review): the tmp parameter is not referenced anywhere in this body.
 */
2100 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* weight of the first of the two blended planes (out of 8), indexed by (dx&7) + 8*(dy&7) */
2101 static const uint8_t weight[64]={
2102 8,7,6,5,4,3,2,1,
2103 7,7,0,0,0,0,0,1,
2104 6,0,6,0,0,0,2,0,
2105 5,0,0,5,0,3,0,0,
2106 4,0,0,0,4,0,0,0,
2107 3,0,0,5,0,3,0,0,
2108 2,0,6,0,0,0,2,0,
2109 1,7,0,0,0,0,0,1,
2110 };
2111
/* indexed by dx + 16*dy: high nibble and low nibble are the two hpel[] indices to blend;
 * 0xcc yields index 12 for both, and needs[12] == 15 selects the 4-point path below */
2112 static const uint8_t brane[256]={
2113 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2114 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2115 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2116 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2117 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2118 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2119 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2120 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2121 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2122 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2123 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2124 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2125 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2126 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2127 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2128 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2129 };
2130
/* per hpel[] index: bitmask of the filter passes below that must run
 * (bits 1|4: horizontal pass, bit 2: vertical pass, bit 4: vertical pass over the horizontal result) */
2131 static const uint8_t needs[16]={
2132 0,1,0,0,
2133 2,4,2,0,
2134 0,1,0,0,
2135 15
2136 };
2137
2138 int x, y, b, r, l;
/* tmpIt holds the unrounded horizontal filter output with a fixed row pitch of 64 */
2139 int16_t tmpIt [64*(32+HTAPS_MAX)];
/* three filtered planes; variable-length array sized by the caller's stride */
2140 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2141 int16_t *tmpI= tmpIt;
2142 uint8_t *tmp2= tmp2t[0];
2143 const uint8_t *hpel[11];
2144 assert(dx<16 && dy<16);
2145 r= brane[dx + 16*dy]&15;
2146 l= brane[dx + 16*dy]>>4;
2147
2148 b= needs[l] | needs[r];
/* without diagonal MC every pass is run and the 4-point blend is used */
2149 if(p && !p->diag_mc)
2150 b= 15;
2151
/* pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows -> tmp2t[0] (rounded, clipped) and tmpIt (unrounded) */
2152 if(b&5){
2153 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2154 for(x=0; x < b_w; x++){
2155 int a_1=src[x + HTAPS_MAX/2-4];
2156 int a0= src[x + HTAPS_MAX/2-3];
2157 int a1= src[x + HTAPS_MAX/2-2];
2158 int a2= src[x + HTAPS_MAX/2-1];
2159 int a3= src[x + HTAPS_MAX/2+0];
2160 int a4= src[x + HTAPS_MAX/2+1];
2161 int a5= src[x + HTAPS_MAX/2+2];
2162 int a6= src[x + HTAPS_MAX/2+3];
2163 int am=0;
2164 if(!p || p->fast_mc){
2165 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2166 tmpI[x]= am;
2167 am= (am+16)>>5;
2168 }else{
2169 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2170 tmpI[x]= am;
2171 am= (am+32)>>6;
2172 }
2173
/* clip to 0..255 once stored as uint8_t (relies on arithmetic right shift of negative values) */
2174 if(am&(~255)) am= ~(am>>31);
2175 tmp2[x]= am;
2176 }
2177 tmpI+= 64;
2178 tmp2+= stride;
2179 src += stride;
2180 }
/* rewind src to the first row */
2181 src -= stride*y;
2182 }
2183 src += HTAPS_MAX/2 - 1;
2184 tmp2= tmp2t[1];
2185
/* pass 2: vertical filter directly on the source, b_w+1 columns -> tmp2t[1] */
2186 if(b&2){
2187 for(y=0; y < b_h; y++){
2188 for(x=0; x < b_w+1; x++){
2189 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2190 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2191 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2192 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2193 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2194 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2195 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2196 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2197 int am=0;
2198 if(!p || p->fast_mc)
2199 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2200 else
2201 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2202
2203 if(am&(~255)) am= ~(am>>31);
2204 tmp2[x]= am;
2205 }
2206 src += stride;
2207 tmp2+= stride;
2208 }
2209 src -= stride*y;
2210 }
2211 src += stride*(HTAPS_MAX/2 - 1);
2212 tmp2= tmp2t[2];
2213 tmpI= tmpIt;
/* pass 3: vertical filter over the unrounded horizontal result -> tmp2t[2] */
2214 if(b&4){
2215 for(y=0; y < b_h; y++){
2216 for(x=0; x < b_w; x++){
2217 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2218 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2219 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2220 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2221 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2222 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2223 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2224 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2225 int am=0;
2226 if(!p || p->fast_mc)
2227 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2228 else
2229 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2230 if(am&(~255)) am= ~(am>>31);
2231 tmp2[x]= am;
2232 }
2233 tmpI+= 64;
2234 tmp2+= stride;
2235 }
2236 }
2237
/* hpel[] is laid out as a 3x3 grid with row pitch 4 (entries 3 and 7 are never set):
 * source / horizontally filtered / source+1 in the top row, the vertically filtered
 * planes in the middle row, and the top row shifted down by one line in the bottom row */
2238 hpel[ 0]= src;
2239 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2240 hpel[ 2]= src + 1;
2241
2242 hpel[ 4]= tmp2t[1];
2243 hpel[ 5]= tmp2t[2];
2244 hpel[ 6]= tmp2t[1] + 1;
2245
2246 hpel[ 8]= src + stride;
2247 hpel[ 9]= hpel[1] + stride;
2248 hpel[10]= hpel[8] + 1;
2249
/* b==15: bilinear blend of the four planes surrounding the position, weights from dx&7 / dy&7 */
2250 if(b==15){
2251 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2252 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2253 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2254 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2255 dx&=7;
2256 dy&=7;
2257 for(y=0; y < b_h; y++){
2258 for(x=0; x < b_w; x++){
2259 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2260 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2261 }
2262 src1+=stride;
2263 src2+=stride;
2264 src3+=stride;
2265 src4+=stride;
2266 dst +=stride;
2267 }
2268 }else{
/* otherwise: weighted average of the two planes selected through brane[] */
2269 const uint8_t *src1= hpel[l];
2270 const uint8_t *src2= hpel[r];
2271 int a= weight[((dx&7) + (8*(dy&7)))];
/* note: this b shadows the pass bitmask declared at the top of the function */
2272 int b= 8-a;
2273 for(y=0; y < b_h; y++){
2274 for(x=0; x < b_w; x++){
2275 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2276 }
2277 src1+=stride;
2278 src2+=stride;
2279 dst +=stride;
2280 }
2281 }
2282 }
2283
/*
 * mca(dx,dy,b_w) defines a static wrapper mc_block_hpel<dx><dy><b_w>() with the
 * signature (dst, src, stride, h) around mc_block() for a square b_w x b_w
 * block and a fixed sub-pel offset. The wrapper asserts h == b_w, passes
 * p == NULL (so mc_block uses its fixed-tap filter) and moves src back by
 * HTAPS_MAX/2-1 pixels and rows, the offset mc_block expects for its filter
 * support. The local tmp array is a variable-length array sized by stride.
 */
2284 #define mca(dx,dy,b_w)\
2285 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2286 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2287 assert(h==b_w);\
2288 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2289 }
2290
/* instantiations: full-pel and the three half-pel positions, for 16x16 and 8x8 blocks */
2291 mca( 0, 0,16)
2292 mca( 8, 0,16)
2293 mca( 0, 8,16)
2294 mca( 8, 8,16)
2295 mca( 0, 0,8)
2296 mca( 8, 0,8)
2297 mca( 0, 8,8)
2298 mca( 8, 8,8)
2299
2300 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2301 if(block->type & BLOCK_INTRA){
2302 int x, y;
2303 const int color = block->color[plane_index];
2304 const int color4= color*0x01010101;
2305 if(b_w==32){
2306 for(y=0; y < b_h; y++){
2307 *(uint32_t*)&dst[0 + y*stride]= color4;
2308 *(uint32_t*)&dst[4 + y*stride]= color4;
2309 *(uint32_t*)&dst[8 + y*stride]= color4;
2310 *(uint32_t*)&dst[12+ y*stride]= color4;
2311 *(uint32_t*)&dst[16+ y*stride]= color4;
2312 *(uint32_t*)&dst[20+ y*stride]= color4;
2313 *(uint32_t*)&dst[24+ y*stride]= color4;
2314 *(uint32_t*)&dst[28+ y*stride]= color4;
2315 }
2316 }else if(b_w==16){
2317 for(y=0; y < b_h; y++){
2318 *(uint32_t*)&dst[0 + y*stride]= color4;
2319 *(uint32_t*)&dst[4 + y*stride]= color4;
2320 *(uint32_t*)&dst[8 + y*stride]= color4;
2321 *(uint32_t*)&dst[12+ y*stride]= color4;
2322 }
2323 }else if(b_w==8){
2324 for(y=0; y < b_h; y++){
2325 *(uint32_t*)&dst[0 + y*stride]= color4;
2326 *(uint32_t*)&dst[4 + y*stride]= color4;
2327 }
2328 }else if(b_w==4){
2329 for(y=0; y < b_h; y++){
2330 *(uint32_t*)&dst[0 + y*stride]= color4;
2331 }
2332 }else{
2333 for(y=0; y < b_h; y++){
2334 for(x=0; x < b_w; x++){
2335 dst[x + y*stride]= color;
2336 }
2337 }
2338 }
2339 }else{
2340 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2341 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2342 int mx= block->mx*scale;
2343 int my= block->my*scale;
2344 const int dx= mx&15;
2345 const int dy= my&15;
2346 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2347 sx += (mx>>4) - (HTAPS_MAX/2-1);
2348 sy += (my>>4) - (HTAPS_MAX/2-1);
2349 src += sx + sy*stride;
2350 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2351 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2352 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2353 src= tmp + MB_SIZE;
2354 }
2355 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2356 // assert(!(b_w&(b_w-1)));
2357 assert(b_w>1 && b_h>1);
2358 assert((tab_index>=0 && tab_index<4) || b_w==32);
2359 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2360 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2361 else if(b_w==32){
2362 int y;
2363 for(y=0; y<b_h; y+=16){
2364 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2365 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2366 }
2367 }else if(b_w==b_h)
2368 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2369 else if(b_w==2*b_h){
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2371 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2372 }else{
2373 assert(2*b_w==b_h);
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2375 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2376 }
2377 }
2378 }
2379
2380 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2381 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2382 int y, x;
2383 IDWTELEM * dst;
2384 for(y=0; y<b_h; y++){
2385 //FIXME ugly misuse of obmc_stride
2386 const uint8_t *obmc1= obmc + y*obmc_stride;
2387 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2388 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2389 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2390 dst = slice_buffer_get_line(sb, src_y + y);
2391 for(x=0; x<b_w; x++){
2392 int v= obmc1[x] * block[3][x + y*src_stride]
2393 +obmc2[x] * block[2][x + y*src_stride]
2394 +obmc3[x] * block[1][x + y*src_stride]
2395 +obmc4[x] * block[0][x + y*src_stride];
2396
2397 v <<= 8 - LOG2_OBMC_MAX;
2398 if(FRAC_BITS != 8){
2399 v >>= 8 - FRAC_BITS;
2400 }
2401 if(add){
2402 v += dst[x + src_x];
2403 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2404 if(v&(~255)) v= ~(v>>31);
2405 dst8[x + y*src_stride] = v;
2406 }else{
2407 dst[x + src_x] -= v;
2408 }
2409 }
2410 }
2411 }
2412
2413 //FIXME name cleanup (b_w, block_w, b_width stuff)
2414 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2415 const int b_width = s->b_width << s->block_max_depth;
2416 const int b_height= s->b_height << s->block_max_depth;
2417 const int b_stride= b_width;
2418 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2419 BlockNode *rt= lt+1;
2420 BlockNode *lb= lt+b_stride;
2421 BlockNode *rb= lb+1;
2422 uint8_t *block[4];
2423 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2424 uint8_t *tmp = s->scratchbuf;
2425 uint8_t *ptmp;
2426 int x,y;
2427
2428 if(b_x<0){
2429 lt= rt;
2430 lb= rb;
2431 }else if(b_x + 1 >= b_width){
2432 rt= lt;
2433 rb= lb;
2434 }
2435 if(b_y<0){
2436 lt= lb;
2437 rt= rb;
2438 }else if(b_y + 1 >= b_height){
2439 lb= lt;
2440 rb= rt;
2441 }
2442
2443 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2444 obmc -= src_x;
2445 b_w += src_x;
2446 if(!sliced && !offset_dst)
2447 dst -= src_x;
2448 src_x=0;
2449 }else if(src_x + b_w > w){
2450 b_w = w - src_x;
2451 }
2452 if(src_y<0){
2453 obmc -= src_y*obmc_stride;
2454 b_h += src_y;
2455 if(!sliced && !offset_dst)
2456 dst -= src_y*dst_stride;
2457 src_y=0;
2458 }else if(src_y + b_h> h){
2459 b_h = h - src_y;
2460 }
2461
2462 if(b_w<=0 || b_h<=0) return;
2463
2464 assert(src_stride > 2*MB_SIZE + 5);
2465
2466 if(!sliced && offset_dst)
2467 dst += src_x + src_y*dst_stride;
2468 dst8+= src_x + src_y*src_stride;
2469 // src += src_x + src_y*src_stride;
2470
2471 ptmp= tmp + 3*tmp_step;
2472 block[0]= ptmp;
2473 ptmp+=tmp_step;
2474 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2475
2476 if(same_block(lt, rt)){
2477 block[1]= block[0];
2478 }else{
2479 block[1]= ptmp;
2480 ptmp+=tmp_step;
2481 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2482 }
2483
2484 if(same_block(lt, lb)){
2485 block[2]= block[0];
2486 }else if(same_block(rt, lb)){
2487 block[2]= block[1];
2488 }else{
2489 block[2]= ptmp;
2490 ptmp+=tmp_step;
2491 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2492 }
2493
2494 if(same_block(lt, rb) ){
2495 block[3]= block[0];
2496 }else if(same_block(rt, rb)){
2497 block[3]= block[1];
2498 }else if(same_block(lb, rb)){
2499 block[3]= block[2];
2500 }else{
2501 block[3]= ptmp;
2502 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2503 }
2504 #if 0
2505 for(y=0; y<b_h; y++){
2506 for(x=0; x<b_w; x++){
2507 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2508 if(add) dst[x + y*dst_stride] += v;
2509 else dst[x + y*dst_stride] -= v;
2510 }
2511 }
2512 for(y=0; y<b_h; y++){
2513 uint8_t *obmc2= obmc + (obmc_stride>>1);
2514 for(x=0; x<b_w; x++){
2515 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2516 if(add) dst[x + y*dst_stride] += v;
2517 else dst[x + y*dst_stride] -= v;
2518 }
2519 }
2520 for(y=0; y<b_h; y++){
2521 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2522 for(x=0; x<b_w; x++){
2523 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2524 if(add) dst[x + y*dst_stride] += v;
2525 else dst[x + y*dst_stride] -= v;
2526 }
2527 }
2528 for(y=0; y<b_h; y++){
2529 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2530 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2531 for(x=0; x<b_w; x++){
2532 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2533 if(add) dst[x + y*dst_stride] += v;
2534 else dst[x + y*dst_stride] -= v;
2535 }
2536 }
2537 #else
2538 if(sliced){
2539 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2540 }else{
2541 for(y=0; y<b_h; y++){
2542 //FIXME ugly misuse of obmc_stride
2543 const uint8_t *obmc1= obmc + y*obmc_stride;
2544 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2545 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2546 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2547 for(x=0; x<b_w; x++){
2548 int v= obmc1[x] * block[3][x + y*src_stride]
2549 +obmc2[x] * block[2][x + y*src_stride]
2550 +obmc3[x] * block[1][x + y*src_stride]
2551 +obmc4[x] * block[0][x + y*src_stride];
2552
2553 v <<= 8 - LOG2_OBMC_MAX;
2554 if(FRAC_BITS != 8){
2555 v >>= 8 - FRAC_BITS;
2556 }
2557 if(add){
2558 v += dst[x + y*dst_stride];
2559 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2560 if(v&(~255)) v= ~(v>>31);
2561 dst8[x + y*src_stride] = v;
2562 }else{
2563 dst[x + y*dst_stride] -= v;
2564 }
2565 }
2566 }
2567 }
2568 #endif /* 0 */
2569 }
2570
2571 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2572 Plane *p= &s->plane[plane_index];
2573 const int mb_w= s->b_width << s->block_max_depth;
2574 const int mb_h= s->b_height << s->block_max_depth;
2575 int x, y, mb_x;
2576 int block_size = MB_SIZE >> s->block_max_depth;
2577 int block_w = plane_index ? block_size/2 : block_size;
2578 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2579 int obmc_stride= plane_index ? block_size : 2*block_size;
2580 int ref_stride= s->current_picture.linesize[plane_index];
2581 uint8_t *dst8= s->current_picture.data[plane_index];
2582 int w= p->width;
2583 int h= p->height;
2584
2585 if(s->keyframe || (s->avctx->debug&512)){
2586 if(mb_y==mb_h)
2587 return;
2588
2589 if(add){
2590 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2591 // DWTELEM * line = slice_buffer_get_line(sb, y);
2592 IDWTELEM * line = sb->line[y];
2593 for(x=0; x<w; x++){
2594 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2595 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2596 v >>= FRAC_BITS;
2597 if(v&(~255)) v= ~(v>>31);
2598 dst8[x + y*ref_stride]= v;
2599 }
2600 }
2601 }else{
2602 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2603 // DWTELEM * line = slice_buffer_get_line(sb, y);
2604 IDWTELEM * line = sb->line[y];
2605 for(x=0; x<w; x++){
2606 line[x] -= 128 << FRAC_BITS;
2607 // buf[x + y*w]-= 128<<FRAC_BITS;
2608 }
2609 }
2610 }
2611
2612 return;
2613 }
2614
2615 for(mb_x=0; mb_x<=mb_w; mb_x++){
2616 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2617 block_w*mb_x - block_w/2,
2618 block_w*mb_y - block_w/2,
2619 block_w, block_w,
2620 w, h,
2621 w, ref_stride, obmc_stride,
2622 mb_x - 1, mb_y - 1,
2623 add, 0, plane_index);
2624 }
2625 }
2626
2627 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2628 Plane *p= &s->plane[plane_index];
2629 const int mb_w= s->b_width << s->block_max_depth;
2630 const int mb_h= s->b_height << s->block_max_depth;
2631 int x, y, mb_x;
2632 int block_size = MB_SIZE >> s->block_max_depth;
2633 int block_w = plane_index ? block_size/2 : block_size;
2634 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2635 const int obmc_stride= plane_index ? block_size : 2*block_size;
2636 int ref_stride= s->current_picture.linesize[plane_index];
2637 uint8_t *dst8= s->current_picture.data[plane_index];
2638 int w= p->width;
2639 int h= p->height;
2640
2641 if(s->keyframe || (s->avctx->debug&512)){
2642 if(mb_y==mb_h)
2643 return;
2644
2645 if(add){
2646 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2647 for(x=0; x<w; x++){
2648 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2649 v >>= FRAC_BITS;
2650 if(v&(~255)) v= ~(v>>31);
2651 dst8[x + y*ref_stride]= v;
2652 }
2653 }
2654 }else{
2655 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2656 for(x=0; x<w; x++){
2657 buf[x + y*w]-= 128<<FRAC_BITS;
2658 }
2659 }
2660 }
2661
2662 return;
2663 }
2664
2665 for(mb_x=0; mb_x<=mb_w; mb_x++){
2666 add_yblock(s, 0, NULL, buf, dst8, obmc,
2667 block_w*mb_x - block_w/2,
2668 block_w*mb_y - block_w/2,
2669 block_w, block_w,
2670 w, h,
2671 w, ref_stride, obmc_stride,
2672 mb_x - 1, mb_y - 1,
2673 add, 1, plane_index);
2674 }
2675 }
2676
2677 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2678 const int mb_h= s->b_height << s->block_max_depth;
2679 int mb_y;
2680 for(mb_y=0; mb_y<=mb_h; mb_y++)
2681 predict_slice(s, buf, plane_index, add, mb_y);
2682 } 3335 }
2683 3336
2684 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ 3337 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2685 int i, x2, y2; 3338 int i, x2, y2;
2686 Plane *p= &s->plane[plane_index]; 3339 Plane *p= &s->plane[plane_index];
2936 } 3589 }
2937 } 3590 }
2938 return distortion + rate*penalty_factor; 3591 return distortion + rate*penalty_factor;
2939 } 3592 }
2940 3593
3594 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3595 int level;
3596 for(level=decomposition_count-1; level>=0; level--){
3597 switch(type){
3598 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
3599 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
3600 }
3601 }
3602 }
3603
3604 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
3605 const int support = type==1 ? 3 : 5;
3606 int level;
3607 if(type==2) return;
3608
3609 for(level=decomposition_count-1; level>=0; level--){
3610 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
3611 switch(type){
3612 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3613 break;
3614 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3615 break;
3616 }
3617 }
3618 }
3619 }
3620
3621 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3622 DWTCompose cs[MAX_DECOMPOSITIONS];
3623 int y;
3624 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
3625 for(y=0; y<height; y+=4)
3626 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
3627 }
3628
/*
 * Entropy-code the coefficients of one subband.
 *
 * Pass 1 scans the band and, for every coefficient whose already-coded
 * neighbours (left, top-left, top, top-right) and parent coefficient are all
 * zero, collects the lengths of the zero runs between non-zero values.
 * Pass 2 writes the number of runs, then per coefficient either a
 * context-coded significance bit (some neighbour/parent non-zero) or relies
 * on the run lengths, followed by magnitude-1 and sign for non-zero values.
 * Both passes must derive exactly the same neighbourhood per coefficient.
 *
 * Returns 0 on success, -1 if fewer than w*40 bytes of output space remain
 * at the start of a row.
 * NOTE(review): orientation is only referenced from commented-out code.
 */
3629 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3630 const int w= b->width;
3631 const int h= b->height;
3632 int x, y;
3633
3634 if(1){
3635 int run=0;
/* variable-length array with one int per coefficient of the band, allocated on the stack */
3636 int runs[w*h];
3637 int run_index=0;
3638 int max_index;
3639
/* pass 1: collect zero-run lengths */
3640 for(y=0; y<h; y++){
3641 for(x=0; x<w; x++){
3642 int v, p=0;
3643 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3644 v= src[x + y*stride];
3645
3646 if(y){
3647 t= src[x + (y-1)*stride];
3648 if(x){
3649 lt= src[x - 1 + (y-1)*stride];
3650 }
3651 if(x + 1 < w){
3652 rt= src[x + 1 + (y-1)*stride];
3653 }
3654 }
3655 if(x){
3656 l= src[x - 1 + y*stride];
3657 /*if(x > 1){
3658 if(orientation==1) ll= src[y + (x-2)*stride];
3659 else ll= src[x - 2 + y*stride];
3660 }*/
3661 }
/* co-located coefficient of the parent band, at half the coordinates */
3662 if(parent){
3663 int px= x>>1;
3664 int py= y>>1;
3665 if(px<b->parent->width && py<b->parent->height)
3666 p= parent[px + py*2*stride];
3667 }
/* only coefficients with an all-zero context take part in the run coding */
3668 if(!(/*ll|*/l|lt|t|rt|p)){
3669 if(v){
3670 runs[run_index++]= run;
3671 run=0;
3672 }else{
3673 run++;
3674 }
3675 }
3676 }
3677 }
/* max_index = number of completed runs; the trailing run is stored but never written */
3678 max_index= run_index;
3679 runs[run_index++]= run;
3680 run_index=0;
3681 run= runs[run_index++];
3682
3683 put_symbol2(&s->c, b->state[30], max_index, 0);
3684 if(run_index <= max_index)
3685 put_symbol2(&s->c, b->state[1], run, 3);
3686
/* pass 2: write the coefficients */
3687 for(y=0; y<h; y++){
3688 if(s->c.bytestream_end - s->c.bytestream < w*40){
3689 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3690 return -1;
3691 }
3692 for(x=0; x<w; x++){
3693 int v, p=0;
3694 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3695 v= src[x + y*stride];
3696
3697 if(y){
3698 t= src[x + (y-1)*stride];
3699 if(x){
3700 lt= src[x - 1 + (y-1)*stride];
3701 }
3702 if(x + 1 < w){
3703 rt= src[x + 1 + (y-1)*stride];
3704 }
3705 }
3706 if(x){
3707 l= src[x - 1 + y*stride];
3708 /*if(x > 1){
3709 if(orientation==1) ll= src[y + (x-2)*stride];
3710 else ll= src[x - 2 + y*stride];
3711 }*/
3712 }
3713 if(parent){
3714 int px= x>>1;
3715 int py= y>>1;
3716 if(px<b->parent->width && py<b->parent->height)
3717 p= parent[px + py*2*stride];
3718 }
/* non-zero context: explicit significance bit, context from the weighted neighbour magnitudes */
3719 if(/*ll|*/l|lt|t|rt|p){
3720 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3721
3722 put_rac(&s->c, &b->state[0][context], !!v);
3723 }else{
/* zero context: significance is implied by the run lengths collected in pass 1 */
3724 if(!run){
3725 run= runs[run_index++];
3726
3727 if(run_index <= max_index)
3728 put_symbol2(&s->c, b->state[1], run, 3);
3729 assert(v);
3730 }else{
3731 run--;
3732 assert(!v);
3733 }
3734 }
/* non-zero coefficient: magnitude minus one, then the sign with a context from the signs of l and t */
3735 if(v){
3736 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3737 int l2= 2*FFABS(l) + (l<0);
3738 int t2= 2*FFABS(t) + (t<0);
3739
3740 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3741 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
3742 }
3743 }
3744 }
3745 }
3746 return 0;
3747 }
3748
3749 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3750 // encode_subband_qtree(s, b, src, parent, stride, orientation);
3751 // encode_subband_z0run(s, b, src, parent, stride, orientation);
3752 return encode_subband_c0run(s, b, src, parent, stride, orientation);
3753 // encode_subband_dzr(s, b, src, parent, stride, orientation);
3754 }
3755
2941 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ 3756 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2942 const int b_stride= s->b_width << s->block_max_depth; 3757 const int b_stride= s->b_width << s->block_max_depth;
2943 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; 3758 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2944 BlockNode backup= *block; 3759 BlockNode backup= *block;
2945 int rd, index, value; 3760 int rd, index, value;
3242 } 4057 }
3243 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); 4058 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3244 } 4059 }
3245 } 4060 }
3246 4061
4062 static void encode_blocks(SnowContext *s, int search){
4063 int x, y;
4064 int w= s->b_width;
4065 int h= s->b_height;
4066
4067 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
4068 iterative_me(s);
4069
4070 for(y=0; y<h; y++){
4071 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
4072 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4073 return;
4074 }
4075 for(x=0; x<w; x++){
4076 if(s->avctx->me_method == ME_ITER || !search)
4077 encode_q_branch2(s, 0, x, y);
4078 else
4079 encode_q_branch (s, 0, x, y);
4080 }
4081 }
4082 }
4083
3247 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ 4084 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3248 const int w= b->width; 4085 const int w= b->width;
3249 const int h= b->height; 4086 const int h= b->height;
3250 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 4087 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3251 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); 4088 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3303 } 4140 }
3304 } 4141 }
3305 } 4142 }
3306 } 4143 }
3307 4144
3308 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3309 const int w= b->width;
3310 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3311 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3312 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3313 int x,y;
3314
3315 if(s->qlog == LOSSLESS_QLOG) return;
3316
3317 for(y=start_y; y<end_y; y++){
3318 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3319 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3320 for(x=0; x<w; x++){
3321 int i= line[x];
3322 if(i<0){
3323 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3324 }else if(i>0){
3325 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3326 }
3327 }
3328 }
3329 }
3330
3331 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ 4145 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3332 const int w= b->width; 4146 const int w= b->width;
3333 const int h= b->height; 4147 const int h= b->height;
3334 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); 4148 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3335 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); 4149 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3367 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); 4181 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3368 else src[i] -= src[i - 1]; 4182 else src[i] -= src[i - 1];
3369 } 4183 }
3370 }else{ 4184 }else{
3371 if(y) src[i] -= src[i - stride]; 4185 if(y) src[i] -= src[i - stride];
3372 }
3373 }
3374 }
3375 }
3376
3377 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3378 const int w= b->width;
3379 int x,y;
3380
3381 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3382 IDWTELEM * prev;
3383
3384 if (start_y != 0)
3385 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3386
3387 for(y=start_y; y<end_y; y++){
3388 prev = line;
3389 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3390 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3391 for(x=0; x<w; x++){
3392 if(x){
3393 if(use_median){
3394 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3395 else line[x] += line[x - 1];
3396 }else{
3397 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3398 else line[x] += line[x - 1];
3399 }
3400 }else{
3401 if(y) line[x] += prev[x];
3402 } 4186 }
3403 } 4187 }
3404 } 4188 }
3405 } 4189 }
3406 4190
3528 s->last_qlog = s->qlog; 4312 s->last_qlog = s->qlog;
3529 s->last_qbias = s->qbias; 4313 s->last_qbias = s->qbias;
3530 s->last_mv_scale = s->mv_scale; 4314 s->last_mv_scale = s->mv_scale;
3531 s->last_block_max_depth = s->block_max_depth; 4315 s->last_block_max_depth = s->block_max_depth;
3532 s->last_spatial_decomposition_count = s->spatial_decomposition_count; 4316 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3533 }
3534
3535 static void decode_qlogs(SnowContext *s){
3536 int plane_index, level, orientation;
3537
3538 for(plane_index=0; plane_index<3; plane_index++){
3539 for(level=0; level<s->spatial_decomposition_count; level++){
3540 for(orientation=level ? 1:0; orientation<4; orientation++){
3541 int q;
3542 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3543 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3544 else q= get_symbol(&s->c, s->header_state, 1);
3545 s->plane[plane_index].band[level][orientation].qlog= q;
3546 }
3547 }
3548 }
3549 }
3550
/*
 * GET_S(dst, check): read one symbol with get_symbol(&s->c, s->header_state, 0)
 * into the local variable tmp, evaluate check (which normally refers to tmp),
 * and either log the rejected value and "return -1" from the enclosing
 * function or store tmp into dst.
 *
 * The expansion is a plain statement sequence (not do{}while(0)) and already
 * ends in ';', so it must not be used as the unbraced body of an if/else.
 * It requires "s" and an int "tmp" to be in scope at the point of use.
 */
3551 #define GET_S(dst, check) \
3552 tmp= get_symbol(&s->c, s->header_state, 0);\
3553 if(!(check)){\
3554 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
3555 return -1;\
3556 }\
3557 dst= tmp;
3558
3559 static int decode_header(SnowContext *s){
3560 int plane_index, tmp;
3561 uint8_t kstate[32];
3562
3563 memset(kstate, MID_STATE, sizeof(kstate));
3564
3565 s->keyframe= get_rac(&s->c, kstate);
3566 if(s->keyframe || s->always_reset){
3567 reset_contexts(s);
3568 s->spatial_decomposition_type=
3569 s->qlog=
3570 s->qbias=
3571 s->mv_scale=
3572 s->block_max_depth= 0;
3573 }
3574 if(s->keyframe){
3575 GET_S(s->version, tmp <= 0U)
3576 s->always_reset= get_rac(&s->c, s->header_state);
3577 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3578 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3579 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3580 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3581 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3582 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3583 s->spatial_scalability= get_rac(&s->c, s->header_state);
3584 // s->rate_scalability= get_rac(&s->c, s->header_state);
3585 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
3586 s->max_ref_frames++;
3587
3588 decode_qlogs(s);
3589 }
3590
3591 if(!s->keyframe){
3592 if(get_rac(&s->c, s->header_state)){
3593 for(plane_index=0; plane_index<2; plane_index++){
3594 int htaps, i, sum=0;
3595 Plane *p= &s->plane[plane_index];
3596 p->diag_mc= get_rac(&s->c, s->header_state);
3597 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3598 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3599 return -1;
3600 p->htaps= htaps;
3601 for(i= htaps/2; i; i--){
3602 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3603 sum += p->hcoeff[i];
3604 }
3605 p->hcoeff[0]= 32-sum;
3606 }
3607 s->plane[2].diag_mc= s->plane[1].diag_mc;
3608 s->plane[2].htaps = s->plane[1].htaps;
3609 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3610 }
3611 if(get_rac(&s->c, s->header_state)){
3612 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3613 decode_qlogs(s);
3614 }
3615 }
3616
3617 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3618 if(s->spatial_decomposition_type > 1U){
3619 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3620 return -1;
3621 }
3622 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
3623 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
3624 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
3625 return -1;
3626 }
3627
3628 s->qlog += get_symbol(&s->c, s->header_state, 1);
3629 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3630 s->qbias += get_symbol(&s->c, s->header_state, 1);
3631 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3632 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3633 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3634 s->block_max_depth= 0;
3635 return -1;
3636 }
3637
3638 return 0;
3639 }
3640
3641 static void init_qexp(void){
3642 int i;
3643 double v=128;
3644
3645 for(i=0; i<QROOT; i++){
3646 qexp[i]= lrintf(v);
3647 v *= pow(2, 1.0 / QROOT);
3648 }
3649 }
3650
3651 static av_cold int common_init(AVCodecContext *avctx){
3652 SnowContext *s = avctx->priv_data;
3653 int width, height;
3654 int i, j;
3655
3656 s->avctx= avctx;
3657 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
3658
3659 dsputil_init(&s->dsp, avctx);
3660
3661 #define mcf(dx,dy)\
3662 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3663 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3664 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3665 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3666 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3667 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3668
3669 mcf( 0, 0)
3670 mcf( 4, 0)
3671 mcf( 8, 0)
3672 mcf(12, 0)
3673 mcf( 0, 4)
3674 mcf( 4, 4)
3675 mcf( 8, 4)
3676 mcf(12, 4)
3677 mcf( 0, 8)
3678 mcf( 4, 8)
3679 mcf( 8, 8)
3680 mcf(12, 8)
3681 mcf( 0,12)
3682 mcf( 4,12)
3683 mcf( 8,12)
3684 mcf(12,12)
3685
3686 #define mcfh(dx,dy)\
3687 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3688 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3689 mc_block_hpel ## dx ## dy ## 16;\
3690 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3691 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3692 mc_block_hpel ## dx ## dy ## 8;
3693
3694 mcfh(0, 0)
3695 mcfh(8, 0)
3696 mcfh(0, 8)
3697 mcfh(8, 8)
3698
3699 if(!qexp[0])
3700 init_qexp();
3701
3702 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3703
3704 width= s->avctx->width;
3705 height= s->avctx->height;
3706
3707 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3708 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3709
3710 for(i=0; i<MAX_REF_FRAMES; i++)
3711 for(j=0; j<MAX_REF_FRAMES; j++)
3712 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3713
3714 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3715 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3716
3717 return 0;
3718 }
3719
3720 static int common_init_after_header(AVCodecContext *avctx){
3721 SnowContext *s = avctx->priv_data;
3722 int plane_index, level, orientation;
3723
3724 for(plane_index=0; plane_index<3; plane_index++){
3725 int w= s->avctx->width;
3726 int h= s->avctx->height;
3727
3728 if(plane_index){
3729 w>>= s->chroma_h_shift;
3730 h>>= s->chroma_v_shift;
3731 }
3732 s->plane[plane_index].width = w;
3733 s->plane[plane_index].height= h;
3734
3735 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3736 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3737 SubBand *b= &s->plane[plane_index].band[level][orientation];
3738
3739 b->buf= s->spatial_dwt_buffer;
3740 b->level= level;
3741 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3742 b->width = (w + !(orientation&1))>>1;
3743 b->height= (h + !(orientation>1))>>1;
3744
3745 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3746 b->buf_x_offset = 0;
3747 b->buf_y_offset = 0;
3748
3749 if(orientation&1){
3750 b->buf += (w+1)>>1;
3751 b->buf_x_offset = (w+1)>>1;
3752 }
3753 if(orientation>1){
3754 b->buf += b->stride>>1;
3755 b->buf_y_offset = b->stride_line >> 1;
3756 }
3757 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3758
3759 if(level)
3760 b->parent= &s->plane[plane_index].band[level-1][orientation];
3761 //FIXME avoid this realloc
3762 av_freep(&b->x_coeff);
3763 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3764 }
3765 w= (w+1)>>1;
3766 h= (h+1)>>1;
3767 }
3768 }
3769
3770 return 0;
3771 } 4317 }
3772 4318
3773 static int qscale2qlog(int qscale){ 4319 static int qscale2qlog(int qscale){
3774 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) 4320 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3775 + 61*QROOT/8; //<64 >60 4321 + 61*QROOT/8; //<64 >60
3849 } 4395 }
3850 4396
3851 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); 4397 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3852 } 4398 }
3853 } 4399 }
3854 }
3855
3856 #define QUANTIZE2 0
3857
3858 #if QUANTIZE2==1
3859 #define Q2_STEP 8
3860
3861 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3862 SubBand *b= &p->band[level][orientation];
3863 int x, y;
3864 int xo=0;
3865 int yo=0;
3866 int step= 1 << (s->spatial_decomposition_count - level);
3867
3868 if(orientation&1)
3869 xo= step>>1;
3870 if(orientation&2)
3871 yo= step>>1;
3872
3873 //FIXME bias for nonzero ?
3874 //FIXME optimize
3875 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3876 for(y=0; y<p->height; y++){
3877 for(x=0; x<p->width; x++){
3878 int sx= (x-xo + step/2) / step / Q2_STEP;
3879 int sy= (y-yo + step/2) / step / Q2_STEP;
3880 int v= r0[x + y*p->width] - r1[x + y*p->width];
3881 assert(sx>=0 && sy>=0 && sx < score_stride);
3882 v= ((v+8)>>4)<<4;
3883 score[sx + sy*score_stride] += v*v;
3884 assert(score[sx + sy*score_stride] >= 0);
3885 }
3886 }
3887 }
3888
3889 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3890 int level, orientation;
3891
3892 for(level=0; level<s->spatial_decomposition_count; level++){
3893 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3894 SubBand *b= &p->band[level][orientation];
3895 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3896
3897 dequantize(s, b, dst, b->stride);
3898 }
3899 }
3900 }
3901
/*
 * Forward-transform buffer in place with ff_spatial_dwt(), quantize every
 * subband into the local best_dequant array, optionally refine it, and
 * finally copy best_dequant to s->spatial_idwt_buffer.
 *
 * Only compiled when QUANTIZE2==1.
 *
 * Refinement (skipped when s->qbias == 0): for every subband and every
 * (xs, ys) phase of a Q2_STEP x Q2_STEP grid,
 *   1. reconstruct from best_dequant and measure the error against
 *      s->spatial_idwt_buffer with find_sse() -> best_score;
 *   2. take a fresh copy, move each coefficient at that grid phase one step
 *      toward zero, reconstruct and measure again -> score;
 *   3. where score <= best_score + threshold, record the new score and apply
 *      the same one-step change to best_dequant.
 *
 * NOTE(review): best_dequant, idwt2_buffer, best_score and score are
 * variable-length arrays on the stack sized from height*stride and
 * width*height.
 */
3902 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3903 int level, orientation, ys, xs, x, y, pass;
3904 IDWTELEM best_dequant[height * stride];
3905 IDWTELEM idwt2_buffer[height * stride];
3906 const int score_stride= (width + 10)/Q2_STEP;
3907 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3908 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
/* acceptance slack for step 3, derived from the squared lambda */
3909 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3910
3911 //FIXME pass the copy cleanly ?
3912
3913 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3914 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3915
/* initial quantization of all subbands into best_dequant */
3916 for(level=0; level<s->spatial_decomposition_count; level++){
3917 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3918 SubBand *b= &p->band[level][orientation];
3919 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3920 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3921 assert(src == b->buf); // code does not depend on this but it is true currently
3922
3923 quantize(s, b, dst, src, b->stride, s->qbias);
3924 }
3925 }
/* single refinement pass */
3926 for(pass=0; pass<1; pass++){
3927 if(s->qbias == 0) //keyframe
3928 continue;
3929 for(level=0; level<s->spatial_decomposition_count; level++){
3930 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3931 SubBand *b= &p->band[level][orientation];
/* dst indexes the trial copy, best_dst the accepted coefficients */
3932 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3933 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3934
3935 for(ys= 0; ys<Q2_STEP; ys++){
3936 for(xs= 0; xs<Q2_STEP; xs++){
/* step 1: baseline error of the current best_dequant */
3937 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3938 dequantize_all(s, p, idwt2_buffer, width, height);
3939 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3940 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
/* step 2: trial with the coefficients at this grid phase moved toward zero */
3941 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3942 for(y=ys; y<b->height; y+= Q2_STEP){
3943 for(x=xs; x<b->width; x+= Q2_STEP){
3944 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3945 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3946 //FIXME try more than just --
3947 }
3948 }
3949 dequantize_all(s, p, idwt2_buffer, width, height);
3950 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3951 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
/* step 3: keep the change wherever the trial error is within threshold */
3952 for(y=ys; y<b->height; y+= Q2_STEP){
3953 for(x=xs; x<b->width; x+= Q2_STEP){
3954 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3955 if(score[score_idx] <= best_score[score_idx] + threshold){
3956 best_score[score_idx]= score[score_idx];
3957 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3958 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3959 //FIXME copy instead
3960 }
3961 }
3962 }
3963 }
3964 }
3965 }
3966 }
3967 }
3968 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3969 }
3970
3971 #endif /* QUANTIZE2==1 */
3972
3973 static av_cold int encode_init(AVCodecContext *avctx)
3974 {
3975 SnowContext *s = avctx->priv_data;
3976 int plane_index;
3977
3978 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3979 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3980 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3981 return -1;
3982 }
3983
3984 if(avctx->prediction_method == DWT_97
3985 && (avctx->flags & CODEC_FLAG_QSCALE)
3986 && avctx->global_quality == 0){
3987 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3988 return -1;
3989 }
3990
3991 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3992
3993 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3994 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3995
3996 for(plane_index=0; plane_index<3; plane_index++){
3997 s->plane[plane_index].diag_mc= 1;
3998 s->plane[plane_index].htaps= 6;
3999 s->plane[plane_index].hcoeff[0]= 40;
4000 s->plane[plane_index].hcoeff[1]= -10;
4001 s->plane[plane_index].hcoeff[2]= 2;
4002 s->plane[plane_index].fast_mc= 1;
4003 }
4004
4005 common_init(avctx);
4006 alloc_blocks(s);
4007
4008 s->version=0;
4009
4010 s->m.avctx = avctx;
4011 s->m.flags = avctx->flags;
4012 s->m.bit_rate= avctx->bit_rate;
4013
4014 s->m.me.temp =
4015 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4016 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4017 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4018 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4019 h263_encode_init(&s->m); //mv_penalty
4020
4021 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4022
4023 if(avctx->flags&CODEC_FLAG_PASS1){
4024 if(!avctx->stats_out)
4025 avctx->stats_out = av_mallocz(256);
4026 }
4027 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4028 if(ff_rate_control_init(&s->m) < 0)
4029 return -1;
4030 }
4031 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4032
4033 avctx->coded_frame= &s->current_picture;
4034 switch(avctx->pix_fmt){
4035 // case PIX_FMT_YUV444P:
4036 // case PIX_FMT_YUV422P:
4037 case PIX_FMT_YUV420P:
4038 case PIX_FMT_GRAY8:
4039 // case PIX_FMT_YUV411P:
4040 // case PIX_FMT_YUV410P:
4041 s->colorspace_type= 0;
4042 break;
4043 /* case PIX_FMT_RGB32:
4044 s->colorspace= 1;
4045 break;*/
4046 default:
4047 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4048 return -1;
4049 }
4050 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4051 s->chroma_h_shift= 1;
4052 s->chroma_v_shift= 1;
4053
4054 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4055 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4056
4057 s->avctx->get_buffer(s->avctx, &s->input_picture);
4058
4059 if(s->avctx->me_method == ME_ITER){
4060 int i;
4061 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4062 for(i=0; i<s->max_ref_frames; i++){
4063 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4064 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4065 }
4066 }
4067
4068 return 0;
4069 }
4070
4071 #define USE_HALFPEL_PLANE 0
4072
4073 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4074 int p,x,y;
4075
4076 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4077
4078 for(p=0; p<3; p++){
4079 int is_chroma= !!p;
4080 int w= s->avctx->width >>is_chroma;
4081 int h= s->avctx->height >>is_chroma;
4082 int ls= frame->linesize[p];
4083 uint8_t *src= frame->data[p];
4084
4085 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4086 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4087 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4088
4089 halfpel[0][p]= src;
4090 for(y=0; y<h; y++){
4091 for(x=0; x<w; x++){
4092 int i= y*ls + x;
4093
4094 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4095 }
4096 }
4097 for(y=0; y<h; y++){
4098 for(x=0; x<w; x++){
4099 int i= y*ls + x;
4100
4101 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4102 }
4103 }
4104 src= halfpel[1][p];
4105 for(y=0; y<h; y++){
4106 for(x=0; x<w; x++){
4107 int i= y*ls + x;
4108
4109 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4110 }
4111 }
4112
4113 //FIXME border!
4114 }
4115 }
4116
4117 static void release_buffer(AVCodecContext *avctx){
4118 SnowContext *s = avctx->priv_data;
4119 int i;
4120
4121 if(s->last_picture[s->max_ref_frames-1].data[0]){
4122 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4123 for(i=0; i<9; i++)
4124 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4125 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4126 }
4127 }
4128
4129 static int frame_start(SnowContext *s){
4130 AVFrame tmp;
4131 int w= s->avctx->width; //FIXME round up to x16 ?
4132 int h= s->avctx->height;
4133
4134 if(s->current_picture.data[0]){
4135 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4136 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4137 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4138 }
4139
4140 release_buffer(s->avctx);
4141
4142 tmp= s->last_picture[s->max_ref_frames-1];
4143 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4144 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4145 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4146 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4147 s->last_picture[0]= s->current_picture;
4148 s->current_picture= tmp;
4149
4150 if(s->keyframe){
4151 s->ref_frames= 0;
4152 }else{
4153 int i;
4154 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4155 if(i && s->last_picture[i-1].key_frame)
4156 break;
4157 s->ref_frames= i;
4158 if(s->ref_frames==0){
4159 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
4160 return -1;
4161 }
4162 }
4163
4164 s->current_picture.reference= 1;
4165 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4166 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4167 return -1;
4168 }
4169
4170 s->current_picture.key_frame= s->keyframe;
4171
4172 return 0;
4173 } 4400 }
4174 4401
4175 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ 4402 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4176 SnowContext *s = avctx->priv_data; 4403 SnowContext *s = avctx->priv_data;
4177 RangeCoder * const c= &s->c; 4404 RangeCoder * const c= &s->c;
4445 emms_c(); 4672 emms_c();
4446 4673
4447 return ff_rac_terminate(c); 4674 return ff_rac_terminate(c);
4448 } 4675 }
4449 4676
4450 static av_cold void common_end(SnowContext *s){
4451 int plane_index, level, orientation, i;
4452
4453 av_freep(&s->spatial_dwt_buffer);
4454 av_freep(&s->spatial_idwt_buffer);
4455
4456 s->m.me.temp= NULL;
4457 av_freep(&s->m.me.scratchpad);
4458 av_freep(&s->m.me.map);
4459 av_freep(&s->m.me.score_map);
4460 av_freep(&s->m.obmc_scratchpad);
4461
4462 av_freep(&s->block);
4463 av_freep(&s->scratchbuf);
4464
4465 for(i=0; i<MAX_REF_FRAMES; i++){
4466 av_freep(&s->ref_mvs[i]);
4467 av_freep(&s->ref_scores[i]);
4468 if(s->last_picture[i].data[0])
4469 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4470 }
4471
4472 for(plane_index=0; plane_index<3; plane_index++){
4473 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4474 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4475 SubBand *b= &s->plane[plane_index].band[level][orientation];
4476
4477 av_freep(&b->x_coeff);
4478 }
4479 }
4480 }
4481 }
4482
4483 static av_cold int encode_end(AVCodecContext *avctx) 4677 static av_cold int encode_end(AVCodecContext *avctx)
4484 { 4678 {
4485 SnowContext *s = avctx->priv_data; 4679 SnowContext *s = avctx->priv_data;
4486 4680
4487 common_end(s); 4681 common_end(s);
4488 av_free(avctx->stats_out); 4682 av_free(avctx->stats_out);
4489 4683
4490 return 0; 4684 return 0;
4491 } 4685 }
4492 4686
4493 static av_cold int decode_init(AVCodecContext *avctx)
4494 {
4495 avctx->pix_fmt= PIX_FMT_YUV420P;
4496
4497 common_init(avctx);
4498
4499 return 0;
4500 }
4501
4502 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
4503 const uint8_t *buf = avpkt->data;
4504 int buf_size = avpkt->size;
4505 SnowContext *s = avctx->priv_data;
4506 RangeCoder * const c= &s->c;
4507 int bytes_read;
4508 AVFrame *picture = data;
4509 int level, orientation, plane_index;
4510
4511 ff_init_range_decoder(c, buf, buf_size);
4512 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4513
4514 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4515 if(decode_header(s)<0)
4516 return -1;
4517 common_init_after_header(avctx);
4518
4519 // realloc slice buffer for the case that spatial_decomposition_count changed
4520 slice_buffer_destroy(&s->sb);
4521 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4522
4523 for(plane_index=0; plane_index<3; plane_index++){
4524 Plane *p= &s->plane[plane_index];
4525 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4526 && p->hcoeff[1]==-10
4527 && p->hcoeff[2]==2;
4528 }
4529
4530 alloc_blocks(s);
4531
4532 if(frame_start(s) < 0)
4533 return -1;
4534 //keyframe flag duplication mess FIXME
4535 if(avctx->debug&FF_DEBUG_PICT_INFO)
4536 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4537
4538 decode_blocks(s);
4539
4540 for(plane_index=0; plane_index<3; plane_index++){
4541 Plane *p= &s->plane[plane_index];
4542 int w= p->width;
4543 int h= p->height;
4544 int x, y;
4545 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4546
4547 if(s->avctx->debug&2048){
4548 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4549 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4550
4551 for(y=0; y<h; y++){
4552 for(x=0; x<w; x++){
4553 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4554 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4555 }
4556 }
4557 }
4558
4559 {
4560 for(level=0; level<s->spatial_decomposition_count; level++){
4561 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4562 SubBand *b= &p->band[level][orientation];
4563 unpack_coeffs(s, b, b->parent, orientation);
4564 }
4565 }
4566 }
4567
4568 {
4569 const int mb_h= s->b_height << s->block_max_depth;
4570 const int block_size = MB_SIZE >> s->block_max_depth;
4571 const int block_w = plane_index ? block_size/2 : block_size;
4572 int mb_y;
4573 DWTCompose cs[MAX_DECOMPOSITIONS];
4574 int yd=0, yq=0;
4575 int y;
4576 int end_y;
4577
4578 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4579 for(mb_y=0; mb_y<=mb_h; mb_y++){
4580
4581 int slice_starty = block_w*mb_y;
4582 int slice_h = block_w*(mb_y+1);
4583 if (!(s->keyframe || s->avctx->debug&512)){
4584 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4585 slice_h -= (block_w >> 1);
4586 }
4587
4588 for(level=0; level<s->spatial_decomposition_count; level++){
4589 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4590 SubBand *b= &p->band[level][orientation];
4591 int start_y;
4592 int end_y;
4593 int our_mb_start = mb_y;
4594 int our_mb_end = (mb_y + 1);
4595 const int extra= 3;
4596 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4597 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4598 if (!(s->keyframe || s->avctx->debug&512)){
4599 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4600 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4601 }
4602 start_y = FFMIN(b->height, start_y);
4603 end_y = FFMIN(b->height, end_y);
4604
4605 if (start_y != end_y){
4606 if (orientation == 0){
4607 SubBand * correlate_band = &p->band[0][0];
4608 int correlate_end_y = FFMIN(b->height, end_y + 1);
4609 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4610 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4611 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4612 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4613 }
4614 else
4615 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4616 }
4617 }
4618 }
4619
4620 for(; yd<slice_h; yd+=4){
4621 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4622 }
4623
4624 if(s->qlog == LOSSLESS_QLOG){
4625 for(; yq<slice_h && yq<h; yq++){
4626 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4627 for(x=0; x<w; x++){
4628 line[x] <<= FRAC_BITS;
4629 }
4630 }
4631 }
4632
4633 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4634
4635 y = FFMIN(p->height, slice_starty);
4636 end_y = FFMIN(p->height, slice_h);
4637 while(y < end_y)
4638 slice_buffer_release(&s->sb, y++);
4639 }
4640
4641 slice_buffer_flush(&s->sb);
4642 }
4643
4644 }
4645
4646 emms_c();
4647
4648 release_buffer(avctx);
4649
4650 if(!(s->avctx->debug&2048))
4651 *picture= s->current_picture;
4652 else
4653 *picture= s->mconly_picture;
4654
4655 *data_size = sizeof(AVFrame);
4656
4657 bytes_read= c->bytestream - c->bytestream_start;
4658 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4659
4660 return bytes_read;
4661 }
4662
4663 static av_cold int decode_end(AVCodecContext *avctx)
4664 {
4665 SnowContext *s = avctx->priv_data;
4666
4667 slice_buffer_destroy(&s->sb);
4668
4669 common_end(s);
4670
4671 return 0;
4672 }
4673
4674 AVCodec snow_decoder = {
4675 "snow",
4676 CODEC_TYPE_VIDEO,
4677 CODEC_ID_SNOW,
4678 sizeof(SnowContext),
4679 decode_init,
4680 NULL,
4681 decode_end,
4682 decode_frame,
4683 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4684 NULL,
4685 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4686 };
4687
4688 #if CONFIG_SNOW_ENCODER
4689 AVCodec snow_encoder = { 4687 AVCodec snow_encoder = {
4690 "snow", 4688 "snow",
4691 CODEC_TYPE_VIDEO, 4689 CODEC_TYPE_VIDEO,
4692 CODEC_ID_SNOW, 4690 CODEC_ID_SNOW,
4693 sizeof(SnowContext), 4691 sizeof(SnowContext),