Mercurial > libavcodec.hg
comparison snow.c @ 10188:404026d9adb5 libavcodec
Reorder functions so that encoding functions are disabled by the preprocessor.
Fixes compilation with disabled optimizations and enabled Snow decoder.
author | diego |
---|---|
date | Fri, 18 Sep 2009 19:45:09 +0000 |
parents | b911dbff811c |
children | 328e2a3171d2 |
comparison
equal
deleted
inserted
replaced
10187:b14d646fe719 | 10188:404026d9adb5 |
---|---|
502 } DWTCompose; | 502 } DWTCompose; |
503 | 503 |
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) | 504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) | 505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
506 | 506 |
507 static void iterative_me(SnowContext *s); | |
508 | |
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) | 507 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
510 { | 508 { |
511 int i; | 509 int i; |
512 | 510 |
513 buf->base_buffer = base_buffer; | 511 buf->base_buffer = base_buffer; |
1277 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | 1275 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
1278 } | 1276 } |
1279 } | 1277 } |
1280 } | 1278 } |
1281 | 1279 |
1282 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | |
1283 int level; | |
1284 for(level=decomposition_count-1; level>=0; level--){ | |
1285 switch(type){ | |
1286 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; | |
1287 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
1288 } | |
1289 } | |
1290 } | |
1291 | |
1292 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ | |
1293 const int support = type==1 ? 3 : 5; | |
1294 int level; | |
1295 if(type==2) return; | |
1296 | |
1297 for(level=decomposition_count-1; level>=0; level--){ | |
1298 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1299 switch(type){ | |
1300 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1301 break; | |
1302 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1303 break; | |
1304 } | |
1305 } | |
1306 } | |
1307 } | |
1308 | |
1309 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ | 1280 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
1310 const int support = type==1 ? 3 : 5; | 1281 const int support = type==1 ? 3 : 5; |
1311 int level; | 1282 int level; |
1312 if(type==2) return; | 1283 if(type==2) return; |
1313 | 1284 |
1319 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); | 1290 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
1320 break; | 1291 break; |
1321 } | 1292 } |
1322 } | 1293 } |
1323 } | 1294 } |
1324 } | |
1325 | |
1326 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | |
1327 DWTCompose cs[MAX_DECOMPOSITIONS]; | |
1328 int y; | |
1329 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1330 for(y=0; y<height; y+=4) | |
1331 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
1332 } | |
1333 | |
1334 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ | |
1335 const int w= b->width; | |
1336 const int h= b->height; | |
1337 int x, y; | |
1338 | |
1339 if(1){ | |
1340 int run=0; | |
1341 int runs[w*h]; | |
1342 int run_index=0; | |
1343 int max_index; | |
1344 | |
1345 for(y=0; y<h; y++){ | |
1346 for(x=0; x<w; x++){ | |
1347 int v, p=0; | |
1348 int /*ll=0, */l=0, lt=0, t=0, rt=0; | |
1349 v= src[x + y*stride]; | |
1350 | |
1351 if(y){ | |
1352 t= src[x + (y-1)*stride]; | |
1353 if(x){ | |
1354 lt= src[x - 1 + (y-1)*stride]; | |
1355 } | |
1356 if(x + 1 < w){ | |
1357 rt= src[x + 1 + (y-1)*stride]; | |
1358 } | |
1359 } | |
1360 if(x){ | |
1361 l= src[x - 1 + y*stride]; | |
1362 /*if(x > 1){ | |
1363 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1364 else ll= src[x - 2 + y*stride]; | |
1365 }*/ | |
1366 } | |
1367 if(parent){ | |
1368 int px= x>>1; | |
1369 int py= y>>1; | |
1370 if(px<b->parent->width && py<b->parent->height) | |
1371 p= parent[px + py*2*stride]; | |
1372 } | |
1373 if(!(/*ll|*/l|lt|t|rt|p)){ | |
1374 if(v){ | |
1375 runs[run_index++]= run; | |
1376 run=0; | |
1377 }else{ | |
1378 run++; | |
1379 } | |
1380 } | |
1381 } | |
1382 } | |
1383 max_index= run_index; | |
1384 runs[run_index++]= run; | |
1385 run_index=0; | |
1386 run= runs[run_index++]; | |
1387 | |
1388 put_symbol2(&s->c, b->state[30], max_index, 0); | |
1389 if(run_index <= max_index) | |
1390 put_symbol2(&s->c, b->state[1], run, 3); | |
1391 | |
1392 for(y=0; y<h; y++){ | |
1393 if(s->c.bytestream_end - s->c.bytestream < w*40){ | |
1394 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); | |
1395 return -1; | |
1396 } | |
1397 for(x=0; x<w; x++){ | |
1398 int v, p=0; | |
1399 int /*ll=0, */l=0, lt=0, t=0, rt=0; | |
1400 v= src[x + y*stride]; | |
1401 | |
1402 if(y){ | |
1403 t= src[x + (y-1)*stride]; | |
1404 if(x){ | |
1405 lt= src[x - 1 + (y-1)*stride]; | |
1406 } | |
1407 if(x + 1 < w){ | |
1408 rt= src[x + 1 + (y-1)*stride]; | |
1409 } | |
1410 } | |
1411 if(x){ | |
1412 l= src[x - 1 + y*stride]; | |
1413 /*if(x > 1){ | |
1414 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1415 else ll= src[x - 2 + y*stride]; | |
1416 }*/ | |
1417 } | |
1418 if(parent){ | |
1419 int px= x>>1; | |
1420 int py= y>>1; | |
1421 if(px<b->parent->width && py<b->parent->height) | |
1422 p= parent[px + py*2*stride]; | |
1423 } | |
1424 if(/*ll|*/l|lt|t|rt|p){ | |
1425 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); | |
1426 | |
1427 put_rac(&s->c, &b->state[0][context], !!v); | |
1428 }else{ | |
1429 if(!run){ | |
1430 run= runs[run_index++]; | |
1431 | |
1432 if(run_index <= max_index) | |
1433 put_symbol2(&s->c, b->state[1], run, 3); | |
1434 assert(v); | |
1435 }else{ | |
1436 run--; | |
1437 assert(!v); | |
1438 } | |
1439 } | |
1440 if(v){ | |
1441 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); | |
1442 int l2= 2*FFABS(l) + (l<0); | |
1443 int t2= 2*FFABS(t) + (t<0); | |
1444 | |
1445 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
1446 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); | |
1447 } | |
1448 } | |
1449 } | |
1450 } | |
1451 return 0; | |
1452 } | |
1453 | |
1454 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ | |
1455 // encode_subband_qtree(s, b, src, parent, stride, orientation); | |
1456 // encode_subband_z0run(s, b, src, parent, stride, orientation); | |
1457 return encode_subband_c0run(s, b, src, parent, stride, orientation); | |
1458 // encode_subband_dzr(s, b, src, parent, stride, orientation); | |
1459 } | 1295 } |
1460 | 1296 |
1461 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ | 1297 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
1462 const int w= b->width; | 1298 const int w= b->width; |
1463 const int h= b->height; | 1299 const int h= b->height; |
1631 *d= *s; | 1467 *d= *s; |
1632 d->bytestream= bytestream; | 1468 d->bytestream= bytestream; |
1633 d->bytestream_start= bytestream_start; | 1469 d->bytestream_start= bytestream_start; |
1634 } | 1470 } |
1635 | 1471 |
//near copy & paste from dsputil, FIXME
/**
 * Sum all samples of a w x w square of 8-bit pixels.
 *
 * The pixels are only read, so the pointer is const-qualified; callers
 * holding a non-const pointer are unaffected.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance between the starts of consecutive rows
 * @param w         edge length of the square; values <= 0 give a sum of 0
 * @return the sum of the w*w samples
 */
static int pix_sum(const uint8_t * pix, int line_size, int w)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        /* index from the row start instead of advancing pix, so no pointer
         * is formed beyond the last row that is actually read */
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < w; col++)
            sum += line[col];
    }
    return sum;
}
1651 | |
1652 //near copy & paste from dsputil, FIXME | |
1653 static int pix_norm1(uint8_t * pix, int line_size, int w) | |
1654 { | |
1655 int s, i, j; | |
1656 uint32_t *sq = ff_squareTbl + 256; | |
1657 | |
1658 s = 0; | |
1659 for (i = 0; i < w; i++) { | |
1660 for (j = 0; j < w; j ++) { | |
1661 s += sq[pix[0]]; | |
1662 pix ++; | |
1663 } | |
1664 pix += line_size - w; | |
1665 } | |
1666 return s; | |
1667 } | |
1668 | |
1669 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ | 1472 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
1670 const int w= s->b_width << s->block_max_depth; | 1473 const int w= s->b_width << s->block_max_depth; |
1671 const int rem_depth= s->block_max_depth - level; | 1474 const int rem_depth= s->block_max_depth - level; |
1672 const int index= (x + y*w) << rem_depth; | 1475 const int index= (x + y*w) << rem_depth; |
1673 const int block_w= 1<<rem_depth; | 1476 const int block_w= 1<<rem_depth; |
1716 (tr ->mx * scale[tr ->ref] + 128) >>8); | 1519 (tr ->mx * scale[tr ->ref] + 128) >>8); |
1717 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, | 1520 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
1718 (top ->my * scale[top ->ref] + 128) >>8, | 1521 (top ->my * scale[top ->ref] + 128) >>8, |
1719 (tr ->my * scale[tr ->ref] + 128) >>8); | 1522 (tr ->my * scale[tr ->ref] + 128) >>8); |
1720 } | 1523 } |
1524 } | |
1525 | |
1526 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ | |
1527 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ | |
1528 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); | |
1529 }else{ | |
1530 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); | |
1531 } | |
1532 } | |
1533 | |
1534 static void decode_q_branch(SnowContext *s, int level, int x, int y){ | |
1535 const int w= s->b_width << s->block_max_depth; | |
1536 const int rem_depth= s->block_max_depth - level; | |
1537 const int index= (x + y*w) << rem_depth; | |
1538 int trx= (x+1)<<rem_depth; | |
1539 const BlockNode *left = x ? &s->block[index-1] : &null_block; | |
1540 const BlockNode *top = y ? &s->block[index-w] : &null_block; | |
1541 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
1542 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
1543 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
1544 | |
1545 if(s->keyframe){ | |
1546 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); | |
1547 return; | |
1548 } | |
1549 | |
1550 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ | |
1551 int type, mx, my; | |
1552 int l = left->color[0]; | |
1553 int cb= left->color[1]; | |
1554 int cr= left->color[2]; | |
1555 int ref = 0; | |
1556 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
1557 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); | |
1558 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
1559 | |
1560 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; | |
1561 | |
1562 if(type){ | |
1563 pred_mv(s, &mx, &my, 0, left, top, tr); | |
1564 l += get_symbol(&s->c, &s->block_state[32], 1); | |
1565 cb+= get_symbol(&s->c, &s->block_state[64], 1); | |
1566 cr+= get_symbol(&s->c, &s->block_state[96], 1); | |
1567 }else{ | |
1568 if(s->ref_frames > 1) | |
1569 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
1570 pred_mv(s, &mx, &my, ref, left, top, tr); | |
1571 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); | |
1572 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
1573 } | |
1574 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); | |
1575 }else{ | |
1576 decode_q_branch(s, level+1, 2*x+0, 2*y+0); | |
1577 decode_q_branch(s, level+1, 2*x+1, 2*y+0); | |
1578 decode_q_branch(s, level+1, 2*x+0, 2*y+1); | |
1579 decode_q_branch(s, level+1, 2*x+1, 2*y+1); | |
1580 } | |
1581 } | |
1582 | |
1583 static void decode_blocks(SnowContext *s){ | |
1584 int x, y; | |
1585 int w= s->b_width; | |
1586 int h= s->b_height; | |
1587 | |
1588 for(y=0; y<h; y++){ | |
1589 for(x=0; x<w; x++){ | |
1590 decode_q_branch(s, 0, x, y); | |
1591 } | |
1592 } | |
1593 } | |
1594 | |
1595 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ | |
1596 static const uint8_t weight[64]={ | |
1597 8,7,6,5,4,3,2,1, | |
1598 7,7,0,0,0,0,0,1, | |
1599 6,0,6,0,0,0,2,0, | |
1600 5,0,0,5,0,3,0,0, | |
1601 4,0,0,0,4,0,0,0, | |
1602 3,0,0,5,0,3,0,0, | |
1603 2,0,6,0,0,0,2,0, | |
1604 1,7,0,0,0,0,0,1, | |
1605 }; | |
1606 | |
1607 static const uint8_t brane[256]={ | |
1608 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, | |
1609 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
1610 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
1611 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
1612 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
1613 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
1614 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
1615 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
1616 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
1617 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
1618 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
1619 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
1620 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
1621 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
1622 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
1623 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
1624 }; | |
1625 | |
1626 static const uint8_t needs[16]={ | |
1627 0,1,0,0, | |
1628 2,4,2,0, | |
1629 0,1,0,0, | |
1630 15 | |
1631 }; | |
1632 | |
1633 int x, y, b, r, l; | |
1634 int16_t tmpIt [64*(32+HTAPS_MAX)]; | |
1635 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; | |
1636 int16_t *tmpI= tmpIt; | |
1637 uint8_t *tmp2= tmp2t[0]; | |
1638 const uint8_t *hpel[11]; | |
1639 assert(dx<16 && dy<16); | |
1640 r= brane[dx + 16*dy]&15; | |
1641 l= brane[dx + 16*dy]>>4; | |
1642 | |
1643 b= needs[l] | needs[r]; | |
1644 if(p && !p->diag_mc) | |
1645 b= 15; | |
1646 | |
1647 if(b&5){ | |
1648 for(y=0; y < b_h+HTAPS_MAX-1; y++){ | |
1649 for(x=0; x < b_w; x++){ | |
1650 int a_1=src[x + HTAPS_MAX/2-4]; | |
1651 int a0= src[x + HTAPS_MAX/2-3]; | |
1652 int a1= src[x + HTAPS_MAX/2-2]; | |
1653 int a2= src[x + HTAPS_MAX/2-1]; | |
1654 int a3= src[x + HTAPS_MAX/2+0]; | |
1655 int a4= src[x + HTAPS_MAX/2+1]; | |
1656 int a5= src[x + HTAPS_MAX/2+2]; | |
1657 int a6= src[x + HTAPS_MAX/2+3]; | |
1658 int am=0; | |
1659 if(!p || p->fast_mc){ | |
1660 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); | |
1661 tmpI[x]= am; | |
1662 am= (am+16)>>5; | |
1663 }else{ | |
1664 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); | |
1665 tmpI[x]= am; | |
1666 am= (am+32)>>6; | |
1667 } | |
1668 | |
1669 if(am&(~255)) am= ~(am>>31); | |
1670 tmp2[x]= am; | |
1671 } | |
1672 tmpI+= 64; | |
1673 tmp2+= stride; | |
1674 src += stride; | |
1675 } | |
1676 src -= stride*y; | |
1677 } | |
1678 src += HTAPS_MAX/2 - 1; | |
1679 tmp2= tmp2t[1]; | |
1680 | |
1681 if(b&2){ | |
1682 for(y=0; y < b_h; y++){ | |
1683 for(x=0; x < b_w+1; x++){ | |
1684 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; | |
1685 int a0= src[x + (HTAPS_MAX/2-3)*stride]; | |
1686 int a1= src[x + (HTAPS_MAX/2-2)*stride]; | |
1687 int a2= src[x + (HTAPS_MAX/2-1)*stride]; | |
1688 int a3= src[x + (HTAPS_MAX/2+0)*stride]; | |
1689 int a4= src[x + (HTAPS_MAX/2+1)*stride]; | |
1690 int a5= src[x + (HTAPS_MAX/2+2)*stride]; | |
1691 int a6= src[x + (HTAPS_MAX/2+3)*stride]; | |
1692 int am=0; | |
1693 if(!p || p->fast_mc) | |
1694 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; | |
1695 else | |
1696 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; | |
1697 | |
1698 if(am&(~255)) am= ~(am>>31); | |
1699 tmp2[x]= am; | |
1700 } | |
1701 src += stride; | |
1702 tmp2+= stride; | |
1703 } | |
1704 src -= stride*y; | |
1705 } | |
1706 src += stride*(HTAPS_MAX/2 - 1); | |
1707 tmp2= tmp2t[2]; | |
1708 tmpI= tmpIt; | |
1709 if(b&4){ | |
1710 for(y=0; y < b_h; y++){ | |
1711 for(x=0; x < b_w; x++){ | |
1712 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; | |
1713 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; | |
1714 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; | |
1715 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; | |
1716 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; | |
1717 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; | |
1718 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; | |
1719 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; | |
1720 int am=0; | |
1721 if(!p || p->fast_mc) | |
1722 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; | |
1723 else | |
1724 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; | |
1725 if(am&(~255)) am= ~(am>>31); | |
1726 tmp2[x]= am; | |
1727 } | |
1728 tmpI+= 64; | |
1729 tmp2+= stride; | |
1730 } | |
1731 } | |
1732 | |
1733 hpel[ 0]= src; | |
1734 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); | |
1735 hpel[ 2]= src + 1; | |
1736 | |
1737 hpel[ 4]= tmp2t[1]; | |
1738 hpel[ 5]= tmp2t[2]; | |
1739 hpel[ 6]= tmp2t[1] + 1; | |
1740 | |
1741 hpel[ 8]= src + stride; | |
1742 hpel[ 9]= hpel[1] + stride; | |
1743 hpel[10]= hpel[8] + 1; | |
1744 | |
1745 if(b==15){ | |
1746 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; | |
1747 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
1748 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
1749 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
1750 dx&=7; | |
1751 dy&=7; | |
1752 for(y=0; y < b_h; y++){ | |
1753 for(x=0; x < b_w; x++){ | |
1754 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
1755 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
1756 } | |
1757 src1+=stride; | |
1758 src2+=stride; | |
1759 src3+=stride; | |
1760 src4+=stride; | |
1761 dst +=stride; | |
1762 } | |
1763 }else{ | |
1764 const uint8_t *src1= hpel[l]; | |
1765 const uint8_t *src2= hpel[r]; | |
1766 int a= weight[((dx&7) + (8*(dy&7)))]; | |
1767 int b= 8-a; | |
1768 for(y=0; y < b_h; y++){ | |
1769 for(x=0; x < b_w; x++){ | |
1770 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
1771 } | |
1772 src1+=stride; | |
1773 src2+=stride; | |
1774 dst +=stride; | |
1775 } | |
1776 } | |
1777 } | |
1778 | |
1779 #define mca(dx,dy,b_w)\ | |
1780 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ | |
1781 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ | |
1782 assert(h==b_w);\ | |
1783 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ | |
1784 } | |
1785 | |
1786 mca( 0, 0,16) | |
1787 mca( 8, 0,16) | |
1788 mca( 0, 8,16) | |
1789 mca( 8, 8,16) | |
1790 mca( 0, 0,8) | |
1791 mca( 8, 0,8) | |
1792 mca( 0, 8,8) | |
1793 mca( 8, 8,8) | |
1794 | |
1795 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ | |
1796 if(block->type & BLOCK_INTRA){ | |
1797 int x, y; | |
1798 const int color = block->color[plane_index]; | |
1799 const int color4= color*0x01010101; | |
1800 if(b_w==32){ | |
1801 for(y=0; y < b_h; y++){ | |
1802 *(uint32_t*)&dst[0 + y*stride]= color4; | |
1803 *(uint32_t*)&dst[4 + y*stride]= color4; | |
1804 *(uint32_t*)&dst[8 + y*stride]= color4; | |
1805 *(uint32_t*)&dst[12+ y*stride]= color4; | |
1806 *(uint32_t*)&dst[16+ y*stride]= color4; | |
1807 *(uint32_t*)&dst[20+ y*stride]= color4; | |
1808 *(uint32_t*)&dst[24+ y*stride]= color4; | |
1809 *(uint32_t*)&dst[28+ y*stride]= color4; | |
1810 } | |
1811 }else if(b_w==16){ | |
1812 for(y=0; y < b_h; y++){ | |
1813 *(uint32_t*)&dst[0 + y*stride]= color4; | |
1814 *(uint32_t*)&dst[4 + y*stride]= color4; | |
1815 *(uint32_t*)&dst[8 + y*stride]= color4; | |
1816 *(uint32_t*)&dst[12+ y*stride]= color4; | |
1817 } | |
1818 }else if(b_w==8){ | |
1819 for(y=0; y < b_h; y++){ | |
1820 *(uint32_t*)&dst[0 + y*stride]= color4; | |
1821 *(uint32_t*)&dst[4 + y*stride]= color4; | |
1822 } | |
1823 }else if(b_w==4){ | |
1824 for(y=0; y < b_h; y++){ | |
1825 *(uint32_t*)&dst[0 + y*stride]= color4; | |
1826 } | |
1827 }else{ | |
1828 for(y=0; y < b_h; y++){ | |
1829 for(x=0; x < b_w; x++){ | |
1830 dst[x + y*stride]= color; | |
1831 } | |
1832 } | |
1833 } | |
1834 }else{ | |
1835 uint8_t *src= s->last_picture[block->ref].data[plane_index]; | |
1836 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; | |
1837 int mx= block->mx*scale; | |
1838 int my= block->my*scale; | |
1839 const int dx= mx&15; | |
1840 const int dy= my&15; | |
1841 const int tab_index= 3 - (b_w>>2) + (b_w>>4); | |
1842 sx += (mx>>4) - (HTAPS_MAX/2-1); | |
1843 sy += (my>>4) - (HTAPS_MAX/2-1); | |
1844 src += sx + sy*stride; | |
1845 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) | |
1846 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ | |
1847 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); | |
1848 src= tmp + MB_SIZE; | |
1849 } | |
1850 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); | |
1851 // assert(!(b_w&(b_w-1))); | |
1852 assert(b_w>1 && b_h>1); | |
1853 assert((tab_index>=0 && tab_index<4) || b_w==32); | |
1854 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) | |
1855 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); | |
1856 else if(b_w==32){ | |
1857 int y; | |
1858 for(y=0; y<b_h; y+=16){ | |
1859 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); | |
1860 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); | |
1861 } | |
1862 }else if(b_w==b_h) | |
1863 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); | |
1864 else if(b_w==2*b_h){ | |
1865 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); | |
1866 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); | |
1867 }else{ | |
1868 assert(2*b_w==b_h); | |
1869 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); | |
1870 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); | |
1871 } | |
1872 } | |
1873 } | |
1874 | |
1875 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |
1876 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | |
1877 int y, x; | |
1878 IDWTELEM * dst; | |
1879 for(y=0; y<b_h; y++){ | |
1880 //FIXME ugly misuse of obmc_stride | |
1881 const uint8_t *obmc1= obmc + y*obmc_stride; | |
1882 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
1883 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
1884 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
1885 dst = slice_buffer_get_line(sb, src_y + y); | |
1886 for(x=0; x<b_w; x++){ | |
1887 int v= obmc1[x] * block[3][x + y*src_stride] | |
1888 +obmc2[x] * block[2][x + y*src_stride] | |
1889 +obmc3[x] * block[1][x + y*src_stride] | |
1890 +obmc4[x] * block[0][x + y*src_stride]; | |
1891 | |
1892 v <<= 8 - LOG2_OBMC_MAX; | |
1893 if(FRAC_BITS != 8){ | |
1894 v >>= 8 - FRAC_BITS; | |
1895 } | |
1896 if(add){ | |
1897 v += dst[x + src_x]; | |
1898 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
1899 if(v&(~255)) v= ~(v>>31); | |
1900 dst8[x + y*src_stride] = v; | |
1901 }else{ | |
1902 dst[x + src_x] -= v; | |
1903 } | |
1904 } | |
1905 } | |
1906 } | |
1907 | |
1908 //FIXME name cleanup (b_w, block_w, b_width stuff) | |
1909 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ | |
1910 const int b_width = s->b_width << s->block_max_depth; | |
1911 const int b_height= s->b_height << s->block_max_depth; | |
1912 const int b_stride= b_width; | |
1913 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
1914 BlockNode *rt= lt+1; | |
1915 BlockNode *lb= lt+b_stride; | |
1916 BlockNode *rb= lb+1; | |
1917 uint8_t *block[4]; | |
1918 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; | |
1919 uint8_t *tmp = s->scratchbuf; | |
1920 uint8_t *ptmp; | |
1921 int x,y; | |
1922 | |
1923 if(b_x<0){ | |
1924 lt= rt; | |
1925 lb= rb; | |
1926 }else if(b_x + 1 >= b_width){ | |
1927 rt= lt; | |
1928 rb= lb; | |
1929 } | |
1930 if(b_y<0){ | |
1931 lt= lb; | |
1932 rt= rb; | |
1933 }else if(b_y + 1 >= b_height){ | |
1934 lb= lt; | |
1935 rb= rt; | |
1936 } | |
1937 | |
1938 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 | |
1939 obmc -= src_x; | |
1940 b_w += src_x; | |
1941 if(!sliced && !offset_dst) | |
1942 dst -= src_x; | |
1943 src_x=0; | |
1944 }else if(src_x + b_w > w){ | |
1945 b_w = w - src_x; | |
1946 } | |
1947 if(src_y<0){ | |
1948 obmc -= src_y*obmc_stride; | |
1949 b_h += src_y; | |
1950 if(!sliced && !offset_dst) | |
1951 dst -= src_y*dst_stride; | |
1952 src_y=0; | |
1953 }else if(src_y + b_h> h){ | |
1954 b_h = h - src_y; | |
1955 } | |
1956 | |
1957 if(b_w<=0 || b_h<=0) return; | |
1958 | |
1959 assert(src_stride > 2*MB_SIZE + 5); | |
1960 | |
1961 if(!sliced && offset_dst) | |
1962 dst += src_x + src_y*dst_stride; | |
1963 dst8+= src_x + src_y*src_stride; | |
1964 // src += src_x + src_y*src_stride; | |
1965 | |
1966 ptmp= tmp + 3*tmp_step; | |
1967 block[0]= ptmp; | |
1968 ptmp+=tmp_step; | |
1969 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); | |
1970 | |
1971 if(same_block(lt, rt)){ | |
1972 block[1]= block[0]; | |
1973 }else{ | |
1974 block[1]= ptmp; | |
1975 ptmp+=tmp_step; | |
1976 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); | |
1977 } | |
1978 | |
1979 if(same_block(lt, lb)){ | |
1980 block[2]= block[0]; | |
1981 }else if(same_block(rt, lb)){ | |
1982 block[2]= block[1]; | |
1983 }else{ | |
1984 block[2]= ptmp; | |
1985 ptmp+=tmp_step; | |
1986 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); | |
1987 } | |
1988 | |
1989 if(same_block(lt, rb) ){ | |
1990 block[3]= block[0]; | |
1991 }else if(same_block(rt, rb)){ | |
1992 block[3]= block[1]; | |
1993 }else if(same_block(lb, rb)){ | |
1994 block[3]= block[2]; | |
1995 }else{ | |
1996 block[3]= ptmp; | |
1997 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); | |
1998 } | |
1999 #if 0 | |
2000 for(y=0; y<b_h; y++){ | |
2001 for(x=0; x<b_w; x++){ | |
2002 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2003 if(add) dst[x + y*dst_stride] += v; | |
2004 else dst[x + y*dst_stride] -= v; | |
2005 } | |
2006 } | |
2007 for(y=0; y<b_h; y++){ | |
2008 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2009 for(x=0; x<b_w; x++){ | |
2010 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2011 if(add) dst[x + y*dst_stride] += v; | |
2012 else dst[x + y*dst_stride] -= v; | |
2013 } | |
2014 } | |
2015 for(y=0; y<b_h; y++){ | |
2016 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2017 for(x=0; x<b_w; x++){ | |
2018 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2019 if(add) dst[x + y*dst_stride] += v; | |
2020 else dst[x + y*dst_stride] -= v; | |
2021 } | |
2022 } | |
2023 for(y=0; y<b_h; y++){ | |
2024 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2025 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2026 for(x=0; x<b_w; x++){ | |
2027 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2028 if(add) dst[x + y*dst_stride] += v; | |
2029 else dst[x + y*dst_stride] -= v; | |
2030 } | |
2031 } | |
2032 #else | |
2033 if(sliced){ | |
2034 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | |
2035 }else{ | |
2036 for(y=0; y<b_h; y++){ | |
2037 //FIXME ugly misuse of obmc_stride | |
2038 const uint8_t *obmc1= obmc + y*obmc_stride; | |
2039 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2040 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2041 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2042 for(x=0; x<b_w; x++){ | |
2043 int v= obmc1[x] * block[3][x + y*src_stride] | |
2044 +obmc2[x] * block[2][x + y*src_stride] | |
2045 +obmc3[x] * block[1][x + y*src_stride] | |
2046 +obmc4[x] * block[0][x + y*src_stride]; | |
2047 | |
2048 v <<= 8 - LOG2_OBMC_MAX; | |
2049 if(FRAC_BITS != 8){ | |
2050 v >>= 8 - FRAC_BITS; | |
2051 } | |
2052 if(add){ | |
2053 v += dst[x + y*dst_stride]; | |
2054 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2055 if(v&(~255)) v= ~(v>>31); | |
2056 dst8[x + y*src_stride] = v; | |
2057 }else{ | |
2058 dst[x + y*dst_stride] -= v; | |
2059 } | |
2060 } | |
2061 } | |
2062 } | |
2063 #endif /* 0 */ | |
2064 } | |
2065 | |
2066 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ | |
2067 Plane *p= &s->plane[plane_index]; | |
2068 const int mb_w= s->b_width << s->block_max_depth; | |
2069 const int mb_h= s->b_height << s->block_max_depth; | |
2070 int x, y, mb_x; | |
2071 int block_size = MB_SIZE >> s->block_max_depth; | |
2072 int block_w = plane_index ? block_size/2 : block_size; | |
2073 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2074 int obmc_stride= plane_index ? block_size : 2*block_size; | |
2075 int ref_stride= s->current_picture.linesize[plane_index]; | |
2076 uint8_t *dst8= s->current_picture.data[plane_index]; | |
2077 int w= p->width; | |
2078 int h= p->height; | |
2079 | |
2080 if(s->keyframe || (s->avctx->debug&512)){ | |
2081 if(mb_y==mb_h) | |
2082 return; | |
2083 | |
2084 if(add){ | |
2085 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2086 // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2087 IDWTELEM * line = sb->line[y]; | |
2088 for(x=0; x<w; x++){ | |
2089 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2090 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2091 v >>= FRAC_BITS; | |
2092 if(v&(~255)) v= ~(v>>31); | |
2093 dst8[x + y*ref_stride]= v; | |
2094 } | |
2095 } | |
2096 }else{ | |
2097 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2098 // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2099 IDWTELEM * line = sb->line[y]; | |
2100 for(x=0; x<w; x++){ | |
2101 line[x] -= 128 << FRAC_BITS; | |
2102 // buf[x + y*w]-= 128<<FRAC_BITS; | |
2103 } | |
2104 } | |
2105 } | |
2106 | |
2107 return; | |
2108 } | |
2109 | |
2110 for(mb_x=0; mb_x<=mb_w; mb_x++){ | |
2111 add_yblock(s, 1, sb, old_buffer, dst8, obmc, | |
2112 block_w*mb_x - block_w/2, | |
2113 block_w*mb_y - block_w/2, | |
2114 block_w, block_w, | |
2115 w, h, | |
2116 w, ref_stride, obmc_stride, | |
2117 mb_x - 1, mb_y - 1, | |
2118 add, 0, plane_index); | |
2119 } | |
2120 } | |
2121 | |
2122 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ | |
2123 Plane *p= &s->plane[plane_index]; | |
2124 const int mb_w= s->b_width << s->block_max_depth; | |
2125 const int mb_h= s->b_height << s->block_max_depth; | |
2126 int x, y, mb_x; | |
2127 int block_size = MB_SIZE >> s->block_max_depth; | |
2128 int block_w = plane_index ? block_size/2 : block_size; | |
2129 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2130 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2131 int ref_stride= s->current_picture.linesize[plane_index]; | |
2132 uint8_t *dst8= s->current_picture.data[plane_index]; | |
2133 int w= p->width; | |
2134 int h= p->height; | |
2135 | |
2136 if(s->keyframe || (s->avctx->debug&512)){ | |
2137 if(mb_y==mb_h) | |
2138 return; | |
2139 | |
2140 if(add){ | |
2141 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2142 for(x=0; x<w; x++){ | |
2143 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2144 v >>= FRAC_BITS; | |
2145 if(v&(~255)) v= ~(v>>31); | |
2146 dst8[x + y*ref_stride]= v; | |
2147 } | |
2148 } | |
2149 }else{ | |
2150 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2151 for(x=0; x<w; x++){ | |
2152 buf[x + y*w]-= 128<<FRAC_BITS; | |
2153 } | |
2154 } | |
2155 } | |
2156 | |
2157 return; | |
2158 } | |
2159 | |
2160 for(mb_x=0; mb_x<=mb_w; mb_x++){ | |
2161 add_yblock(s, 0, NULL, buf, dst8, obmc, | |
2162 block_w*mb_x - block_w/2, | |
2163 block_w*mb_y - block_w/2, | |
2164 block_w, block_w, | |
2165 w, h, | |
2166 w, ref_stride, obmc_stride, | |
2167 mb_x - 1, mb_y - 1, | |
2168 add, 1, plane_index); | |
2169 } | |
2170 } | |
2171 | |
2172 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ | |
2173 const int mb_h= s->b_height << s->block_max_depth; | |
2174 int mb_y; | |
2175 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2176 predict_slice(s, buf, plane_index, add, mb_y); | |
2177 } | |
2178 | |
2179 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ | |
2180 const int w= b->width; | |
2181 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | |
2182 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
2183 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | |
2184 int x,y; | |
2185 | |
2186 if(s->qlog == LOSSLESS_QLOG) return; | |
2187 | |
2188 for(y=start_y; y<end_y; y++){ | |
2189 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
2190 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
2191 for(x=0; x<w; x++){ | |
2192 int i= line[x]; | |
2193 if(i<0){ | |
2194 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
2195 }else if(i>0){ | |
2196 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
2197 } | |
2198 } | |
2199 } | |
2200 } | |
2201 | |
2202 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ | |
2203 const int w= b->width; | |
2204 int x,y; | |
2205 | |
2206 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning | |
2207 IDWTELEM * prev; | |
2208 | |
2209 if (start_y != 0) | |
2210 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
2211 | |
2212 for(y=start_y; y<end_y; y++){ | |
2213 prev = line; | |
2214 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
2215 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
2216 for(x=0; x<w; x++){ | |
2217 if(x){ | |
2218 if(use_median){ | |
2219 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); | |
2220 else line[x] += line[x - 1]; | |
2221 }else{ | |
2222 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); | |
2223 else line[x] += line[x - 1]; | |
2224 } | |
2225 }else{ | |
2226 if(y) line[x] += prev[x]; | |
2227 } | |
2228 } | |
2229 } | |
2230 } | |
2231 | |
2232 static void decode_qlogs(SnowContext *s){ | |
2233 int plane_index, level, orientation; | |
2234 | |
2235 for(plane_index=0; plane_index<3; plane_index++){ | |
2236 for(level=0; level<s->spatial_decomposition_count; level++){ | |
2237 for(orientation=level ? 1:0; orientation<4; orientation++){ | |
2238 int q; | |
2239 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; | |
2240 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; | |
2241 else q= get_symbol(&s->c, s->header_state, 1); | |
2242 s->plane[plane_index].band[level][orientation].qlog= q; | |
2243 } | |
2244 } | |
2245 } | |
2246 } | |
2247 | |
2248 #define GET_S(dst, check) \ | |
2249 tmp= get_symbol(&s->c, s->header_state, 0);\ | |
2250 if(!(check)){\ | |
2251 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\ | |
2252 return -1;\ | |
2253 }\ | |
2254 dst= tmp; | |
2255 | |
2256 static int decode_header(SnowContext *s){ | |
2257 int plane_index, tmp; | |
2258 uint8_t kstate[32]; | |
2259 | |
2260 memset(kstate, MID_STATE, sizeof(kstate)); | |
2261 | |
2262 s->keyframe= get_rac(&s->c, kstate); | |
2263 if(s->keyframe || s->always_reset){ | |
2264 reset_contexts(s); | |
2265 s->spatial_decomposition_type= | |
2266 s->qlog= | |
2267 s->qbias= | |
2268 s->mv_scale= | |
2269 s->block_max_depth= 0; | |
2270 } | |
2271 if(s->keyframe){ | |
2272 GET_S(s->version, tmp <= 0U) | |
2273 s->always_reset= get_rac(&s->c, s->header_state); | |
2274 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); | |
2275 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
2276 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) | |
2277 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); | |
2278 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
2279 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
2280 s->spatial_scalability= get_rac(&s->c, s->header_state); | |
2281 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
2282 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES) | |
2283 s->max_ref_frames++; | |
2284 | |
2285 decode_qlogs(s); | |
2286 } | |
2287 | |
2288 if(!s->keyframe){ | |
2289 if(get_rac(&s->c, s->header_state)){ | |
2290 for(plane_index=0; plane_index<2; plane_index++){ | |
2291 int htaps, i, sum=0; | |
2292 Plane *p= &s->plane[plane_index]; | |
2293 p->diag_mc= get_rac(&s->c, s->header_state); | |
2294 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; | |
2295 if((unsigned)htaps > HTAPS_MAX || htaps==0) | |
2296 return -1; | |
2297 p->htaps= htaps; | |
2298 for(i= htaps/2; i; i--){ | |
2299 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); | |
2300 sum += p->hcoeff[i]; | |
2301 } | |
2302 p->hcoeff[0]= 32-sum; | |
2303 } | |
2304 s->plane[2].diag_mc= s->plane[1].diag_mc; | |
2305 s->plane[2].htaps = s->plane[1].htaps; | |
2306 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); | |
2307 } | |
2308 if(get_rac(&s->c, s->header_state)){ | |
2309 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) | |
2310 decode_qlogs(s); | |
2311 } | |
2312 } | |
2313 | |
2314 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); | |
2315 if(s->spatial_decomposition_type > 1U){ | |
2316 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); | |
2317 return -1; | |
2318 } | |
2319 if(FFMIN(s->avctx-> width>>s->chroma_h_shift, | |
2320 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){ | |
2321 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count); | |
2322 return -1; | |
2323 } | |
2324 | |
2325 s->qlog += get_symbol(&s->c, s->header_state, 1); | |
2326 s->mv_scale += get_symbol(&s->c, s->header_state, 1); | |
2327 s->qbias += get_symbol(&s->c, s->header_state, 1); | |
2328 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); | |
2329 if(s->block_max_depth > 1 || s->block_max_depth < 0){ | |
2330 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); | |
2331 s->block_max_depth= 0; | |
2332 return -1; | |
2333 } | |
2334 | |
2335 return 0; | |
2336 } | |
2337 | |
2338 static void init_qexp(void){ | |
2339 int i; | |
2340 double v=128; | |
2341 | |
2342 for(i=0; i<QROOT; i++){ | |
2343 qexp[i]= lrintf(v); | |
2344 v *= pow(2, 1.0 / QROOT); | |
2345 } | |
2346 } | |
2347 | |
2348 static av_cold int common_init(AVCodecContext *avctx){ | |
2349 SnowContext *s = avctx->priv_data; | |
2350 int width, height; | |
2351 int i, j; | |
2352 | |
2353 s->avctx= avctx; | |
2354 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe | |
2355 | |
2356 dsputil_init(&s->dsp, avctx); | |
2357 | |
2358 #define mcf(dx,dy)\ | |
2359 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
2360 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
2361 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ | |
2362 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ | |
2363 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ | |
2364 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; | |
2365 | |
2366 mcf( 0, 0) | |
2367 mcf( 4, 0) | |
2368 mcf( 8, 0) | |
2369 mcf(12, 0) | |
2370 mcf( 0, 4) | |
2371 mcf( 4, 4) | |
2372 mcf( 8, 4) | |
2373 mcf(12, 4) | |
2374 mcf( 0, 8) | |
2375 mcf( 4, 8) | |
2376 mcf( 8, 8) | |
2377 mcf(12, 8) | |
2378 mcf( 0,12) | |
2379 mcf( 4,12) | |
2380 mcf( 8,12) | |
2381 mcf(12,12) | |
2382 | |
2383 #define mcfh(dx,dy)\ | |
2384 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
2385 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
2386 mc_block_hpel ## dx ## dy ## 16;\ | |
2387 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ | |
2388 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ | |
2389 mc_block_hpel ## dx ## dy ## 8; | |
2390 | |
2391 mcfh(0, 0) | |
2392 mcfh(8, 0) | |
2393 mcfh(0, 8) | |
2394 mcfh(8, 8) | |
2395 | |
2396 if(!qexp[0]) | |
2397 init_qexp(); | |
2398 | |
2399 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); | |
2400 | |
2401 width= s->avctx->width; | |
2402 height= s->avctx->height; | |
2403 | |
2404 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); | |
2405 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here | |
2406 | |
2407 for(i=0; i<MAX_REF_FRAMES; i++) | |
2408 for(j=0; j<MAX_REF_FRAMES; j++) | |
2409 scale_mv_ref[i][j] = 256*(i+1)/(j+1); | |
2410 | |
2411 s->avctx->get_buffer(s->avctx, &s->mconly_picture); | |
2412 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE); | |
2413 | |
2414 return 0; | |
2415 } | |
2416 | |
2417 static int common_init_after_header(AVCodecContext *avctx){ | |
2418 SnowContext *s = avctx->priv_data; | |
2419 int plane_index, level, orientation; | |
2420 | |
2421 for(plane_index=0; plane_index<3; plane_index++){ | |
2422 int w= s->avctx->width; | |
2423 int h= s->avctx->height; | |
2424 | |
2425 if(plane_index){ | |
2426 w>>= s->chroma_h_shift; | |
2427 h>>= s->chroma_v_shift; | |
2428 } | |
2429 s->plane[plane_index].width = w; | |
2430 s->plane[plane_index].height= h; | |
2431 | |
2432 for(level=s->spatial_decomposition_count-1; level>=0; level--){ | |
2433 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
2434 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2435 | |
2436 b->buf= s->spatial_dwt_buffer; | |
2437 b->level= level; | |
2438 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
2439 b->width = (w + !(orientation&1))>>1; | |
2440 b->height= (h + !(orientation>1))>>1; | |
2441 | |
2442 b->stride_line = 1 << (s->spatial_decomposition_count - level); | |
2443 b->buf_x_offset = 0; | |
2444 b->buf_y_offset = 0; | |
2445 | |
2446 if(orientation&1){ | |
2447 b->buf += (w+1)>>1; | |
2448 b->buf_x_offset = (w+1)>>1; | |
2449 } | |
2450 if(orientation>1){ | |
2451 b->buf += b->stride>>1; | |
2452 b->buf_y_offset = b->stride_line >> 1; | |
2453 } | |
2454 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); | |
2455 | |
2456 if(level) | |
2457 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
2458 //FIXME avoid this realloc | |
2459 av_freep(&b->x_coeff); | |
2460 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); | |
2461 } | |
2462 w= (w+1)>>1; | |
2463 h= (h+1)>>1; | |
2464 } | |
2465 } | |
2466 | |
2467 return 0; | |
2468 } | |
2469 | |
2470 #define QUANTIZE2 0 | |
2471 | |
2472 #if QUANTIZE2==1 | |
2473 #define Q2_STEP 8 | |
2474 | |
2475 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
2476 SubBand *b= &p->band[level][orientation]; | |
2477 int x, y; | |
2478 int xo=0; | |
2479 int yo=0; | |
2480 int step= 1 << (s->spatial_decomposition_count - level); | |
2481 | |
2482 if(orientation&1) | |
2483 xo= step>>1; | |
2484 if(orientation&2) | |
2485 yo= step>>1; | |
2486 | |
2487 //FIXME bias for nonzero ? | |
2488 //FIXME optimize | |
2489 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
2490 for(y=0; y<p->height; y++){ | |
2491 for(x=0; x<p->width; x++){ | |
2492 int sx= (x-xo + step/2) / step / Q2_STEP; | |
2493 int sy= (y-yo + step/2) / step / Q2_STEP; | |
2494 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
2495 assert(sx>=0 && sy>=0 && sx < score_stride); | |
2496 v= ((v+8)>>4)<<4; | |
2497 score[sx + sy*score_stride] += v*v; | |
2498 assert(score[sx + sy*score_stride] >= 0); | |
2499 } | |
2500 } | |
2501 } | |
2502 | |
2503 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
2504 int level, orientation; | |
2505 | |
2506 for(level=0; level<s->spatial_decomposition_count; level++){ | |
2507 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
2508 SubBand *b= &p->band[level][orientation]; | |
2509 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
2510 | |
2511 dequantize(s, b, dst, b->stride); | |
2512 } | |
2513 } | |
2514 } | |
2515 | |
2516 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
2517 int level, orientation, ys, xs, x, y, pass; | |
2518 IDWTELEM best_dequant[height * stride]; | |
2519 IDWTELEM idwt2_buffer[height * stride]; | |
2520 const int score_stride= (width + 10)/Q2_STEP; | |
2521 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
2522 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
2523 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
2524 | |
2525 //FIXME pass the copy cleanly ? | |
2526 | |
2527 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
2528 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
2529 | |
2530 for(level=0; level<s->spatial_decomposition_count; level++){ | |
2531 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
2532 SubBand *b= &p->band[level][orientation]; | |
2533 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
2534 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
2535 assert(src == b->buf); // code does not depend on this but it is true currently | |
2536 | |
2537 quantize(s, b, dst, src, b->stride, s->qbias); | |
2538 } | |
2539 } | |
2540 for(pass=0; pass<1; pass++){ | |
2541 if(s->qbias == 0) //keyframe | |
2542 continue; | |
2543 for(level=0; level<s->spatial_decomposition_count; level++){ | |
2544 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
2545 SubBand *b= &p->band[level][orientation]; | |
2546 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
2547 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
2548 | |
2549 for(ys= 0; ys<Q2_STEP; ys++){ | |
2550 for(xs= 0; xs<Q2_STEP; xs++){ | |
2551 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
2552 dequantize_all(s, p, idwt2_buffer, width, height); | |
2553 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
2554 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
2555 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
2556 for(y=ys; y<b->height; y+= Q2_STEP){ | |
2557 for(x=xs; x<b->width; x+= Q2_STEP){ | |
2558 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
2559 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
2560 //FIXME try more than just -- | |
2561 } | |
2562 } | |
2563 dequantize_all(s, p, idwt2_buffer, width, height); | |
2564 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
2565 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
2566 for(y=ys; y<b->height; y+= Q2_STEP){ | |
2567 for(x=xs; x<b->width; x+= Q2_STEP){ | |
2568 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
2569 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
2570 best_score[score_idx]= score[score_idx]; | |
2571 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
2572 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
2573 //FIXME copy instead | |
2574 } | |
2575 } | |
2576 } | |
2577 } | |
2578 } | |
2579 } | |
2580 } | |
2581 } | |
2582 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end | |
2583 } | |
2584 | |
2585 #endif /* QUANTIZE2==1 */ | |
2586 | |
2587 #define USE_HALFPEL_PLANE 0 | |
2588 | |
2589 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ | |
2590 int p,x,y; | |
2591 | |
2592 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); | |
2593 | |
2594 for(p=0; p<3; p++){ | |
2595 int is_chroma= !!p; | |
2596 int w= s->avctx->width >>is_chroma; | |
2597 int h= s->avctx->height >>is_chroma; | |
2598 int ls= frame->linesize[p]; | |
2599 uint8_t *src= frame->data[p]; | |
2600 | |
2601 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); | |
2602 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); | |
2603 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); | |
2604 | |
2605 halfpel[0][p]= src; | |
2606 for(y=0; y<h; y++){ | |
2607 for(x=0; x<w; x++){ | |
2608 int i= y*ls + x; | |
2609 | |
2610 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; | |
2611 } | |
2612 } | |
2613 for(y=0; y<h; y++){ | |
2614 for(x=0; x<w; x++){ | |
2615 int i= y*ls + x; | |
2616 | |
2617 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; | |
2618 } | |
2619 } | |
2620 src= halfpel[1][p]; | |
2621 for(y=0; y<h; y++){ | |
2622 for(x=0; x<w; x++){ | |
2623 int i= y*ls + x; | |
2624 | |
2625 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; | |
2626 } | |
2627 } | |
2628 | |
2629 //FIXME border! | |
2630 } | |
2631 } | |
2632 | |
2633 static void release_buffer(AVCodecContext *avctx){ | |
2634 SnowContext *s = avctx->priv_data; | |
2635 int i; | |
2636 | |
2637 if(s->last_picture[s->max_ref_frames-1].data[0]){ | |
2638 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); | |
2639 for(i=0; i<9; i++) | |
2640 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) | |
2641 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); | |
2642 } | |
2643 } | |
2644 | |
2645 static int frame_start(SnowContext *s){ | |
2646 AVFrame tmp; | |
2647 int w= s->avctx->width; //FIXME round up to x16 ? | |
2648 int h= s->avctx->height; | |
2649 | |
2650 if(s->current_picture.data[0]){ | |
2651 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); | |
2652 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
2653 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
2654 } | |
2655 | |
2656 release_buffer(s->avctx); | |
2657 | |
2658 tmp= s->last_picture[s->max_ref_frames-1]; | |
2659 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
2660 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); | |
2661 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) | |
2662 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); | |
2663 s->last_picture[0]= s->current_picture; | |
2664 s->current_picture= tmp; | |
2665 | |
2666 if(s->keyframe){ | |
2667 s->ref_frames= 0; | |
2668 }else{ | |
2669 int i; | |
2670 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
2671 if(i && s->last_picture[i-1].key_frame) | |
2672 break; | |
2673 s->ref_frames= i; | |
2674 if(s->ref_frames==0){ | |
2675 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n"); | |
2676 return -1; | |
2677 } | |
2678 } | |
2679 | |
2680 s->current_picture.reference= 1; | |
2681 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
2682 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
2683 return -1; | |
2684 } | |
2685 | |
2686 s->current_picture.key_frame= s->keyframe; | |
2687 | |
2688 return 0; | |
2689 } | |
2690 | |
2691 static av_cold void common_end(SnowContext *s){ | |
2692 int plane_index, level, orientation, i; | |
2693 | |
2694 av_freep(&s->spatial_dwt_buffer); | |
2695 av_freep(&s->spatial_idwt_buffer); | |
2696 | |
2697 s->m.me.temp= NULL; | |
2698 av_freep(&s->m.me.scratchpad); | |
2699 av_freep(&s->m.me.map); | |
2700 av_freep(&s->m.me.score_map); | |
2701 av_freep(&s->m.obmc_scratchpad); | |
2702 | |
2703 av_freep(&s->block); | |
2704 av_freep(&s->scratchbuf); | |
2705 | |
2706 for(i=0; i<MAX_REF_FRAMES; i++){ | |
2707 av_freep(&s->ref_mvs[i]); | |
2708 av_freep(&s->ref_scores[i]); | |
2709 if(s->last_picture[i].data[0]) | |
2710 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
2711 } | |
2712 | |
2713 for(plane_index=0; plane_index<3; plane_index++){ | |
2714 for(level=s->spatial_decomposition_count-1; level>=0; level--){ | |
2715 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
2716 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2717 | |
2718 av_freep(&b->x_coeff); | |
2719 } | |
2720 } | |
2721 } | |
2722 } | |
2723 | |
2724 static av_cold int decode_init(AVCodecContext *avctx) | |
2725 { | |
2726 avctx->pix_fmt= PIX_FMT_YUV420P; | |
2727 | |
2728 common_init(avctx); | |
2729 | |
2730 return 0; | |
2731 } | |
2732 | |
2733 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ | |
2734 const uint8_t *buf = avpkt->data; | |
2735 int buf_size = avpkt->size; | |
2736 SnowContext *s = avctx->priv_data; | |
2737 RangeCoder * const c= &s->c; | |
2738 int bytes_read; | |
2739 AVFrame *picture = data; | |
2740 int level, orientation, plane_index; | |
2741 | |
2742 ff_init_range_decoder(c, buf, buf_size); | |
2743 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2744 | |
2745 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
2746 if(decode_header(s)<0) | |
2747 return -1; | |
2748 common_init_after_header(avctx); | |
2749 | |
2750 // realloc slice buffer for the case that spatial_decomposition_count changed | |
2751 slice_buffer_destroy(&s->sb); | |
2752 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); | |
2753 | |
2754 for(plane_index=0; plane_index<3; plane_index++){ | |
2755 Plane *p= &s->plane[plane_index]; | |
2756 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 | |
2757 && p->hcoeff[1]==-10 | |
2758 && p->hcoeff[2]==2; | |
2759 } | |
2760 | |
2761 alloc_blocks(s); | |
2762 | |
2763 if(frame_start(s) < 0) | |
2764 return -1; | |
2765 //keyframe flag duplication mess FIXME | |
2766 if(avctx->debug&FF_DEBUG_PICT_INFO) | |
2767 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
2768 | |
2769 decode_blocks(s); | |
2770 | |
2771 for(plane_index=0; plane_index<3; plane_index++){ | |
2772 Plane *p= &s->plane[plane_index]; | |
2773 int w= p->width; | |
2774 int h= p->height; | |
2775 int x, y; | |
2776 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ | |
2777 | |
2778 if(s->avctx->debug&2048){ | |
2779 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
2780 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
2781 | |
2782 for(y=0; y<h; y++){ | |
2783 for(x=0; x<w; x++){ | |
2784 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | |
2785 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | |
2786 } | |
2787 } | |
2788 } | |
2789 | |
2790 { | |
2791 for(level=0; level<s->spatial_decomposition_count; level++){ | |
2792 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
2793 SubBand *b= &p->band[level][orientation]; | |
2794 unpack_coeffs(s, b, b->parent, orientation); | |
2795 } | |
2796 } | |
2797 } | |
2798 | |
2799 { | |
2800 const int mb_h= s->b_height << s->block_max_depth; | |
2801 const int block_size = MB_SIZE >> s->block_max_depth; | |
2802 const int block_w = plane_index ? block_size/2 : block_size; | |
2803 int mb_y; | |
2804 DWTCompose cs[MAX_DECOMPOSITIONS]; | |
2805 int yd=0, yq=0; | |
2806 int y; | |
2807 int end_y; | |
2808 | |
2809 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
2810 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
2811 | |
2812 int slice_starty = block_w*mb_y; | |
2813 int slice_h = block_w*(mb_y+1); | |
2814 if (!(s->keyframe || s->avctx->debug&512)){ | |
2815 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | |
2816 slice_h -= (block_w >> 1); | |
2817 } | |
2818 | |
2819 for(level=0; level<s->spatial_decomposition_count; level++){ | |
2820 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
2821 SubBand *b= &p->band[level][orientation]; | |
2822 int start_y; | |
2823 int end_y; | |
2824 int our_mb_start = mb_y; | |
2825 int our_mb_end = (mb_y + 1); | |
2826 const int extra= 3; | |
2827 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
2828 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
2829 if (!(s->keyframe || s->avctx->debug&512)){ | |
2830 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
2831 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
2832 } | |
2833 start_y = FFMIN(b->height, start_y); | |
2834 end_y = FFMIN(b->height, end_y); | |
2835 | |
2836 if (start_y != end_y){ | |
2837 if (orientation == 0){ | |
2838 SubBand * correlate_band = &p->band[0][0]; | |
2839 int correlate_end_y = FFMIN(b->height, end_y + 1); | |
2840 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | |
2841 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | |
2842 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | |
2843 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); | |
2844 } | |
2845 else | |
2846 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
2847 } | |
2848 } | |
2849 } | |
2850 | |
2851 for(; yd<slice_h; yd+=4){ | |
2852 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
2853 } | |
2854 | |
2855 if(s->qlog == LOSSLESS_QLOG){ | |
2856 for(; yq<slice_h && yq<h; yq++){ | |
2857 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
2858 for(x=0; x<w; x++){ | |
2859 line[x] <<= FRAC_BITS; | |
2860 } | |
2861 } | |
2862 } | |
2863 | |
2864 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); | |
2865 | |
2866 y = FFMIN(p->height, slice_starty); | |
2867 end_y = FFMIN(p->height, slice_h); | |
2868 while(y < end_y) | |
2869 slice_buffer_release(&s->sb, y++); | |
2870 } | |
2871 | |
2872 slice_buffer_flush(&s->sb); | |
2873 } | |
2874 | |
2875 } | |
2876 | |
2877 emms_c(); | |
2878 | |
2879 release_buffer(avctx); | |
2880 | |
2881 if(!(s->avctx->debug&2048)) | |
2882 *picture= s->current_picture; | |
2883 else | |
2884 *picture= s->mconly_picture; | |
2885 | |
2886 *data_size = sizeof(AVFrame); | |
2887 | |
2888 bytes_read= c->bytestream - c->bytestream_start; | |
2889 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
2890 | |
2891 return bytes_read; | |
2892 } | |
2893 | |
2894 static av_cold int decode_end(AVCodecContext *avctx) | |
2895 { | |
2896 SnowContext *s = avctx->priv_data; | |
2897 | |
2898 slice_buffer_destroy(&s->sb); | |
2899 | |
2900 common_end(s); | |
2901 | |
2902 return 0; | |
2903 } | |
2904 | |
2905 AVCodec snow_decoder = { | |
2906 "snow", | |
2907 CODEC_TYPE_VIDEO, | |
2908 CODEC_ID_SNOW, | |
2909 sizeof(SnowContext), | |
2910 decode_init, | |
2911 NULL, | |
2912 decode_end, | |
2913 decode_frame, | |
2914 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
2915 NULL, | |
2916 .long_name = NULL_IF_CONFIG_SMALL("Snow"), | |
2917 }; | |
2918 | |
2919 #if CONFIG_SNOW_ENCODER | |
2920 static av_cold int encode_init(AVCodecContext *avctx) | |
2921 { | |
2922 SnowContext *s = avctx->priv_data; | |
2923 int plane_index; | |
2924 | |
2925 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ | |
2926 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" | |
2927 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); | |
2928 return -1; | |
2929 } | |
2930 | |
2931 if(avctx->prediction_method == DWT_97 | |
2932 && (avctx->flags & CODEC_FLAG_QSCALE) | |
2933 && avctx->global_quality == 0){ | |
2934 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); | |
2935 return -1; | |
2936 } | |
2937 | |
2938 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type | |
2939 | |
2940 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; | |
2941 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; | |
2942 | |
2943 for(plane_index=0; plane_index<3; plane_index++){ | |
2944 s->plane[plane_index].diag_mc= 1; | |
2945 s->plane[plane_index].htaps= 6; | |
2946 s->plane[plane_index].hcoeff[0]= 40; | |
2947 s->plane[plane_index].hcoeff[1]= -10; | |
2948 s->plane[plane_index].hcoeff[2]= 2; | |
2949 s->plane[plane_index].fast_mc= 1; | |
2950 } | |
2951 | |
2952 common_init(avctx); | |
2953 alloc_blocks(s); | |
2954 | |
2955 s->version=0; | |
2956 | |
2957 s->m.avctx = avctx; | |
2958 s->m.flags = avctx->flags; | |
2959 s->m.bit_rate= avctx->bit_rate; | |
2960 | |
2961 s->m.me.temp = | |
2962 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); | |
2963 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
2964 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
2965 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); | |
2966 h263_encode_init(&s->m); //mv_penalty | |
2967 | |
2968 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); | |
2969 | |
2970 if(avctx->flags&CODEC_FLAG_PASS1){ | |
2971 if(!avctx->stats_out) | |
2972 avctx->stats_out = av_mallocz(256); | |
2973 } | |
2974 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ | |
2975 if(ff_rate_control_init(&s->m) < 0) | |
2976 return -1; | |
2977 } | |
2978 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); | |
2979 | |
2980 avctx->coded_frame= &s->current_picture; | |
2981 switch(avctx->pix_fmt){ | |
2982 // case PIX_FMT_YUV444P: | |
2983 // case PIX_FMT_YUV422P: | |
2984 case PIX_FMT_YUV420P: | |
2985 case PIX_FMT_GRAY8: | |
2986 // case PIX_FMT_YUV411P: | |
2987 // case PIX_FMT_YUV410P: | |
2988 s->colorspace_type= 0; | |
2989 break; | |
2990 /* case PIX_FMT_RGB32: | |
2991 s->colorspace= 1; | |
2992 break;*/ | |
2993 default: | |
2994 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); | |
2995 return -1; | |
2996 } | |
2997 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
2998 s->chroma_h_shift= 1; | |
2999 s->chroma_v_shift= 1; | |
3000 | |
3001 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); | |
3002 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); | |
3003 | |
3004 s->avctx->get_buffer(s->avctx, &s->input_picture); | |
3005 | |
3006 if(s->avctx->me_method == ME_ITER){ | |
3007 int i; | |
3008 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
3009 for(i=0; i<s->max_ref_frames; i++){ | |
3010 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
3011 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
3012 } | |
3013 } | |
3014 | |
3015 return 0; | |
3016 } | |
3017 | |
3018 //near copy & paste from dsputil, FIXME | |
3019 static int pix_sum(uint8_t * pix, int line_size, int w) | |
3020 { | |
3021 int s, i, j; | |
3022 | |
3023 s = 0; | |
3024 for (i = 0; i < w; i++) { | |
3025 for (j = 0; j < w; j++) { | |
3026 s += pix[0]; | |
3027 pix ++; | |
3028 } | |
3029 pix += line_size - w; | |
3030 } | |
3031 return s; | |
3032 } | |
3033 | |
3034 //near copy & paste from dsputil, FIXME | |
3035 static int pix_norm1(uint8_t * pix, int line_size, int w) | |
3036 { | |
3037 int s, i, j; | |
3038 uint32_t *sq = ff_squareTbl + 256; | |
3039 | |
3040 s = 0; | |
3041 for (i = 0; i < w; i++) { | |
3042 for (j = 0; j < w; j ++) { | |
3043 s += sq[pix[0]]; | |
3044 pix ++; | |
3045 } | |
3046 pix += line_size - w; | |
3047 } | |
3048 return s; | |
1721 } | 3049 } |
1722 | 3050 |
1723 //FIXME copy&paste | 3051 //FIXME copy&paste |
1724 #define P_LEFT P[1] | 3052 #define P_LEFT P[1] |
1725 #define P_TOP P[2] | 3053 #define P_TOP P[2] |
1950 memcpy(s->block_state, p_state, sizeof(s->block_state)); | 3278 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
1951 return score; | 3279 return score; |
1952 } | 3280 } |
1953 } | 3281 } |
1954 | 3282 |
1955 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ | |
1956 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ | |
1957 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); | |
1958 }else{ | |
1959 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); | |
1960 } | |
1961 } | |
1962 | |
1963 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ | 3283 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
1964 const int w= s->b_width << s->block_max_depth; | 3284 const int w= s->b_width << s->block_max_depth; |
1965 const int rem_depth= s->block_max_depth - level; | 3285 const int rem_depth= s->block_max_depth - level; |
1966 const int index= (x + y*w) << rem_depth; | 3286 const int index= (x + y*w) << rem_depth; |
1967 int trx= (x+1)<<rem_depth; | 3287 int trx= (x+1)<<rem_depth; |
2010 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | 3330 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); |
2011 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); | 3331 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
2012 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); | 3332 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
2013 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); | 3333 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
2014 } | 3334 } |
2015 } | |
2016 | |
2017 static void decode_q_branch(SnowContext *s, int level, int x, int y){ | |
2018 const int w= s->b_width << s->block_max_depth; | |
2019 const int rem_depth= s->block_max_depth - level; | |
2020 const int index= (x + y*w) << rem_depth; | |
2021 int trx= (x+1)<<rem_depth; | |
2022 const BlockNode *left = x ? &s->block[index-1] : &null_block; | |
2023 const BlockNode *top = y ? &s->block[index-w] : &null_block; | |
2024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; | |
2025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt | |
2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; | |
2027 | |
2028 if(s->keyframe){ | |
2029 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); | |
2030 return; | |
2031 } | |
2032 | |
2033 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ | |
2034 int type, mx, my; | |
2035 int l = left->color[0]; | |
2036 int cb= left->color[1]; | |
2037 int cr= left->color[2]; | |
2038 int ref = 0; | |
2039 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
2040 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); | |
2041 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
2042 | |
2043 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; | |
2044 | |
2045 if(type){ | |
2046 pred_mv(s, &mx, &my, 0, left, top, tr); | |
2047 l += get_symbol(&s->c, &s->block_state[32], 1); | |
2048 cb+= get_symbol(&s->c, &s->block_state[64], 1); | |
2049 cr+= get_symbol(&s->c, &s->block_state[96], 1); | |
2050 }else{ | |
2051 if(s->ref_frames > 1) | |
2052 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
2053 pred_mv(s, &mx, &my, ref, left, top, tr); | |
2054 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); | |
2055 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
2056 } | |
2057 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); | |
2058 }else{ | |
2059 decode_q_branch(s, level+1, 2*x+0, 2*y+0); | |
2060 decode_q_branch(s, level+1, 2*x+1, 2*y+0); | |
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+1); | |
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+1); | |
2063 } | |
2064 } | |
2065 | |
2066 static void encode_blocks(SnowContext *s, int search){ | |
2067 int x, y; | |
2068 int w= s->b_width; | |
2069 int h= s->b_height; | |
2070 | |
2071 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) | |
2072 iterative_me(s); | |
2073 | |
2074 for(y=0; y<h; y++){ | |
2075 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit | |
2076 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); | |
2077 return; | |
2078 } | |
2079 for(x=0; x<w; x++){ | |
2080 if(s->avctx->me_method == ME_ITER || !search) | |
2081 encode_q_branch2(s, 0, x, y); | |
2082 else | |
2083 encode_q_branch (s, 0, x, y); | |
2084 } | |
2085 } | |
2086 } | |
2087 | |
2088 static void decode_blocks(SnowContext *s){ | |
2089 int x, y; | |
2090 int w= s->b_width; | |
2091 int h= s->b_height; | |
2092 | |
2093 for(y=0; y<h; y++){ | |
2094 for(x=0; x<w; x++){ | |
2095 decode_q_branch(s, 0, x, y); | |
2096 } | |
2097 } | |
2098 } | |
2099 | |
2100 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ | |
2101 static const uint8_t weight[64]={ | |
2102 8,7,6,5,4,3,2,1, | |
2103 7,7,0,0,0,0,0,1, | |
2104 6,0,6,0,0,0,2,0, | |
2105 5,0,0,5,0,3,0,0, | |
2106 4,0,0,0,4,0,0,0, | |
2107 3,0,0,5,0,3,0,0, | |
2108 2,0,6,0,0,0,2,0, | |
2109 1,7,0,0,0,0,0,1, | |
2110 }; | |
2111 | |
2112 static const uint8_t brane[256]={ | |
2113 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, | |
2114 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
2115 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
2116 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
2117 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
2118 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
2119 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
2120 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
2121 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
2122 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
2123 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
2124 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
2125 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
2126 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
2127 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
2128 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
2129 }; | |
2130 | |
2131 static const uint8_t needs[16]={ | |
2132 0,1,0,0, | |
2133 2,4,2,0, | |
2134 0,1,0,0, | |
2135 15 | |
2136 }; | |
2137 | |
2138 int x, y, b, r, l; | |
2139 int16_t tmpIt [64*(32+HTAPS_MAX)]; | |
2140 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; | |
2141 int16_t *tmpI= tmpIt; | |
2142 uint8_t *tmp2= tmp2t[0]; | |
2143 const uint8_t *hpel[11]; | |
2144 assert(dx<16 && dy<16); | |
2145 r= brane[dx + 16*dy]&15; | |
2146 l= brane[dx + 16*dy]>>4; | |
2147 | |
2148 b= needs[l] | needs[r]; | |
2149 if(p && !p->diag_mc) | |
2150 b= 15; | |
2151 | |
2152 if(b&5){ | |
2153 for(y=0; y < b_h+HTAPS_MAX-1; y++){ | |
2154 for(x=0; x < b_w; x++){ | |
2155 int a_1=src[x + HTAPS_MAX/2-4]; | |
2156 int a0= src[x + HTAPS_MAX/2-3]; | |
2157 int a1= src[x + HTAPS_MAX/2-2]; | |
2158 int a2= src[x + HTAPS_MAX/2-1]; | |
2159 int a3= src[x + HTAPS_MAX/2+0]; | |
2160 int a4= src[x + HTAPS_MAX/2+1]; | |
2161 int a5= src[x + HTAPS_MAX/2+2]; | |
2162 int a6= src[x + HTAPS_MAX/2+3]; | |
2163 int am=0; | |
2164 if(!p || p->fast_mc){ | |
2165 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); | |
2166 tmpI[x]= am; | |
2167 am= (am+16)>>5; | |
2168 }else{ | |
2169 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); | |
2170 tmpI[x]= am; | |
2171 am= (am+32)>>6; | |
2172 } | |
2173 | |
2174 if(am&(~255)) am= ~(am>>31); | |
2175 tmp2[x]= am; | |
2176 } | |
2177 tmpI+= 64; | |
2178 tmp2+= stride; | |
2179 src += stride; | |
2180 } | |
2181 src -= stride*y; | |
2182 } | |
2183 src += HTAPS_MAX/2 - 1; | |
2184 tmp2= tmp2t[1]; | |
2185 | |
2186 if(b&2){ | |
2187 for(y=0; y < b_h; y++){ | |
2188 for(x=0; x < b_w+1; x++){ | |
2189 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; | |
2190 int a0= src[x + (HTAPS_MAX/2-3)*stride]; | |
2191 int a1= src[x + (HTAPS_MAX/2-2)*stride]; | |
2192 int a2= src[x + (HTAPS_MAX/2-1)*stride]; | |
2193 int a3= src[x + (HTAPS_MAX/2+0)*stride]; | |
2194 int a4= src[x + (HTAPS_MAX/2+1)*stride]; | |
2195 int a5= src[x + (HTAPS_MAX/2+2)*stride]; | |
2196 int a6= src[x + (HTAPS_MAX/2+3)*stride]; | |
2197 int am=0; | |
2198 if(!p || p->fast_mc) | |
2199 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; | |
2200 else | |
2201 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; | |
2202 | |
2203 if(am&(~255)) am= ~(am>>31); | |
2204 tmp2[x]= am; | |
2205 } | |
2206 src += stride; | |
2207 tmp2+= stride; | |
2208 } | |
2209 src -= stride*y; | |
2210 } | |
2211 src += stride*(HTAPS_MAX/2 - 1); | |
2212 tmp2= tmp2t[2]; | |
2213 tmpI= tmpIt; | |
2214 if(b&4){ | |
2215 for(y=0; y < b_h; y++){ | |
2216 for(x=0; x < b_w; x++){ | |
2217 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; | |
2218 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; | |
2219 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; | |
2220 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; | |
2221 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; | |
2222 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; | |
2223 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; | |
2224 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; | |
2225 int am=0; | |
2226 if(!p || p->fast_mc) | |
2227 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; | |
2228 else | |
2229 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; | |
2230 if(am&(~255)) am= ~(am>>31); | |
2231 tmp2[x]= am; | |
2232 } | |
2233 tmpI+= 64; | |
2234 tmp2+= stride; | |
2235 } | |
2236 } | |
2237 | |
2238 hpel[ 0]= src; | |
2239 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); | |
2240 hpel[ 2]= src + 1; | |
2241 | |
2242 hpel[ 4]= tmp2t[1]; | |
2243 hpel[ 5]= tmp2t[2]; | |
2244 hpel[ 6]= tmp2t[1] + 1; | |
2245 | |
2246 hpel[ 8]= src + stride; | |
2247 hpel[ 9]= hpel[1] + stride; | |
2248 hpel[10]= hpel[8] + 1; | |
2249 | |
2250 if(b==15){ | |
2251 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; | |
2252 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
2253 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
2254 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
2255 dx&=7; | |
2256 dy&=7; | |
2257 for(y=0; y < b_h; y++){ | |
2258 for(x=0; x < b_w; x++){ | |
2259 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
2260 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
2261 } | |
2262 src1+=stride; | |
2263 src2+=stride; | |
2264 src3+=stride; | |
2265 src4+=stride; | |
2266 dst +=stride; | |
2267 } | |
2268 }else{ | |
2269 const uint8_t *src1= hpel[l]; | |
2270 const uint8_t *src2= hpel[r]; | |
2271 int a= weight[((dx&7) + (8*(dy&7)))]; | |
2272 int b= 8-a; | |
2273 for(y=0; y < b_h; y++){ | |
2274 for(x=0; x < b_w; x++){ | |
2275 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
2276 } | |
2277 src1+=stride; | |
2278 src2+=stride; | |
2279 dst +=stride; | |
2280 } | |
2281 } | |
2282 } | |
2283 | |
2284 #define mca(dx,dy,b_w)\ | |
2285 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ | |
2286 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ | |
2287 assert(h==b_w);\ | |
2288 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ | |
2289 } | |
2290 | |
2291 mca( 0, 0,16) | |
2292 mca( 8, 0,16) | |
2293 mca( 0, 8,16) | |
2294 mca( 8, 8,16) | |
2295 mca( 0, 0,8) | |
2296 mca( 8, 0,8) | |
2297 mca( 0, 8,8) | |
2298 mca( 8, 8,8) | |
2299 | |
2300 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ | |
2301 if(block->type & BLOCK_INTRA){ | |
2302 int x, y; | |
2303 const int color = block->color[plane_index]; | |
2304 const int color4= color*0x01010101; | |
2305 if(b_w==32){ | |
2306 for(y=0; y < b_h; y++){ | |
2307 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2308 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2309 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2310 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2311 *(uint32_t*)&dst[16+ y*stride]= color4; | |
2312 *(uint32_t*)&dst[20+ y*stride]= color4; | |
2313 *(uint32_t*)&dst[24+ y*stride]= color4; | |
2314 *(uint32_t*)&dst[28+ y*stride]= color4; | |
2315 } | |
2316 }else if(b_w==16){ | |
2317 for(y=0; y < b_h; y++){ | |
2318 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2319 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2320 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2321 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2322 } | |
2323 }else if(b_w==8){ | |
2324 for(y=0; y < b_h; y++){ | |
2325 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2326 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2327 } | |
2328 }else if(b_w==4){ | |
2329 for(y=0; y < b_h; y++){ | |
2330 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2331 } | |
2332 }else{ | |
2333 for(y=0; y < b_h; y++){ | |
2334 for(x=0; x < b_w; x++){ | |
2335 dst[x + y*stride]= color; | |
2336 } | |
2337 } | |
2338 } | |
2339 }else{ | |
2340 uint8_t *src= s->last_picture[block->ref].data[plane_index]; | |
2341 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; | |
2342 int mx= block->mx*scale; | |
2343 int my= block->my*scale; | |
2344 const int dx= mx&15; | |
2345 const int dy= my&15; | |
2346 const int tab_index= 3 - (b_w>>2) + (b_w>>4); | |
2347 sx += (mx>>4) - (HTAPS_MAX/2-1); | |
2348 sy += (my>>4) - (HTAPS_MAX/2-1); | |
2349 src += sx + sy*stride; | |
2350 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) | |
2351 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ | |
2352 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); | |
2353 src= tmp + MB_SIZE; | |
2354 } | |
2355 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); | |
2356 // assert(!(b_w&(b_w-1))); | |
2357 assert(b_w>1 && b_h>1); | |
2358 assert((tab_index>=0 && tab_index<4) || b_w==32); | |
2359 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) | |
2360 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); | |
2361 else if(b_w==32){ | |
2362 int y; | |
2363 for(y=0; y<b_h; y+=16){ | |
2364 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); | |
2365 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); | |
2366 } | |
2367 }else if(b_w==b_h) | |
2368 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); | |
2369 else if(b_w==2*b_h){ | |
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); | |
2371 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); | |
2372 }else{ | |
2373 assert(2*b_w==b_h); | |
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); | |
2375 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); | |
2376 } | |
2377 } | |
2378 } | |
2379 | |
2380 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |
2381 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | |
2382 int y, x; | |
2383 IDWTELEM * dst; | |
2384 for(y=0; y<b_h; y++){ | |
2385 //FIXME ugly misuse of obmc_stride | |
2386 const uint8_t *obmc1= obmc + y*obmc_stride; | |
2387 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2388 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2389 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2390 dst = slice_buffer_get_line(sb, src_y + y); | |
2391 for(x=0; x<b_w; x++){ | |
2392 int v= obmc1[x] * block[3][x + y*src_stride] | |
2393 +obmc2[x] * block[2][x + y*src_stride] | |
2394 +obmc3[x] * block[1][x + y*src_stride] | |
2395 +obmc4[x] * block[0][x + y*src_stride]; | |
2396 | |
2397 v <<= 8 - LOG2_OBMC_MAX; | |
2398 if(FRAC_BITS != 8){ | |
2399 v >>= 8 - FRAC_BITS; | |
2400 } | |
2401 if(add){ | |
2402 v += dst[x + src_x]; | |
2403 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2404 if(v&(~255)) v= ~(v>>31); | |
2405 dst8[x + y*src_stride] = v; | |
2406 }else{ | |
2407 dst[x + src_x] -= v; | |
2408 } | |
2409 } | |
2410 } | |
2411 } | |
2412 | |
2413 //FIXME name cleanup (b_w, block_w, b_width stuff) | |
2414 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ | |
2415 const int b_width = s->b_width << s->block_max_depth; | |
2416 const int b_height= s->b_height << s->block_max_depth; | |
2417 const int b_stride= b_width; | |
2418 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
2419 BlockNode *rt= lt+1; | |
2420 BlockNode *lb= lt+b_stride; | |
2421 BlockNode *rb= lb+1; | |
2422 uint8_t *block[4]; | |
2423 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; | |
2424 uint8_t *tmp = s->scratchbuf; | |
2425 uint8_t *ptmp; | |
2426 int x,y; | |
2427 | |
2428 if(b_x<0){ | |
2429 lt= rt; | |
2430 lb= rb; | |
2431 }else if(b_x + 1 >= b_width){ | |
2432 rt= lt; | |
2433 rb= lb; | |
2434 } | |
2435 if(b_y<0){ | |
2436 lt= lb; | |
2437 rt= rb; | |
2438 }else if(b_y + 1 >= b_height){ | |
2439 lb= lt; | |
2440 rb= rt; | |
2441 } | |
2442 | |
2443 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 | |
2444 obmc -= src_x; | |
2445 b_w += src_x; | |
2446 if(!sliced && !offset_dst) | |
2447 dst -= src_x; | |
2448 src_x=0; | |
2449 }else if(src_x + b_w > w){ | |
2450 b_w = w - src_x; | |
2451 } | |
2452 if(src_y<0){ | |
2453 obmc -= src_y*obmc_stride; | |
2454 b_h += src_y; | |
2455 if(!sliced && !offset_dst) | |
2456 dst -= src_y*dst_stride; | |
2457 src_y=0; | |
2458 }else if(src_y + b_h> h){ | |
2459 b_h = h - src_y; | |
2460 } | |
2461 | |
2462 if(b_w<=0 || b_h<=0) return; | |
2463 | |
2464 assert(src_stride > 2*MB_SIZE + 5); | |
2465 | |
2466 if(!sliced && offset_dst) | |
2467 dst += src_x + src_y*dst_stride; | |
2468 dst8+= src_x + src_y*src_stride; | |
2469 // src += src_x + src_y*src_stride; | |
2470 | |
2471 ptmp= tmp + 3*tmp_step; | |
2472 block[0]= ptmp; | |
2473 ptmp+=tmp_step; | |
2474 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); | |
2475 | |
2476 if(same_block(lt, rt)){ | |
2477 block[1]= block[0]; | |
2478 }else{ | |
2479 block[1]= ptmp; | |
2480 ptmp+=tmp_step; | |
2481 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); | |
2482 } | |
2483 | |
2484 if(same_block(lt, lb)){ | |
2485 block[2]= block[0]; | |
2486 }else if(same_block(rt, lb)){ | |
2487 block[2]= block[1]; | |
2488 }else{ | |
2489 block[2]= ptmp; | |
2490 ptmp+=tmp_step; | |
2491 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); | |
2492 } | |
2493 | |
2494 if(same_block(lt, rb) ){ | |
2495 block[3]= block[0]; | |
2496 }else if(same_block(rt, rb)){ | |
2497 block[3]= block[1]; | |
2498 }else if(same_block(lb, rb)){ | |
2499 block[3]= block[2]; | |
2500 }else{ | |
2501 block[3]= ptmp; | |
2502 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); | |
2503 } | |
2504 #if 0 | |
2505 for(y=0; y<b_h; y++){ | |
2506 for(x=0; x<b_w; x++){ | |
2507 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2508 if(add) dst[x + y*dst_stride] += v; | |
2509 else dst[x + y*dst_stride] -= v; | |
2510 } | |
2511 } | |
2512 for(y=0; y<b_h; y++){ | |
2513 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2514 for(x=0; x<b_w; x++){ | |
2515 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2516 if(add) dst[x + y*dst_stride] += v; | |
2517 else dst[x + y*dst_stride] -= v; | |
2518 } | |
2519 } | |
2520 for(y=0; y<b_h; y++){ | |
2521 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2522 for(x=0; x<b_w; x++){ | |
2523 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2524 if(add) dst[x + y*dst_stride] += v; | |
2525 else dst[x + y*dst_stride] -= v; | |
2526 } | |
2527 } | |
2528 for(y=0; y<b_h; y++){ | |
2529 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2530 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2531 for(x=0; x<b_w; x++){ | |
2532 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2533 if(add) dst[x + y*dst_stride] += v; | |
2534 else dst[x + y*dst_stride] -= v; | |
2535 } | |
2536 } | |
2537 #else | |
2538 if(sliced){ | |
2539 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | |
2540 }else{ | |
2541 for(y=0; y<b_h; y++){ | |
2542 //FIXME ugly misuse of obmc_stride | |
2543 const uint8_t *obmc1= obmc + y*obmc_stride; | |
2544 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2545 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2546 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2547 for(x=0; x<b_w; x++){ | |
2548 int v= obmc1[x] * block[3][x + y*src_stride] | |
2549 +obmc2[x] * block[2][x + y*src_stride] | |
2550 +obmc3[x] * block[1][x + y*src_stride] | |
2551 +obmc4[x] * block[0][x + y*src_stride]; | |
2552 | |
2553 v <<= 8 - LOG2_OBMC_MAX; | |
2554 if(FRAC_BITS != 8){ | |
2555 v >>= 8 - FRAC_BITS; | |
2556 } | |
2557 if(add){ | |
2558 v += dst[x + y*dst_stride]; | |
2559 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2560 if(v&(~255)) v= ~(v>>31); | |
2561 dst8[x + y*src_stride] = v; | |
2562 }else{ | |
2563 dst[x + y*dst_stride] -= v; | |
2564 } | |
2565 } | |
2566 } | |
2567 } | |
2568 #endif /* 0 */ | |
2569 } | |
2570 | |
2571 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ | |
2572 Plane *p= &s->plane[plane_index]; | |
2573 const int mb_w= s->b_width << s->block_max_depth; | |
2574 const int mb_h= s->b_height << s->block_max_depth; | |
2575 int x, y, mb_x; | |
2576 int block_size = MB_SIZE >> s->block_max_depth; | |
2577 int block_w = plane_index ? block_size/2 : block_size; | |
2578 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2579 int obmc_stride= plane_index ? block_size : 2*block_size; | |
2580 int ref_stride= s->current_picture.linesize[plane_index]; | |
2581 uint8_t *dst8= s->current_picture.data[plane_index]; | |
2582 int w= p->width; | |
2583 int h= p->height; | |
2584 | |
2585 if(s->keyframe || (s->avctx->debug&512)){ | |
2586 if(mb_y==mb_h) | |
2587 return; | |
2588 | |
2589 if(add){ | |
2590 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2591 // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2592 IDWTELEM * line = sb->line[y]; | |
2593 for(x=0; x<w; x++){ | |
2594 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2595 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2596 v >>= FRAC_BITS; | |
2597 if(v&(~255)) v= ~(v>>31); | |
2598 dst8[x + y*ref_stride]= v; | |
2599 } | |
2600 } | |
2601 }else{ | |
2602 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2603 // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2604 IDWTELEM * line = sb->line[y]; | |
2605 for(x=0; x<w; x++){ | |
2606 line[x] -= 128 << FRAC_BITS; | |
2607 // buf[x + y*w]-= 128<<FRAC_BITS; | |
2608 } | |
2609 } | |
2610 } | |
2611 | |
2612 return; | |
2613 } | |
2614 | |
2615 for(mb_x=0; mb_x<=mb_w; mb_x++){ | |
2616 add_yblock(s, 1, sb, old_buffer, dst8, obmc, | |
2617 block_w*mb_x - block_w/2, | |
2618 block_w*mb_y - block_w/2, | |
2619 block_w, block_w, | |
2620 w, h, | |
2621 w, ref_stride, obmc_stride, | |
2622 mb_x - 1, mb_y - 1, | |
2623 add, 0, plane_index); | |
2624 } | |
2625 } | |
2626 | |
2627 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ | |
2628 Plane *p= &s->plane[plane_index]; | |
2629 const int mb_w= s->b_width << s->block_max_depth; | |
2630 const int mb_h= s->b_height << s->block_max_depth; | |
2631 int x, y, mb_x; | |
2632 int block_size = MB_SIZE >> s->block_max_depth; | |
2633 int block_w = plane_index ? block_size/2 : block_size; | |
2634 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2635 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2636 int ref_stride= s->current_picture.linesize[plane_index]; | |
2637 uint8_t *dst8= s->current_picture.data[plane_index]; | |
2638 int w= p->width; | |
2639 int h= p->height; | |
2640 | |
2641 if(s->keyframe || (s->avctx->debug&512)){ | |
2642 if(mb_y==mb_h) | |
2643 return; | |
2644 | |
2645 if(add){ | |
2646 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2647 for(x=0; x<w; x++){ | |
2648 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2649 v >>= FRAC_BITS; | |
2650 if(v&(~255)) v= ~(v>>31); | |
2651 dst8[x + y*ref_stride]= v; | |
2652 } | |
2653 } | |
2654 }else{ | |
2655 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ | |
2656 for(x=0; x<w; x++){ | |
2657 buf[x + y*w]-= 128<<FRAC_BITS; | |
2658 } | |
2659 } | |
2660 } | |
2661 | |
2662 return; | |
2663 } | |
2664 | |
2665 for(mb_x=0; mb_x<=mb_w; mb_x++){ | |
2666 add_yblock(s, 0, NULL, buf, dst8, obmc, | |
2667 block_w*mb_x - block_w/2, | |
2668 block_w*mb_y - block_w/2, | |
2669 block_w, block_w, | |
2670 w, h, | |
2671 w, ref_stride, obmc_stride, | |
2672 mb_x - 1, mb_y - 1, | |
2673 add, 1, plane_index); | |
2674 } | |
2675 } | |
2676 | |
2677 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ | |
2678 const int mb_h= s->b_height << s->block_max_depth; | |
2679 int mb_y; | |
2680 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2681 predict_slice(s, buf, plane_index, add, mb_y); | |
2682 } | 3335 } |
2683 | 3336 |
2684 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ | 3337 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
2685 int i, x2, y2; | 3338 int i, x2, y2; |
2686 Plane *p= &s->plane[plane_index]; | 3339 Plane *p= &s->plane[plane_index]; |
2936 } | 3589 } |
2937 } | 3590 } |
2938 return distortion + rate*penalty_factor; | 3591 return distortion + rate*penalty_factor; |
2939 } | 3592 } |
2940 | 3593 |
3594 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | |
3595 int level; | |
3596 for(level=decomposition_count-1; level>=0; level--){ | |
3597 switch(type){ | |
3598 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; | |
3599 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
3600 } | |
3601 } | |
3602 } | |
3603 | |
3604 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ | |
3605 const int support = type==1 ? 3 : 5; | |
3606 int level; | |
3607 if(type==2) return; | |
3608 | |
3609 for(level=decomposition_count-1; level>=0; level--){ | |
3610 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
3611 switch(type){ | |
3612 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
3613 break; | |
3614 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
3615 break; | |
3616 } | |
3617 } | |
3618 } | |
3619 } | |
3620 | |
3621 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | |
3622 DWTCompose cs[MAX_DECOMPOSITIONS]; | |
3623 int y; | |
3624 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
3625 for(y=0; y<height; y+=4) | |
3626 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
3627 } | |
3628 | |
3629 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ | |
3630 const int w= b->width; | |
3631 const int h= b->height; | |
3632 int x, y; | |
3633 | |
3634 if(1){ | |
3635 int run=0; | |
3636 int runs[w*h]; | |
3637 int run_index=0; | |
3638 int max_index; | |
3639 | |
3640 for(y=0; y<h; y++){ | |
3641 for(x=0; x<w; x++){ | |
3642 int v, p=0; | |
3643 int /*ll=0, */l=0, lt=0, t=0, rt=0; | |
3644 v= src[x + y*stride]; | |
3645 | |
3646 if(y){ | |
3647 t= src[x + (y-1)*stride]; | |
3648 if(x){ | |
3649 lt= src[x - 1 + (y-1)*stride]; | |
3650 } | |
3651 if(x + 1 < w){ | |
3652 rt= src[x + 1 + (y-1)*stride]; | |
3653 } | |
3654 } | |
3655 if(x){ | |
3656 l= src[x - 1 + y*stride]; | |
3657 /*if(x > 1){ | |
3658 if(orientation==1) ll= src[y + (x-2)*stride]; | |
3659 else ll= src[x - 2 + y*stride]; | |
3660 }*/ | |
3661 } | |
3662 if(parent){ | |
3663 int px= x>>1; | |
3664 int py= y>>1; | |
3665 if(px<b->parent->width && py<b->parent->height) | |
3666 p= parent[px + py*2*stride]; | |
3667 } | |
3668 if(!(/*ll|*/l|lt|t|rt|p)){ | |
3669 if(v){ | |
3670 runs[run_index++]= run; | |
3671 run=0; | |
3672 }else{ | |
3673 run++; | |
3674 } | |
3675 } | |
3676 } | |
3677 } | |
3678 max_index= run_index; | |
3679 runs[run_index++]= run; | |
3680 run_index=0; | |
3681 run= runs[run_index++]; | |
3682 | |
3683 put_symbol2(&s->c, b->state[30], max_index, 0); | |
3684 if(run_index <= max_index) | |
3685 put_symbol2(&s->c, b->state[1], run, 3); | |
3686 | |
3687 for(y=0; y<h; y++){ | |
3688 if(s->c.bytestream_end - s->c.bytestream < w*40){ | |
3689 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); | |
3690 return -1; | |
3691 } | |
3692 for(x=0; x<w; x++){ | |
3693 int v, p=0; | |
3694 int /*ll=0, */l=0, lt=0, t=0, rt=0; | |
3695 v= src[x + y*stride]; | |
3696 | |
3697 if(y){ | |
3698 t= src[x + (y-1)*stride]; | |
3699 if(x){ | |
3700 lt= src[x - 1 + (y-1)*stride]; | |
3701 } | |
3702 if(x + 1 < w){ | |
3703 rt= src[x + 1 + (y-1)*stride]; | |
3704 } | |
3705 } | |
3706 if(x){ | |
3707 l= src[x - 1 + y*stride]; | |
3708 /*if(x > 1){ | |
3709 if(orientation==1) ll= src[y + (x-2)*stride]; | |
3710 else ll= src[x - 2 + y*stride]; | |
3711 }*/ | |
3712 } | |
3713 if(parent){ | |
3714 int px= x>>1; | |
3715 int py= y>>1; | |
3716 if(px<b->parent->width && py<b->parent->height) | |
3717 p= parent[px + py*2*stride]; | |
3718 } | |
3719 if(/*ll|*/l|lt|t|rt|p){ | |
3720 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); | |
3721 | |
3722 put_rac(&s->c, &b->state[0][context], !!v); | |
3723 }else{ | |
3724 if(!run){ | |
3725 run= runs[run_index++]; | |
3726 | |
3727 if(run_index <= max_index) | |
3728 put_symbol2(&s->c, b->state[1], run, 3); | |
3729 assert(v); | |
3730 }else{ | |
3731 run--; | |
3732 assert(!v); | |
3733 } | |
3734 } | |
3735 if(v){ | |
3736 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); | |
3737 int l2= 2*FFABS(l) + (l<0); | |
3738 int t2= 2*FFABS(t) + (t<0); | |
3739 | |
3740 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
3741 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); | |
3742 } | |
3743 } | |
3744 } | |
3745 } | |
3746 return 0; | |
3747 } | |
3748 | |
3749 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ | |
3750 // encode_subband_qtree(s, b, src, parent, stride, orientation); | |
3751 // encode_subband_z0run(s, b, src, parent, stride, orientation); | |
3752 return encode_subband_c0run(s, b, src, parent, stride, orientation); | |
3753 // encode_subband_dzr(s, b, src, parent, stride, orientation); | |
3754 } | |
3755 | |
2941 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ | 3756 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
2942 const int b_stride= s->b_width << s->block_max_depth; | 3757 const int b_stride= s->b_width << s->block_max_depth; |
2943 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | 3758 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
2944 BlockNode backup= *block; | 3759 BlockNode backup= *block; |
2945 int rd, index, value; | 3760 int rd, index, value; |
3242 } | 4057 } |
3243 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | 4058 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); |
3244 } | 4059 } |
3245 } | 4060 } |
3246 | 4061 |
4062 static void encode_blocks(SnowContext *s, int search){ | |
4063 int x, y; | |
4064 int w= s->b_width; | |
4065 int h= s->b_height; | |
4066 | |
4067 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) | |
4068 iterative_me(s); | |
4069 | |
4070 for(y=0; y<h; y++){ | |
4071 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit | |
4072 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); | |
4073 return; | |
4074 } | |
4075 for(x=0; x<w; x++){ | |
4076 if(s->avctx->me_method == ME_ITER || !search) | |
4077 encode_q_branch2(s, 0, x, y); | |
4078 else | |
4079 encode_q_branch (s, 0, x, y); | |
4080 } | |
4081 } | |
4082 } | |
4083 | |
3247 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ | 4084 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
3248 const int w= b->width; | 4085 const int w= b->width; |
3249 const int h= b->height; | 4086 const int h= b->height; |
3250 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 4087 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3251 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); | 4088 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
3303 } | 4140 } |
3304 } | 4141 } |
3305 } | 4142 } |
3306 } | 4143 } |
3307 | 4144 |
3308 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ | |
3309 const int w= b->width; | |
3310 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | |
3311 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | |
3312 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | |
3313 int x,y; | |
3314 | |
3315 if(s->qlog == LOSSLESS_QLOG) return; | |
3316 | |
3317 for(y=start_y; y<end_y; y++){ | |
3318 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
3319 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
3320 for(x=0; x<w; x++){ | |
3321 int i= line[x]; | |
3322 if(i<0){ | |
3323 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
3324 }else if(i>0){ | |
3325 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
3326 } | |
3327 } | |
3328 } | |
3329 } | |
3330 | |
3331 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ | 4145 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
3332 const int w= b->width; | 4146 const int w= b->width; |
3333 const int h= b->height; | 4147 const int h= b->height; |
3334 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); | 4148 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
3335 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); | 4149 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3367 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | 4181 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); |
3368 else src[i] -= src[i - 1]; | 4182 else src[i] -= src[i - 1]; |
3369 } | 4183 } |
3370 }else{ | 4184 }else{ |
3371 if(y) src[i] -= src[i - stride]; | 4185 if(y) src[i] -= src[i - stride]; |
3372 } | |
3373 } | |
3374 } | |
3375 } | |
3376 | |
3377 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ | |
3378 const int w= b->width; | |
3379 int x,y; | |
3380 | |
3381 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning | |
3382 IDWTELEM * prev; | |
3383 | |
3384 if (start_y != 0) | |
3385 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
3386 | |
3387 for(y=start_y; y<end_y; y++){ | |
3388 prev = line; | |
3389 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
3390 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
3391 for(x=0; x<w; x++){ | |
3392 if(x){ | |
3393 if(use_median){ | |
3394 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); | |
3395 else line[x] += line[x - 1]; | |
3396 }else{ | |
3397 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); | |
3398 else line[x] += line[x - 1]; | |
3399 } | |
3400 }else{ | |
3401 if(y) line[x] += prev[x]; | |
3402 } | 4186 } |
3403 } | 4187 } |
3404 } | 4188 } |
3405 } | 4189 } |
3406 | 4190 |
3528 s->last_qlog = s->qlog; | 4312 s->last_qlog = s->qlog; |
3529 s->last_qbias = s->qbias; | 4313 s->last_qbias = s->qbias; |
3530 s->last_mv_scale = s->mv_scale; | 4314 s->last_mv_scale = s->mv_scale; |
3531 s->last_block_max_depth = s->block_max_depth; | 4315 s->last_block_max_depth = s->block_max_depth; |
3532 s->last_spatial_decomposition_count = s->spatial_decomposition_count; | 4316 s->last_spatial_decomposition_count = s->spatial_decomposition_count; |
3533 } | |
3534 | |
3535 static void decode_qlogs(SnowContext *s){ | |
3536 int plane_index, level, orientation; | |
3537 | |
3538 for(plane_index=0; plane_index<3; plane_index++){ | |
3539 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3540 for(orientation=level ? 1:0; orientation<4; orientation++){ | |
3541 int q; | |
3542 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; | |
3543 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; | |
3544 else q= get_symbol(&s->c, s->header_state, 1); | |
3545 s->plane[plane_index].band[level][orientation].qlog= q; | |
3546 } | |
3547 } | |
3548 } | |
3549 } | |
3550 | |
3551 #define GET_S(dst, check) \ | |
3552 tmp= get_symbol(&s->c, s->header_state, 0);\ | |
3553 if(!(check)){\ | |
3554 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\ | |
3555 return -1;\ | |
3556 }\ | |
3557 dst= tmp; | |
3558 | |
3559 static int decode_header(SnowContext *s){ | |
3560 int plane_index, tmp; | |
3561 uint8_t kstate[32]; | |
3562 | |
3563 memset(kstate, MID_STATE, sizeof(kstate)); | |
3564 | |
3565 s->keyframe= get_rac(&s->c, kstate); | |
3566 if(s->keyframe || s->always_reset){ | |
3567 reset_contexts(s); | |
3568 s->spatial_decomposition_type= | |
3569 s->qlog= | |
3570 s->qbias= | |
3571 s->mv_scale= | |
3572 s->block_max_depth= 0; | |
3573 } | |
3574 if(s->keyframe){ | |
3575 GET_S(s->version, tmp <= 0U) | |
3576 s->always_reset= get_rac(&s->c, s->header_state); | |
3577 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); | |
3578 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3579 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) | |
3580 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); | |
3581 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
3582 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
3583 s->spatial_scalability= get_rac(&s->c, s->header_state); | |
3584 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
3585 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES) | |
3586 s->max_ref_frames++; | |
3587 | |
3588 decode_qlogs(s); | |
3589 } | |
3590 | |
3591 if(!s->keyframe){ | |
3592 if(get_rac(&s->c, s->header_state)){ | |
3593 for(plane_index=0; plane_index<2; plane_index++){ | |
3594 int htaps, i, sum=0; | |
3595 Plane *p= &s->plane[plane_index]; | |
3596 p->diag_mc= get_rac(&s->c, s->header_state); | |
3597 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; | |
3598 if((unsigned)htaps > HTAPS_MAX || htaps==0) | |
3599 return -1; | |
3600 p->htaps= htaps; | |
3601 for(i= htaps/2; i; i--){ | |
3602 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); | |
3603 sum += p->hcoeff[i]; | |
3604 } | |
3605 p->hcoeff[0]= 32-sum; | |
3606 } | |
3607 s->plane[2].diag_mc= s->plane[1].diag_mc; | |
3608 s->plane[2].htaps = s->plane[1].htaps; | |
3609 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); | |
3610 } | |
3611 if(get_rac(&s->c, s->header_state)){ | |
3612 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) | |
3613 decode_qlogs(s); | |
3614 } | |
3615 } | |
3616 | |
3617 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); | |
3618 if(s->spatial_decomposition_type > 1U){ | |
3619 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); | |
3620 return -1; | |
3621 } | |
3622 if(FFMIN(s->avctx-> width>>s->chroma_h_shift, | |
3623 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){ | |
3624 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count); | |
3625 return -1; | |
3626 } | |
3627 | |
3628 s->qlog += get_symbol(&s->c, s->header_state, 1); | |
3629 s->mv_scale += get_symbol(&s->c, s->header_state, 1); | |
3630 s->qbias += get_symbol(&s->c, s->header_state, 1); | |
3631 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); | |
3632 if(s->block_max_depth > 1 || s->block_max_depth < 0){ | |
3633 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); | |
3634 s->block_max_depth= 0; | |
3635 return -1; | |
3636 } | |
3637 | |
3638 return 0; | |
3639 } | |
3640 | |
3641 static void init_qexp(void){ | |
3642 int i; | |
3643 double v=128; | |
3644 | |
3645 for(i=0; i<QROOT; i++){ | |
3646 qexp[i]= lrintf(v); | |
3647 v *= pow(2, 1.0 / QROOT); | |
3648 } | |
3649 } | |
3650 | |
3651 static av_cold int common_init(AVCodecContext *avctx){ | |
3652 SnowContext *s = avctx->priv_data; | |
3653 int width, height; | |
3654 int i, j; | |
3655 | |
3656 s->avctx= avctx; | |
3657 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe | |
3658 | |
3659 dsputil_init(&s->dsp, avctx); | |
3660 | |
3661 #define mcf(dx,dy)\ | |
3662 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3663 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
3664 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ | |
3665 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ | |
3666 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ | |
3667 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; | |
3668 | |
3669 mcf( 0, 0) | |
3670 mcf( 4, 0) | |
3671 mcf( 8, 0) | |
3672 mcf(12, 0) | |
3673 mcf( 0, 4) | |
3674 mcf( 4, 4) | |
3675 mcf( 8, 4) | |
3676 mcf(12, 4) | |
3677 mcf( 0, 8) | |
3678 mcf( 4, 8) | |
3679 mcf( 8, 8) | |
3680 mcf(12, 8) | |
3681 mcf( 0,12) | |
3682 mcf( 4,12) | |
3683 mcf( 8,12) | |
3684 mcf(12,12) | |
3685 | |
3686 #define mcfh(dx,dy)\ | |
3687 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3688 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
3689 mc_block_hpel ## dx ## dy ## 16;\ | |
3690 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ | |
3691 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ | |
3692 mc_block_hpel ## dx ## dy ## 8; | |
3693 | |
3694 mcfh(0, 0) | |
3695 mcfh(8, 0) | |
3696 mcfh(0, 8) | |
3697 mcfh(8, 8) | |
3698 | |
3699 if(!qexp[0]) | |
3700 init_qexp(); | |
3701 | |
3702 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); | |
3703 | |
3704 width= s->avctx->width; | |
3705 height= s->avctx->height; | |
3706 | |
3707 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); | |
3708 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here | |
3709 | |
3710 for(i=0; i<MAX_REF_FRAMES; i++) | |
3711 for(j=0; j<MAX_REF_FRAMES; j++) | |
3712 scale_mv_ref[i][j] = 256*(i+1)/(j+1); | |
3713 | |
3714 s->avctx->get_buffer(s->avctx, &s->mconly_picture); | |
3715 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE); | |
3716 | |
3717 return 0; | |
3718 } | |
3719 | |
3720 static int common_init_after_header(AVCodecContext *avctx){ | |
3721 SnowContext *s = avctx->priv_data; | |
3722 int plane_index, level, orientation; | |
3723 | |
3724 for(plane_index=0; plane_index<3; plane_index++){ | |
3725 int w= s->avctx->width; | |
3726 int h= s->avctx->height; | |
3727 | |
3728 if(plane_index){ | |
3729 w>>= s->chroma_h_shift; | |
3730 h>>= s->chroma_v_shift; | |
3731 } | |
3732 s->plane[plane_index].width = w; | |
3733 s->plane[plane_index].height= h; | |
3734 | |
3735 for(level=s->spatial_decomposition_count-1; level>=0; level--){ | |
3736 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3737 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
3738 | |
3739 b->buf= s->spatial_dwt_buffer; | |
3740 b->level= level; | |
3741 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3742 b->width = (w + !(orientation&1))>>1; | |
3743 b->height= (h + !(orientation>1))>>1; | |
3744 | |
3745 b->stride_line = 1 << (s->spatial_decomposition_count - level); | |
3746 b->buf_x_offset = 0; | |
3747 b->buf_y_offset = 0; | |
3748 | |
3749 if(orientation&1){ | |
3750 b->buf += (w+1)>>1; | |
3751 b->buf_x_offset = (w+1)>>1; | |
3752 } | |
3753 if(orientation>1){ | |
3754 b->buf += b->stride>>1; | |
3755 b->buf_y_offset = b->stride_line >> 1; | |
3756 } | |
3757 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); | |
3758 | |
3759 if(level) | |
3760 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
3761 //FIXME avoid this realloc | |
3762 av_freep(&b->x_coeff); | |
3763 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); | |
3764 } | |
3765 w= (w+1)>>1; | |
3766 h= (h+1)>>1; | |
3767 } | |
3768 } | |
3769 | |
3770 return 0; | |
3771 } | 4317 } |
3772 | 4318 |
3773 static int qscale2qlog(int qscale){ | 4319 static int qscale2qlog(int qscale){ |
3774 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) | 4320 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
3775 + 61*QROOT/8; //<64 >60 | 4321 + 61*QROOT/8; //<64 >60 |
3849 } | 4395 } |
3850 | 4396 |
3851 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | 4397 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); |
3852 } | 4398 } |
3853 } | 4399 } |
3854 } | |
3855 | |
3856 #define QUANTIZE2 0 | |
3857 | |
3858 #if QUANTIZE2==1 | |
3859 #define Q2_STEP 8 | |
3860 | |
3861 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
3862 SubBand *b= &p->band[level][orientation]; | |
3863 int x, y; | |
3864 int xo=0; | |
3865 int yo=0; | |
3866 int step= 1 << (s->spatial_decomposition_count - level); | |
3867 | |
3868 if(orientation&1) | |
3869 xo= step>>1; | |
3870 if(orientation&2) | |
3871 yo= step>>1; | |
3872 | |
3873 //FIXME bias for nonzero ? | |
3874 //FIXME optimize | |
3875 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
3876 for(y=0; y<p->height; y++){ | |
3877 for(x=0; x<p->width; x++){ | |
3878 int sx= (x-xo + step/2) / step / Q2_STEP; | |
3879 int sy= (y-yo + step/2) / step / Q2_STEP; | |
3880 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
3881 assert(sx>=0 && sy>=0 && sx < score_stride); | |
3882 v= ((v+8)>>4)<<4; | |
3883 score[sx + sy*score_stride] += v*v; | |
3884 assert(score[sx + sy*score_stride] >= 0); | |
3885 } | |
3886 } | |
3887 } | |
3888 | |
3889 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
3890 int level, orientation; | |
3891 | |
3892 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3893 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3894 SubBand *b= &p->band[level][orientation]; | |
3895 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3896 | |
3897 dequantize(s, b, dst, b->stride); | |
3898 } | |
3899 } | |
3900 } | |
3901 | |
3902 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
3903 int level, orientation, ys, xs, x, y, pass; | |
3904 IDWTELEM best_dequant[height * stride]; | |
3905 IDWTELEM idwt2_buffer[height * stride]; | |
3906 const int score_stride= (width + 10)/Q2_STEP; | |
3907 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3908 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3909 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
3910 | |
3911 //FIXME pass the copy cleanly ? | |
3912 | |
3913 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
3914 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3915 | |
3916 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3917 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3918 SubBand *b= &p->band[level][orientation]; | |
3919 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3920 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
3921 assert(src == b->buf); // code does not depend on this but it is true currently | |
3922 | |
3923 quantize(s, b, dst, src, b->stride, s->qbias); | |
3924 } | |
3925 } | |
3926 for(pass=0; pass<1; pass++){ | |
3927 if(s->qbias == 0) //keyframe | |
3928 continue; | |
3929 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3930 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3931 SubBand *b= &p->band[level][orientation]; | |
3932 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3933 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3934 | |
3935 for(ys= 0; ys<Q2_STEP; ys++){ | |
3936 for(xs= 0; xs<Q2_STEP; xs++){ | |
3937 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3938 dequantize_all(s, p, idwt2_buffer, width, height); | |
3939 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3940 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3941 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3942 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3943 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3944 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
3945 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
3946 //FIXME try more than just -- | |
3947 } | |
3948 } | |
3949 dequantize_all(s, p, idwt2_buffer, width, height); | |
3950 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3951 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3952 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3953 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3954 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
3955 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
3956 best_score[score_idx]= score[score_idx]; | |
3957 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
3958 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
3959 //FIXME copy instead | |
3960 } | |
3961 } | |
3962 } | |
3963 } | |
3964 } | |
3965 } | |
3966 } | |
3967 } | |
3968 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end | |
3969 } | |
3970 | |
3971 #endif /* QUANTIZE2==1 */ | |
3972 | |
3973 static av_cold int encode_init(AVCodecContext *avctx) | |
3974 { | |
3975 SnowContext *s = avctx->priv_data; | |
3976 int plane_index; | |
3977 | |
3978 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ | |
3979 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" | |
3980 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); | |
3981 return -1; | |
3982 } | |
3983 | |
3984 if(avctx->prediction_method == DWT_97 | |
3985 && (avctx->flags & CODEC_FLAG_QSCALE) | |
3986 && avctx->global_quality == 0){ | |
3987 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); | |
3988 return -1; | |
3989 } | |
3990 | |
3991 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type | |
3992 | |
3993 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; | |
3994 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; | |
3995 | |
3996 for(plane_index=0; plane_index<3; plane_index++){ | |
3997 s->plane[plane_index].diag_mc= 1; | |
3998 s->plane[plane_index].htaps= 6; | |
3999 s->plane[plane_index].hcoeff[0]= 40; | |
4000 s->plane[plane_index].hcoeff[1]= -10; | |
4001 s->plane[plane_index].hcoeff[2]= 2; | |
4002 s->plane[plane_index].fast_mc= 1; | |
4003 } | |
4004 | |
4005 common_init(avctx); | |
4006 alloc_blocks(s); | |
4007 | |
4008 s->version=0; | |
4009 | |
4010 s->m.avctx = avctx; | |
4011 s->m.flags = avctx->flags; | |
4012 s->m.bit_rate= avctx->bit_rate; | |
4013 | |
4014 s->m.me.temp = | |
4015 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); | |
4016 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
4017 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
4018 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); | |
4019 h263_encode_init(&s->m); //mv_penalty | |
4020 | |
4021 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); | |
4022 | |
4023 if(avctx->flags&CODEC_FLAG_PASS1){ | |
4024 if(!avctx->stats_out) | |
4025 avctx->stats_out = av_mallocz(256); | |
4026 } | |
4027 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ | |
4028 if(ff_rate_control_init(&s->m) < 0) | |
4029 return -1; | |
4030 } | |
4031 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); | |
4032 | |
4033 avctx->coded_frame= &s->current_picture; | |
4034 switch(avctx->pix_fmt){ | |
4035 // case PIX_FMT_YUV444P: | |
4036 // case PIX_FMT_YUV422P: | |
4037 case PIX_FMT_YUV420P: | |
4038 case PIX_FMT_GRAY8: | |
4039 // case PIX_FMT_YUV411P: | |
4040 // case PIX_FMT_YUV410P: | |
4041 s->colorspace_type= 0; | |
4042 break; | |
4043 /* case PIX_FMT_RGB32: | |
4044 s->colorspace= 1; | |
4045 break;*/ | |
4046 default: | |
4047 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); | |
4048 return -1; | |
4049 } | |
4050 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
4051 s->chroma_h_shift= 1; | |
4052 s->chroma_v_shift= 1; | |
4053 | |
4054 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); | |
4055 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); | |
4056 | |
4057 s->avctx->get_buffer(s->avctx, &s->input_picture); | |
4058 | |
4059 if(s->avctx->me_method == ME_ITER){ | |
4060 int i; | |
4061 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
4062 for(i=0; i<s->max_ref_frames; i++){ | |
4063 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
4064 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
4065 } | |
4066 } | |
4067 | |
4068 return 0; | |
4069 } | |
4070 | |
4071 #define USE_HALFPEL_PLANE 0 | |
4072 | |
4073 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ | |
4074 int p,x,y; | |
4075 | |
4076 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); | |
4077 | |
4078 for(p=0; p<3; p++){ | |
4079 int is_chroma= !!p; | |
4080 int w= s->avctx->width >>is_chroma; | |
4081 int h= s->avctx->height >>is_chroma; | |
4082 int ls= frame->linesize[p]; | |
4083 uint8_t *src= frame->data[p]; | |
4084 | |
4085 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); | |
4086 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); | |
4087 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); | |
4088 | |
4089 halfpel[0][p]= src; | |
4090 for(y=0; y<h; y++){ | |
4091 for(x=0; x<w; x++){ | |
4092 int i= y*ls + x; | |
4093 | |
4094 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; | |
4095 } | |
4096 } | |
4097 for(y=0; y<h; y++){ | |
4098 for(x=0; x<w; x++){ | |
4099 int i= y*ls + x; | |
4100 | |
4101 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; | |
4102 } | |
4103 } | |
4104 src= halfpel[1][p]; | |
4105 for(y=0; y<h; y++){ | |
4106 for(x=0; x<w; x++){ | |
4107 int i= y*ls + x; | |
4108 | |
4109 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; | |
4110 } | |
4111 } | |
4112 | |
4113 //FIXME border! | |
4114 } | |
4115 } | |
4116 | |
4117 static void release_buffer(AVCodecContext *avctx){ | |
4118 SnowContext *s = avctx->priv_data; | |
4119 int i; | |
4120 | |
4121 if(s->last_picture[s->max_ref_frames-1].data[0]){ | |
4122 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); | |
4123 for(i=0; i<9; i++) | |
4124 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) | |
4125 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); | |
4126 } | |
4127 } | |
4128 | |
4129 static int frame_start(SnowContext *s){ | |
4130 AVFrame tmp; | |
4131 int w= s->avctx->width; //FIXME round up to x16 ? | |
4132 int h= s->avctx->height; | |
4133 | |
4134 if(s->current_picture.data[0]){ | |
4135 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); | |
4136 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
4137 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
4138 } | |
4139 | |
4140 release_buffer(s->avctx); | |
4141 | |
4142 tmp= s->last_picture[s->max_ref_frames-1]; | |
4143 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
4144 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); | |
4145 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) | |
4146 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); | |
4147 s->last_picture[0]= s->current_picture; | |
4148 s->current_picture= tmp; | |
4149 | |
4150 if(s->keyframe){ | |
4151 s->ref_frames= 0; | |
4152 }else{ | |
4153 int i; | |
4154 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
4155 if(i && s->last_picture[i-1].key_frame) | |
4156 break; | |
4157 s->ref_frames= i; | |
4158 if(s->ref_frames==0){ | |
4159 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n"); | |
4160 return -1; | |
4161 } | |
4162 } | |
4163 | |
4164 s->current_picture.reference= 1; | |
4165 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
4166 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
4167 return -1; | |
4168 } | |
4169 | |
4170 s->current_picture.key_frame= s->keyframe; | |
4171 | |
4172 return 0; | |
4173 } | 4400 } |
4174 | 4401 |
4175 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | 4402 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ |
4176 SnowContext *s = avctx->priv_data; | 4403 SnowContext *s = avctx->priv_data; |
4177 RangeCoder * const c= &s->c; | 4404 RangeCoder * const c= &s->c; |
4445 emms_c(); | 4672 emms_c(); |
4446 | 4673 |
4447 return ff_rac_terminate(c); | 4674 return ff_rac_terminate(c); |
4448 } | 4675 } |
4449 | 4676 |
4450 static av_cold void common_end(SnowContext *s){ | |
4451 int plane_index, level, orientation, i; | |
4452 | |
4453 av_freep(&s->spatial_dwt_buffer); | |
4454 av_freep(&s->spatial_idwt_buffer); | |
4455 | |
4456 s->m.me.temp= NULL; | |
4457 av_freep(&s->m.me.scratchpad); | |
4458 av_freep(&s->m.me.map); | |
4459 av_freep(&s->m.me.score_map); | |
4460 av_freep(&s->m.obmc_scratchpad); | |
4461 | |
4462 av_freep(&s->block); | |
4463 av_freep(&s->scratchbuf); | |
4464 | |
4465 for(i=0; i<MAX_REF_FRAMES; i++){ | |
4466 av_freep(&s->ref_mvs[i]); | |
4467 av_freep(&s->ref_scores[i]); | |
4468 if(s->last_picture[i].data[0]) | |
4469 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
4470 } | |
4471 | |
4472 for(plane_index=0; plane_index<3; plane_index++){ | |
4473 for(level=s->spatial_decomposition_count-1; level>=0; level--){ | |
4474 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4475 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
4476 | |
4477 av_freep(&b->x_coeff); | |
4478 } | |
4479 } | |
4480 } | |
4481 } | |
4482 | |
4483 static av_cold int encode_end(AVCodecContext *avctx) | 4677 static av_cold int encode_end(AVCodecContext *avctx) |
4484 { | 4678 { |
4485 SnowContext *s = avctx->priv_data; | 4679 SnowContext *s = avctx->priv_data; |
4486 | 4680 |
4487 common_end(s); | 4681 common_end(s); |
4488 av_free(avctx->stats_out); | 4682 av_free(avctx->stats_out); |
4489 | 4683 |
4490 return 0; | 4684 return 0; |
4491 } | 4685 } |
4492 | 4686 |
4493 static av_cold int decode_init(AVCodecContext *avctx) | |
4494 { | |
4495 avctx->pix_fmt= PIX_FMT_YUV420P; | |
4496 | |
4497 common_init(avctx); | |
4498 | |
4499 return 0; | |
4500 } | |
4501 | |
/**
 * Decode one packet into a picture.
 *
 * Sets up the range decoder on the packet data, parses the header, rebuilds
 * the slice buffer, starts a new frame, decodes the block data and then, for
 * each of the three planes, unpacks the subband coefficients and processes
 * the plane slice by slice.
 *
 * @param avctx     codec context; priv_data is the SnowContext
 * @param data      receives a copy of the output AVFrame
 * @param data_size set to sizeof(AVFrame)
 * @param avpkt     input packet (data and size are read)
 * @return distance between the range coder's current and start bytestream
 *         pointers, or -1 if decode_header() or frame_start() fails
 */
4502 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ | |
4503 const uint8_t *buf = avpkt->data; | |
4504 int buf_size = avpkt->size; | |
4505 SnowContext *s = avctx->priv_data; | |
4506 RangeCoder * const c= &s->c; | |
4507 int bytes_read; | |
4508 AVFrame *picture = data; | |
4509 int level, orientation, plane_index; | |
4510 | |
    /* Point the range coder at the packet payload. */
4511 ff_init_range_decoder(c, buf, buf_size); | |
4512 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
4513 | |
4514 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
4515 if(decode_header(s)<0) | |
4516 return -1; | |
4517 common_init_after_header(avctx); | |
4518 | |
4519 // realloc slice buffer for the case that spatial_decomposition_count changed | |
4520 slice_buffer_destroy(&s->sb); | |
4521 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); | |
4522 | |
    /* fast_mc is set only for planes using diag_mc with exactly 6 taps and
     * the coefficient triple 40, -10, 2. */
4523 for(plane_index=0; plane_index<3; plane_index++){ | |
4524 Plane *p= &s->plane[plane_index]; | |
4525 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 | |
4526 && p->hcoeff[1]==-10 | |
4527 && p->hcoeff[2]==2; | |
4528 } | |
4529 | |
4530 alloc_blocks(s); | |
4531 | |
4532 if(frame_start(s) < 0) | |
4533 return -1; | |
4534 //keyframe flag duplication mess FIXME | |
    /* Note: this debug output is emitted at AV_LOG_ERROR level. */
4535 if(avctx->debug&FF_DEBUG_PICT_INFO) | |
4536 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
4537 | |
4538 decode_blocks(s); | |
4539 | |
4540 for(plane_index=0; plane_index<3; plane_index++){ | |
4541 Plane *p= &s->plane[plane_index]; | |
4542 int w= p->width; | |
4543 int h= p->height; | |
4544 int x, y; | |
4545 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ | |
4546 | |
    /* Debug flag 2048: run predict_plane() on its own and copy the resulting
     * current_picture samples into mconly_picture, which is returned instead
     * of current_picture at the end of this function. */
4547 if(s->avctx->debug&2048){ | |
4548 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
4549 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4550 | |
4551 for(y=0; y<h; y++){ | |
4552 for(x=0; x<w; x++){ | |
4553 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | |
4554 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | |
4555 } | |
4556 } | |
4557 } | |
4558 | |
    /* Unpack the coefficients of every subband of this plane; orientation 0
     * is only visited at level 0. */
4559 { | |
4560 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4561 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4562 SubBand *b= &p->band[level][orientation]; | |
4563 unpack_coeffs(s, b, b->parent, orientation); | |
4564 } | |
4565 } | |
4566 } | |
4567 | |
    /* Slice-wise reconstruction. yd and yq persist across iterations of the
     * mb_y loop and only ever move forward. The y and end_y declared here
     * shadow the outer y; end_y is shadowed again inside the subband loop. */
4568 { | |
4569 const int mb_h= s->b_height << s->block_max_depth; | |
4570 const int block_size = MB_SIZE >> s->block_max_depth; | |
4571 const int block_w = plane_index ? block_size/2 : block_size; | |
4572 int mb_y; | |
4573 DWTCompose cs[MAX_DECOMPOSITIONS]; | |
4574 int yd=0, yq=0; | |
4575 int y; | |
4576 int end_y; | |
4577 | |
4578 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
    /* The loop bound is inclusive: mb_h+1 iterations are run. */
4579 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
4580 | |
4581 int slice_starty = block_w*mb_y; | |
4582 int slice_h = block_w*(mb_y+1); | |
    /* Unless this is a keyframe or debug flag 512 is set, the slice window
     * is moved up by half a block (start clamped at 0). */
4583 if (!(s->keyframe || s->avctx->debug&512)){ | |
4584 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | |
4585 slice_h -= (block_w >> 1); | |
4586 } | |
4587 | |
    /* For each subband derive the row range [start_y, end_y) to decode for
     * this slice: block rows scaled down by (spatial_decomposition_count -
     * level), plus a margin of that same difference and "extra" rows, then
     * clamped to the subband height. */
4588 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4589 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4590 SubBand *b= &p->band[level][orientation]; | |
4591 int start_y; | |
4592 int end_y; | |
4593 int our_mb_start = mb_y; | |
4594 int our_mb_end = (mb_y + 1); | |
4595 const int extra= 3; | |
4596 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
4597 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
4598 if (!(s->keyframe || s->avctx->debug&512)){ | |
4599 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
4600 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
4601 } | |
4602 start_y = FFMIN(b->height, start_y); | |
4603 end_y = FFMIN(b->height, end_y); | |
4604 | |
    /* Orientation 0 (band[0][0]) is decoded one row ahead of the requested
     * range, then passed through correlate_slice_buffered() and
     * dequantize_slice_buffered(); all other subbands are only decoded. */
4605 if (start_y != end_y){ | |
4606 if (orientation == 0){ | |
4607 SubBand * correlate_band = &p->band[0][0]; | |
4608 int correlate_end_y = FFMIN(b->height, end_y + 1); | |
4609 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | |
4610 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | |
4611 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | |
4612 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); | |
4613 } | |
4614 else | |
4615 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
4616 } | |
4617 } | |
4618 } | |
4619 | |
    /* Advance the buffered IDWT in steps of 4 until it reaches slice_h. */
4620 for(; yd<slice_h; yd+=4){ | |
4621 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
4622 } | |
4623 | |
    /* With the lossless qlog every sample of the newly available lines is
     * shifted left by FRAC_BITS. */
4624 if(s->qlog == LOSSLESS_QLOG){ | |
4625 for(; yq<slice_h && yq<h; yq++){ | |
4626 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
4627 for(x=0; x<w; x++){ | |
4628 line[x] <<= FRAC_BITS; | |
4629 } | |
4630 } | |
4631 } | |
4632 | |
4633 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); | |
4634 | |
    /* Release the slice buffer lines of this slice, clamped to the plane
     * height. */
4635 y = FFMIN(p->height, slice_starty); | |
4636 end_y = FFMIN(p->height, slice_h); | |
4637 while(y < end_y) | |
4638 slice_buffer_release(&s->sb, y++); | |
4639 } | |
4640 | |
4641 slice_buffer_flush(&s->sb); | |
4642 } | |
4643 | |
4644 } | |
4645 | |
4646 emms_c(); | |
4647 | |
    /* Drop the reference held in the last slot of last_picture[]. */
4648 release_buffer(avctx); | |
4649 | |
4650 if(!(s->avctx->debug&2048)) | |
4651 *picture= s->current_picture; | |
4652 else | |
4653 *picture= s->mconly_picture; | |
4654 | |
4655 *data_size = sizeof(AVFrame); | |
4656 | |
    /* A zero byte count is only logged; the value is still returned. */
4657 bytes_read= c->bytestream - c->bytestream_start; | |
4658 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
4659 | |
4660 return bytes_read; | |
4661 } | |
4662 | |
4663 static av_cold int decode_end(AVCodecContext *avctx) | |
4664 { | |
4665 SnowContext *s = avctx->priv_data; | |
4666 | |
4667 slice_buffer_destroy(&s->sb); | |
4668 | |
4669 common_end(s); | |
4670 | |
4671 return 0; | |
4672 } | |
4673 | |
/* Codec table entry for the Snow decoder. All members except long_name are
 * initialized positionally, so their meaning depends on the member order of
 * AVCodec, which is declared outside this file. */
4674 AVCodec snow_decoder = { | |
4675 "snow", | |
4676 CODEC_TYPE_VIDEO, | |
4677 CODEC_ID_SNOW, | |
4678 sizeof(SnowContext), | |
4679 decode_init, | |
    /* NOTE(review): presumably the unused encode callback slot - confirm
     * against the AVCodec declaration. */
4680 NULL, | |
4681 decode_end, | |
4682 decode_frame, | |
4683 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
    /* NOTE(review): meaning of this NULL slot is not visible here - confirm
     * against the AVCodec declaration. */
4684 NULL, | |
4685 .long_name = NULL_IF_CONFIG_SMALL("Snow"), | |
4686 }; | |
4687 | |
4688 #if CONFIG_SNOW_ENCODER | |
4689 AVCodec snow_encoder = { | 4687 AVCodec snow_encoder = { |
4690 "snow", | 4688 "snow", |
4691 CODEC_TYPE_VIDEO, | 4689 CODEC_TYPE_VIDEO, |
4692 CODEC_ID_SNOW, | 4690 CODEC_ID_SNOW, |
4693 sizeof(SnowContext), | 4691 sizeof(SnowContext), |