Mercurial > libavcodec.hg
comparison snow.c @ 2589:a5a62827f195 libavcodec
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
author | michael |
---|---|
date | Sun, 03 Apr 2005 15:43:57 +0000 |
parents | d0a58dca5ad2 |
children | b70b4b69960b |
comparison
equal
deleted
inserted
replaced
2588:b6b618986f80 | 2589:a5a62827f195 |
---|---|
369 }BlockNode; | 369 }BlockNode; |
370 | 370 |
371 #define LOG2_MB_SIZE 4 | 371 #define LOG2_MB_SIZE 4 |
372 #define MB_SIZE (1<<LOG2_MB_SIZE) | 372 #define MB_SIZE (1<<LOG2_MB_SIZE) |
373 | 373 |
374 typedef struct x_and_coeff{ | |
375 int16_t x; | |
376 int16_t coeff; | |
377 } x_and_coeff; | |
378 | |
374 typedef struct SubBand{ | 379 typedef struct SubBand{ |
375 int level; | 380 int level; |
376 int stride; | 381 int stride; |
377 int width; | 382 int width; |
378 int height; | 383 int height; |
379 int qlog; ///< log(qscale)/log[2^(1/6)] | 384 int qlog; ///< log(qscale)/log[2^(1/6)] |
380 DWTELEM *buf; | 385 DWTELEM *buf; |
381 int16_t *x; | 386 int buf_x_offset; |
382 DWTELEM *coeff; | 387 int buf_y_offset; |
388 int stride_line; ///< Stride measured in lines, not pixels. | |
389 x_and_coeff * x_coeff; | |
383 struct SubBand *parent; | 390 struct SubBand *parent; |
384 uint8_t state[/*7*2*/ 7 + 512][32]; | 391 uint8_t state[/*7*2*/ 7 + 512][32]; |
385 }SubBand; | 392 }SubBand; |
386 | 393 |
387 typedef struct Plane{ | 394 typedef struct Plane{ |
388 int width; | 395 int width; |
389 int height; | 396 int height; |
390 SubBand band[MAX_DECOMPOSITIONS][4]; | 397 SubBand band[MAX_DECOMPOSITIONS][4]; |
391 }Plane; | 398 }Plane; |
399 | |
400 /** Used to minimize the amount of memory used in order to optimize cache performance. **/ | |
401 typedef struct { | |
402 DWTELEM * * line; ///< For use by idwt and predict_slices. | |
403 DWTELEM * * data_stack; ///< Used for internal purposes. | |
404 int data_stack_top; | |
405 int line_count; | |
406 int line_width; | |
407 int data_count; | |
408 DWTELEM * base_buffer; ///< Buffer that this structure is caching. | |
409 } slice_buffer; | |
392 | 410 |
393 typedef struct SnowContext{ | 411 typedef struct SnowContext{ |
394 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) | 412 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) |
395 | 413 |
396 AVCodecContext *avctx; | 414 AVCodecContext *avctx; |
424 int b_width; | 442 int b_width; |
425 int b_height; | 443 int b_height; |
426 int block_max_depth; | 444 int block_max_depth; |
427 Plane plane[MAX_PLANES]; | 445 Plane plane[MAX_PLANES]; |
428 BlockNode *block; | 446 BlockNode *block; |
447 slice_buffer sb; | |
429 | 448 |
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) | 449 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) |
431 }SnowContext; | 450 }SnowContext; |
432 | 451 |
433 typedef struct { | 452 typedef struct { |
435 DWTELEM *b1; | 454 DWTELEM *b1; |
436 DWTELEM *b2; | 455 DWTELEM *b2; |
437 DWTELEM *b3; | 456 DWTELEM *b3; |
438 int y; | 457 int y; |
439 } dwt_compose_t; | 458 } dwt_compose_t; |
459 | |
460 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) | |
461 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) | |
462 | |
463 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) | |
464 { | |
465 int i; | |
466 | |
467 buf->base_buffer = base_buffer; | |
468 buf->line_count = line_count; | |
469 buf->line_width = line_width; | |
470 buf->data_count = max_allocated_lines; | |
471 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count); | |
472 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines); | |
473 | |
474 for (i = 0; i < max_allocated_lines; i++) | |
475 { | |
476 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width); | |
477 } | |
478 | |
479 buf->data_stack_top = max_allocated_lines - 1; | |
480 } | |
481 | |
482 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) | |
483 { | |
484 int i; | |
485 int offset; | |
486 DWTELEM * buffer; | |
487 | |
488 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); | |
489 | |
490 assert(buf->data_stack_top >= 0); | |
491 // assert(!buf->line[line]); | |
492 if (buf->line[line]) | |
493 return buf->line[line]; | |
494 | |
495 offset = buf->line_width * line; | |
496 buffer = buf->data_stack[buf->data_stack_top]; | |
497 buf->data_stack_top--; | |
498 buf->line[line] = buffer; | |
499 | |
500 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); | |
501 | |
502 return buffer; | |
503 } | |
504 | |
505 static void slice_buffer_release(slice_buffer * buf, int line) | |
506 { | |
507 int i; | |
508 int offset; | |
509 DWTELEM * buffer; | |
510 | |
511 assert(line >= 0 && line < buf->line_count); | |
512 assert(buf->line[line]); | |
513 | |
514 offset = buf->line_width * line; | |
515 buffer = buf->line[line]; | |
516 buf->data_stack_top++; | |
517 buf->data_stack[buf->data_stack_top] = buffer; | |
518 buf->line[line] = NULL; | |
519 | |
520 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); | |
521 } | |
522 | |
523 static void slice_buffer_flush(slice_buffer * buf) | |
524 { | |
525 int i; | |
526 for (i = 0; i < buf->line_count; i++) | |
527 { | |
528 if (buf->line[i]) | |
529 { | |
530 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i); | |
531 slice_buffer_release(buf, i); | |
532 } | |
533 } | |
534 } | |
535 | |
536 static void slice_buffer_destroy(slice_buffer * buf) | |
537 { | |
538 int i; | |
539 slice_buffer_flush(buf); | |
540 | |
541 for (i = buf->data_count - 1; i >= 0; i--) | |
542 { | |
543 assert(buf->data_stack[i]); | |
544 av_free(buf->data_stack[i]); | |
545 } | |
546 assert(buf->data_stack); | |
547 av_free(buf->data_stack); | |
548 assert(buf->line); | |
549 av_free(buf->line); | |
550 } | |
440 | 551 |
441 #ifdef __sgi | 552 #ifdef __sgi |
442 // Avoid a name clash on SGI IRIX | 553 // Avoid a name clash on SGI IRIX |
443 #undef qexp | 554 #undef qexp |
444 #endif | 555 #endif |
1172 for(i=0; i<width; i++){ | 1283 for(i=0; i<width; i++){ |
1173 b1[i] -= (b0[i] + b2[i] + 2)>>2; | 1284 b1[i] -= (b0[i] + b2[i] + 2)>>2; |
1174 } | 1285 } |
1175 } | 1286 } |
1176 | 1287 |
1288 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ | |
1289 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); | |
1290 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); | |
1291 cs->y = -1; | |
1292 } | |
1293 | |
1177 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ | 1294 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1178 cs->b0 = buffer + mirror(-1-1, height-1)*stride; | 1295 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1179 cs->b1 = buffer + mirror(-1 , height-1)*stride; | 1296 cs->b1 = buffer + mirror(-1 , height-1)*stride; |
1180 cs->y = -1; | 1297 cs->y = -1; |
1298 } | |
1299 | |
1300 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ | |
1301 int y= cs->y; | |
1302 int mirror0 = mirror(y-1, height-1); | |
1303 int mirror1 = mirror(y , height-1); | |
1304 int mirror2 = mirror(y+1, height-1); | |
1305 int mirror3 = mirror(y+2, height-1); | |
1306 | |
1307 DWTELEM *b0= cs->b0; | |
1308 DWTELEM *b1= cs->b1; | |
1309 DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line); | |
1310 DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line); | |
1311 | |
1312 {START_TIMER | |
1313 if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width); | |
1314 if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width); | |
1315 STOP_TIMER("vertical_compose53i*")} | |
1316 | |
1317 {START_TIMER | |
1318 if(y-1 >= 0) horizontal_compose53i(b0, width); | |
1319 if(mirror0 <= mirror2) horizontal_compose53i(b1, width); | |
1320 STOP_TIMER("horizontal_compose53i")} | |
1321 | |
1322 cs->b0 = b2; | |
1323 cs->b1 = b3; | |
1324 cs->y += 2; | |
1181 } | 1325 } |
1182 | 1326 |
1183 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ | 1327 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1184 int y= cs->y; | 1328 int y= cs->y; |
1185 DWTELEM *b0= cs->b0; | 1329 DWTELEM *b0= cs->b0; |
1257 for(i=0; i<width; i++){ | 1401 for(i=0; i<width; i++){ |
1258 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | 1402 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; |
1259 } | 1403 } |
1260 } | 1404 } |
1261 | 1405 |
1406 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ | |
1407 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); | |
1408 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); | |
1409 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); | |
1410 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); | |
1411 cs->y = -3; | |
1412 } | |
1413 | |
1262 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ | 1414 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1263 cs->b0 = buffer + mirror(-3-1, height-1)*stride; | 1415 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1264 cs->b1 = buffer + mirror(-3 , height-1)*stride; | 1416 cs->b1 = buffer + mirror(-3 , height-1)*stride; |
1265 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | 1417 cs->b2 = buffer + mirror(-3+1, height-1)*stride; |
1266 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | 1418 cs->b3 = buffer + mirror(-3+2, height-1)*stride; |
1267 cs->y = -3; | 1419 cs->y = -3; |
1420 } | |
1421 | |
1422 static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ | |
1423 int y = cs->y; | |
1424 | |
1425 int mirror0 = mirror(y - 1, height - 1); | |
1426 int mirror1 = mirror(y + 0, height - 1); | |
1427 int mirror2 = mirror(y + 1, height - 1); | |
1428 int mirror3 = mirror(y + 2, height - 1); | |
1429 int mirror4 = mirror(y + 3, height - 1); | |
1430 int mirror5 = mirror(y + 4, height - 1); | |
1431 DWTELEM *b0= cs->b0; | |
1432 DWTELEM *b1= cs->b1; | |
1433 DWTELEM *b2= cs->b2; | |
1434 DWTELEM *b3= cs->b3; | |
1435 DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line); | |
1436 DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line); | |
1437 | |
1438 if(stride_line == 1 && y+4 < height && 0){ | |
1439 int x; | |
1440 for(x=0; x<width/2; x++) | |
1441 b5[x] += 64*2; | |
1442 for(; x<width; x++) | |
1443 b5[x] += 169*2; | |
1444 } | |
1445 | |
1446 // if(mirror3 <= mirror5 && mirror2 <= mirror4 && mirror1 <= mirror3 && mirror0 <= mirror2) | |
1447 // { | |
1448 //{START_TIMER | |
1449 // vertical_compose97_complete(b0, b1, b2, b3, b4, b5, width); | |
1450 //if(width>400){ | |
1451 //STOP_TIMER("vertical_compose97i-NEW")}} | |
1452 // } | |
1453 // else | |
1454 // { | |
1455 {START_TIMER | |
1456 if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width); | |
1457 if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width); | |
1458 if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width); | |
1459 if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width); | |
1460 if(width>400){ | |
1461 STOP_TIMER("vertical_compose97i")}} | |
1462 // } | |
1463 | |
1464 {START_TIMER | |
1465 if(y-1>= 0) horizontal_compose97i(b0, width); | |
1466 if(mirror0 <= mirror2) horizontal_compose97i(b1, width); | |
1467 if(width>400 && mirror0 <= mirror2){ | |
1468 STOP_TIMER("horizontal_compose97i")}} | |
1469 | |
1470 cs->b0=b2; | |
1471 cs->b1=b3; | |
1472 cs->b2=b4; | |
1473 cs->b3=b5; | |
1474 cs->y += 2; | |
1268 } | 1475 } |
1269 | 1476 |
1270 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ | 1477 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1271 int y = cs->y; | 1478 int y = cs->y; |
1272 DWTELEM *b0= cs->b0; | 1479 DWTELEM *b0= cs->b0; |
1310 spatial_compose97i_init(&cs, buffer, height, stride); | 1517 spatial_compose97i_init(&cs, buffer, height, stride); |
1311 while(cs.y <= height) | 1518 while(cs.y <= height) |
1312 spatial_compose97i_dy(&cs, buffer, width, height, stride); | 1519 spatial_compose97i_dy(&cs, buffer, width, height, stride); |
1313 } | 1520 } |
1314 | 1521 |
1522 void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ | |
1523 int level; | |
1524 for(level=decomposition_count-1; level>=0; level--){ | |
1525 switch(type){ | |
1526 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
1527 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
1528 /* not slicified yet */ | |
1529 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ | |
1530 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; | |
1531 } | |
1532 } | |
1533 } | |
1534 | |
1315 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | 1535 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
1316 int level; | 1536 int level; |
1317 for(level=decomposition_count-1; level>=0; level--){ | 1537 for(level=decomposition_count-1; level>=0; level--){ |
1318 switch(type){ | 1538 switch(type){ |
1319 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; | 1539 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1333 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | 1553 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
1334 switch(type){ | 1554 switch(type){ |
1335 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | 1555 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
1336 break; | 1556 break; |
1337 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | 1557 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
1558 break; | |
1559 case 2: break; | |
1560 } | |
1561 } | |
1562 } | |
1563 } | |
1564 | |
1565 void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ | |
1566 const int support = type==1 ? 3 : 5; | |
1567 int level; | |
1568 if(type==2) return; | |
1569 | |
1570 for(level=decomposition_count-1; level>=0; level--){ | |
1571 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1572 switch(type){ | |
1573 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); | |
1574 break; | |
1575 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); | |
1338 break; | 1576 break; |
1339 case 2: break; | 1577 case 2: break; |
1340 } | 1578 } |
1341 } | 1579 } |
1342 } | 1580 } |
1474 // encode_subband_z0run(s, b, src, parent, stride, orientation); | 1712 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
1475 return encode_subband_c0run(s, b, src, parent, stride, orientation); | 1713 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
1476 // encode_subband_dzr(s, b, src, parent, stride, orientation); | 1714 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
1477 } | 1715 } |
1478 | 1716 |
1479 static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ | 1717 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
1480 const int w= b->width; | 1718 const int w= b->width; |
1481 const int h= b->height; | 1719 const int h= b->height; |
1482 int x,y; | 1720 int x,y; |
1483 const int qlog= clip(s->qlog + b->qlog, 0, 128); | 1721 |
1484 int qmul= qexp[qlog&7]<<(qlog>>3); | |
1485 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | |
1486 | |
1487 START_TIMER | |
1488 | |
1489 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){ | |
1490 qadd= 0; | |
1491 qmul= 1<<QEXPSHIFT; | |
1492 } | |
1493 | |
1494 if(1){ | 1722 if(1){ |
1495 int run; | 1723 int run; |
1496 int index=0; | 1724 int index=0; |
1497 int prev_index=-1; | 1725 int prev_index=-1; |
1498 int prev2_index=0; | 1726 int prev2_index=0; |
1499 int parent_index= 0; | 1727 int parent_index= 0; |
1500 int prev_parent_index= 0; | 1728 int prev_parent_index= 0; |
1501 | |
1502 for(y=0; y<b->height; y++) | |
1503 memset(&src[y*stride], 0, b->width*sizeof(DWTELEM)); | |
1504 | 1729 |
1505 run= get_symbol2(&s->c, b->state[1], 3); | 1730 run= get_symbol2(&s->c, b->state[1], 3); |
1506 for(y=0; y<h; y++){ | 1731 for(y=0; y<h; y++){ |
1507 int v=0; | 1732 int v=0; |
1508 int lt=0, t=0, rt=0; | 1733 int lt=0, t=0, rt=0; |
1509 | 1734 |
1510 if(y && b->x[prev_index] == 0){ | 1735 if(y && b->x_coeff[prev_index].x == 0){ |
1511 rt= b->coeff[prev_index]; | 1736 rt= b->x_coeff[prev_index].coeff; |
1512 } | 1737 } |
1513 for(x=0; x<w; x++){ | 1738 for(x=0; x<w; x++){ |
1514 int p=0; | 1739 int p=0; |
1515 const int l= v; | 1740 const int l= v; |
1516 | 1741 |
1517 lt= t; t= rt; | 1742 lt= t; t= rt; |
1518 | 1743 |
1519 if(y){ | 1744 if(y){ |
1520 if(b->x[prev_index] <= x) | 1745 if(b->x_coeff[prev_index].x <= x) |
1521 prev_index++; | 1746 prev_index++; |
1522 if(b->x[prev_index] == x + 1) | 1747 if(b->x_coeff[prev_index].x == x + 1) |
1523 rt= b->coeff[prev_index]; | 1748 rt= b->x_coeff[prev_index].coeff; |
1524 else | 1749 else |
1525 rt=0; | 1750 rt=0; |
1526 } | 1751 } |
1527 if(parent){ | 1752 if(parent){ |
1528 if(x>>1 > b->parent->x[parent_index]){ | 1753 if(x>>1 > parent->x_coeff[parent_index].x){ |
1529 parent_index++; | 1754 parent_index++; |
1530 } | 1755 } |
1531 if(x>>1 == b->parent->x[parent_index]){ | 1756 if(x>>1 == parent->x_coeff[parent_index].x){ |
1532 p= b->parent->coeff[parent_index]; | 1757 p= parent->x_coeff[parent_index].coeff; |
1533 } | 1758 } |
1534 } | 1759 } |
1535 if(/*ll|*/l|lt|t|rt|p){ | 1760 if(/*ll|*/l|lt|t|rt|p){ |
1536 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); | 1761 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); |
1537 | 1762 |
1545 v=0; | 1770 v=0; |
1546 | 1771 |
1547 if(y && parent){ | 1772 if(y && parent){ |
1548 int max_run; | 1773 int max_run; |
1549 | 1774 |
1550 max_run= FFMIN(run, b->x[prev_index] - x - 2); | 1775 max_run= FFMIN(run, b->x_coeff[prev_index].x - x - 2); |
1551 max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1); | 1776 max_run= FFMIN(max_run, 2*parent->x_coeff[parent_index].x - x - 1); |
1552 x+= max_run; | 1777 x+= max_run; |
1553 run-= max_run; | 1778 run-= max_run; |
1554 } | 1779 } |
1555 } | 1780 } |
1556 } | 1781 } |
1557 if(v){ | 1782 if(v){ |
1558 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); | 1783 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); |
1559 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1; | 1784 v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1; |
1560 if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){ | 1785 if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])) |
1561 src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT)); | 1786 v *= -1; |
1562 v= -v; | 1787 b->x_coeff[index].x=x; |
1563 }else{ | 1788 b->x_coeff[index++].coeff= v; |
1564 src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT)); | |
1565 } | |
1566 b->x[index]=x; //FIXME interleave x/coeff | |
1567 b->coeff[index++]= v; | |
1568 } | 1789 } |
1569 } | 1790 } |
1570 b->x[index++]= w+1; //end marker | 1791 b->x_coeff[index++].x= w+1; //end marker |
1571 prev_index= prev2_index; | 1792 prev_index= prev2_index; |
1572 prev2_index= index; | 1793 prev2_index= index; |
1573 | 1794 |
1574 if(parent){ | 1795 if(parent){ |
1575 while(b->parent->x[parent_index] != b->parent->width+1) | 1796 if(y&1){ |
1797 while(parent->x_coeff[parent_index].x != parent->width+1) | |
1798 parent_index++; | |
1576 parent_index++; | 1799 parent_index++; |
1577 parent_index++; | |
1578 if(y&1){ | |
1579 prev_parent_index= parent_index; | 1800 prev_parent_index= parent_index; |
1580 }else{ | 1801 }else{ |
1581 parent_index= prev_parent_index; | 1802 parent_index= prev_parent_index; |
1582 } | 1803 } |
1583 } | 1804 } |
1584 } | 1805 } |
1585 b->x[index++]= w+1; //end marker | 1806 |
1586 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ | 1807 b->x_coeff[index++].x= w+1; //end marker |
1587 STOP_TIMER("decode_subband") | 1808 } |
1588 } | 1809 } |
1810 | |
1811 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ | |
1812 const int w= b->width; | |
1813 int x,y; | |
1814 const int qlog= clip(s->qlog + b->qlog, 0, 128); | |
1815 int qmul= qexp[qlog&7]<<(qlog>>3); | |
1816 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | |
1817 int new_index = 0; | |
1818 | |
1819 START_TIMER | |
1820 | |
1821 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){ | |
1822 qadd= 0; | |
1823 qmul= 1<<QEXPSHIFT; | |
1824 } | |
1825 | |
1826 /* If we are on the second or later slice, restore our index. */ | |
1827 if (start_y != 0) | |
1828 new_index = save_state[0]; | |
1829 | |
1589 | 1830 |
1590 return; | 1831 for(y=start_y; y<h; y++){ |
1591 } | 1832 int x = 0; |
1833 int v; | |
1834 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; | |
1835 memset(line, 0, b->width*sizeof(DWTELEM)); | |
1836 v = b->x_coeff[new_index].coeff; | |
1837 x = b->x_coeff[new_index++].x; | |
1838 while(x < w) | |
1839 { | |
1840 if (v < 0) | |
1841 line[x] = -(( -v*qmul + qadd)>>(QEXPSHIFT)); | |
1842 else | |
1843 line[x] = (( v*qmul + qadd)>>(QEXPSHIFT)); | |
1844 v = b->x_coeff[new_index].coeff; | |
1845 x = b->x_coeff[new_index++].x; | |
1846 } | |
1847 } | |
1848 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ | |
1849 STOP_TIMER("decode_subband") | |
1850 } | |
1851 | |
1852 /* Save our variables for the next slice. */ | |
1853 save_state[0] = new_index; | |
1854 | |
1855 return; | |
1592 } | 1856 } |
1593 | 1857 |
1594 static void reset_contexts(SnowContext *s){ | 1858 static void reset_contexts(SnowContext *s){ |
1595 int plane_index, level, orientation; | 1859 int plane_index, level, orientation; |
1596 | 1860 |
2119 static always_inline int same_block(BlockNode *a, BlockNode *b){ | 2383 static always_inline int same_block(BlockNode *a, BlockNode *b){ |
2120 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type); | 2384 return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type); |
2121 } | 2385 } |
2122 | 2386 |
2123 //FIXME name clenup (b_w, block_w, b_width stuff) | 2387 //FIXME name clenup (b_w, block_w, b_width stuff) |
2388 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ | |
2389 DWTELEM * dst = NULL; | |
2390 const int b_width = s->b_width << s->block_max_depth; | |
2391 const int b_height= s->b_height << s->block_max_depth; | |
2392 const int b_stride= b_width; | |
2393 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | |
2394 BlockNode *rt= lt+1; | |
2395 BlockNode *lb= lt+b_stride; | |
2396 BlockNode *rb= lb+1; | |
2397 uint8_t *block[4]; | |
2398 uint8_t tmp[src_stride*(b_h+5)]; //FIXME align | |
2399 int x,y; | |
2400 | |
2401 if(b_x<0){ | |
2402 lt= rt; | |
2403 lb= rb; | |
2404 }else if(b_x + 1 >= b_width){ | |
2405 rt= lt; | |
2406 rb= lb; | |
2407 } | |
2408 if(b_y<0){ | |
2409 lt= lb; | |
2410 rt= rb; | |
2411 }else if(b_y + 1 >= b_height){ | |
2412 lb= lt; | |
2413 rb= rt; | |
2414 } | |
2415 | |
2416 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 | |
2417 obmc -= src_x; | |
2418 b_w += src_x; | |
2419 src_x=0; | |
2420 }else if(src_x + b_w > w){ | |
2421 b_w = w - src_x; | |
2422 } | |
2423 if(src_y<0){ | |
2424 obmc -= src_y*obmc_stride; | |
2425 b_h += src_y; | |
2426 src_y=0; | |
2427 }else if(src_y + b_h> h){ | |
2428 b_h = h - src_y; | |
2429 } | |
2430 | |
2431 if(b_w<=0 || b_h<=0) return; | |
2432 | |
2433 assert(src_stride > 7*MB_SIZE); | |
2434 // old_dst += src_x + src_y*dst_stride; | |
2435 dst8+= src_x + src_y*src_stride; | |
2436 // src += src_x + src_y*src_stride; | |
2437 | |
2438 block[0]= tmp+3*MB_SIZE; | |
2439 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); | |
2440 | |
2441 if(same_block(lt, rt)){ | |
2442 block[1]= block[0]; | |
2443 }else{ | |
2444 block[1]= tmp + 4*MB_SIZE; | |
2445 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); | |
2446 } | |
2447 | |
2448 if(same_block(lt, lb)){ | |
2449 block[2]= block[0]; | |
2450 }else if(same_block(rt, lb)){ | |
2451 block[2]= block[1]; | |
2452 }else{ | |
2453 block[2]= tmp+5*MB_SIZE; | |
2454 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); | |
2455 } | |
2456 | |
2457 if(same_block(lt, rb) ){ | |
2458 block[3]= block[0]; | |
2459 }else if(same_block(rt, rb)){ | |
2460 block[3]= block[1]; | |
2461 }else if(same_block(lb, rb)){ | |
2462 block[3]= block[2]; | |
2463 }else{ | |
2464 block[3]= tmp+6*MB_SIZE; | |
2465 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); | |
2466 } | |
2467 #if 0 | |
2468 for(y=0; y<b_h; y++){ | |
2469 for(x=0; x<b_w; x++){ | |
2470 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2471 if(add) dst[x + y*dst_stride] += v; | |
2472 else dst[x + y*dst_stride] -= v; | |
2473 } | |
2474 } | |
2475 for(y=0; y<b_h; y++){ | |
2476 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2477 for(x=0; x<b_w; x++){ | |
2478 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2479 if(add) dst[x + y*dst_stride] += v; | |
2480 else dst[x + y*dst_stride] -= v; | |
2481 } | |
2482 } | |
2483 for(y=0; y<b_h; y++){ | |
2484 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2485 for(x=0; x<b_w; x++){ | |
2486 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2487 if(add) dst[x + y*dst_stride] += v; | |
2488 else dst[x + y*dst_stride] -= v; | |
2489 } | |
2490 } | |
2491 for(y=0; y<b_h; y++){ | |
2492 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2493 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2494 for(x=0; x<b_w; x++){ | |
2495 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2496 if(add) dst[x + y*dst_stride] += v; | |
2497 else dst[x + y*dst_stride] -= v; | |
2498 } | |
2499 } | |
2500 #else | |
2501 { | |
2502 | |
2503 START_TIMER | |
2504 | |
2505 int block_index = 0; | |
2506 for(y=0; y<b_h; y++){ | |
2507 //FIXME ugly missue of obmc_stride | |
2508 uint8_t *obmc1= obmc + y*obmc_stride; | |
2509 uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2510 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2511 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2512 dst = slice_buffer_get_line(sb, src_y + y); | |
2513 for(x=0; x<b_w; x++){ | |
2514 int v= obmc1[x] * block[3][x + y*src_stride] | |
2515 +obmc2[x] * block[2][x + y*src_stride] | |
2516 +obmc3[x] * block[1][x + y*src_stride] | |
2517 +obmc4[x] * block[0][x + y*src_stride]; | |
2518 | |
2519 v <<= 8 - LOG2_OBMC_MAX; | |
2520 if(FRAC_BITS != 8){ | |
2521 v += 1<<(7 - FRAC_BITS); | |
2522 v >>= 8 - FRAC_BITS; | |
2523 } | |
2524 if(add){ | |
2525 // v += old_dst[x + y*dst_stride]; | |
2526 v += dst[x + src_x]; | |
2527 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2528 if(v&(~255)) v= ~(v>>31); | |
2529 dst8[x + y*src_stride] = v; | |
2530 }else{ | |
2531 // old_dst[x + y*dst_stride] -= v; | |
2532 dst[x + src_x] -= v; | |
2533 } | |
2534 } | |
2535 } | |
2536 STOP_TIMER("Inner add y block") | |
2537 } | |
2538 #endif | |
2539 } | |
2540 | |
2541 //FIXME name clenup (b_w, block_w, b_width stuff) | |
2124 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ | 2542 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ |
2125 const int b_width = s->b_width << s->block_max_depth; | 2543 const int b_width = s->b_width << s->block_max_depth; |
2126 const int b_height= s->b_height << s->block_max_depth; | 2544 const int b_height= s->b_height << s->block_max_depth; |
2127 const int b_stride= b_width; | 2545 const int b_stride= b_width; |
2128 BlockNode *lt= &s->block[b_x + b_y*b_stride]; | 2546 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
2259 dst[x + y*dst_stride] -= v; | 2677 dst[x + y*dst_stride] -= v; |
2260 } | 2678 } |
2261 } | 2679 } |
2262 } | 2680 } |
2263 #endif | 2681 #endif |
2682 } | |
2683 | |
2684 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ | |
2685 Plane *p= &s->plane[plane_index]; | |
2686 const int mb_w= s->b_width << s->block_max_depth; | |
2687 const int mb_h= s->b_height << s->block_max_depth; | |
2688 int x, y, mb_x; | |
2689 int block_size = MB_SIZE >> s->block_max_depth; | |
2690 int block_w = plane_index ? block_size/2 : block_size; | |
2691 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2692 int obmc_stride= plane_index ? block_size : 2*block_size; | |
2693 int ref_stride= s->current_picture.linesize[plane_index]; | |
2694 uint8_t *ref = s->last_picture.data[plane_index]; | |
2695 uint8_t *dst8= s->current_picture.data[plane_index]; | |
2696 int w= p->width; | |
2697 int h= p->height; | |
2698 START_TIMER | |
2699 | |
2700 if(s->keyframe || (s->avctx->debug&512)){ | |
2701 if(mb_y==mb_h) | |
2702 return; | |
2703 | |
2704 if(add){ | |
2705 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++) | |
2706 { | |
2707 // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2708 DWTELEM * line = sb->line[y]; | |
2709 for(x=0; x<w; x++) | |
2710 { | |
2711 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2712 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | |
2713 v >>= FRAC_BITS; | |
2714 if(v&(~255)) v= ~(v>>31); | |
2715 dst8[x + y*ref_stride]= v; | |
2716 } | |
2717 } | |
2718 }else{ | |
2719 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++) | |
2720 { | |
2721 // DWTELEM * line = slice_buffer_get_line(sb, y); | |
2722 DWTELEM * line = sb->line[y]; | |
2723 for(x=0; x<w; x++) | |
2724 { | |
2725 line[x] -= 128 << FRAC_BITS; | |
2726 // buf[x + y*w]-= 128<<FRAC_BITS; | |
2727 } | |
2728 } | |
2729 } | |
2730 | |
2731 return; | |
2732 } | |
2733 | |
2734 for(mb_x=0; mb_x<=mb_w; mb_x++){ | |
2735 START_TIMER | |
2736 | |
2737 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, | |
2738 block_w*mb_x - block_w/2, | |
2739 block_w*mb_y - block_w/2, | |
2740 block_w, block_w, | |
2741 w, h, | |
2742 w, ref_stride, obmc_stride, | |
2743 mb_x - 1, mb_y - 1, | |
2744 add, plane_index); | |
2745 | |
2746 STOP_TIMER("add_yblock") | |
2747 } | |
2748 | |
2749 STOP_TIMER("predict_slice") | |
2264 } | 2750 } |
2265 | 2751 |
2266 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ | 2752 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ |
2267 Plane *p= &s->plane[plane_index]; | 2753 Plane *p= &s->plane[plane_index]; |
2268 const int mb_w= s->b_width << s->block_max_depth; | 2754 const int mb_w= s->b_width << s->block_max_depth; |
2389 if(level+1 == s->spatial_decomposition_count){ | 2875 if(level+1 == s->spatial_decomposition_count){ |
2390 // STOP_TIMER("quantize") | 2876 // STOP_TIMER("quantize") |
2391 } | 2877 } |
2392 } | 2878 } |
2393 | 2879 |
2880 static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){ | |
2881 const int w= b->width; | |
2882 const int h= b->height; | |
2883 const int qlog= clip(s->qlog + b->qlog, 0, 128); | |
2884 const int qmul= qexp[qlog&7]<<(qlog>>3); | |
2885 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; | |
2886 int x,y; | |
2887 START_TIMER | |
2888 | |
2889 if(s->qlog == LOSSLESS_QLOG) return; | |
2890 | |
2891 assert(QROOT==8); | |
2892 | |
2893 for(y=0; y<h; y++){ | |
2894 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
2895 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
2896 for(x=0; x<w; x++){ | |
2897 int i= line[x]; | |
2898 if(i<0){ | |
2899 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
2900 }else if(i>0){ | |
2901 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
2902 } | |
2903 } | |
2904 } | |
2905 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ | |
2906 STOP_TIMER("dquant") | |
2907 } | |
2908 } | |
2909 | |
2394 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){ | 2910 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){ |
2395 const int w= b->width; | 2911 const int w= b->width; |
2396 const int h= b->height; | 2912 const int h= b->height; |
2397 const int qlog= clip(s->qlog + b->qlog, 0, 128); | 2913 const int qlog= clip(s->qlog + b->qlog, 0, 128); |
2398 const int qmul= qexp[qlog&7]<<(qlog>>3); | 2914 const int qmul= qexp[qlog&7]<<(qlog>>3); |
2439 }else{ | 2955 }else{ |
2440 if(y) src[i] -= src[i - stride]; | 2956 if(y) src[i] -= src[i - stride]; |
2441 } | 2957 } |
2442 } | 2958 } |
2443 } | 2959 } |
2960 } | |
2961 | |
2962 static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ | |
2963 const int w= b->width; | |
2964 const int h= b->height; | |
2965 int x,y; | |
2966 | |
2967 // START_TIMER | |
2968 | |
2969 DWTELEM * line; | |
2970 DWTELEM * prev; | |
2971 | |
2972 for(y=0; y<h; y++){ | |
2973 prev = line; | |
2974 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); | |
2975 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; | |
2976 for(x=0; x<w; x++){ | |
2977 if(x){ | |
2978 if(use_median){ | |
2979 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); | |
2980 else line[x] += line[x - 1]; | |
2981 }else{ | |
2982 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); | |
2983 else line[x] += line[x - 1]; | |
2984 } | |
2985 }else{ | |
2986 if(y) line[x] += prev[x]; | |
2987 } | |
2988 } | |
2989 } | |
2990 | |
2991 // STOP_TIMER("correlate") | |
2444 } | 2992 } |
2445 | 2993 |
2446 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ | 2994 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ |
2447 const int w= b->width; | 2995 const int w= b->width; |
2448 const int h= b->height; | 2996 const int h= b->height; |
2638 b->level= level; | 3186 b->level= level; |
2639 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | 3187 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); |
2640 b->width = (w + !(orientation&1))>>1; | 3188 b->width = (w + !(orientation&1))>>1; |
2641 b->height= (h + !(orientation>1))>>1; | 3189 b->height= (h + !(orientation>1))>>1; |
2642 | 3190 |
2643 if(orientation&1) b->buf += (w+1)>>1; | 3191 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
2644 if(orientation>1) b->buf += b->stride>>1; | 3192 b->buf_x_offset = 0; |
3193 b->buf_y_offset = 0; | |
3194 | |
3195 if(orientation&1){ | |
3196 b->buf += (w+1)>>1; | |
3197 b->buf_x_offset = (w+1)>>1; | |
3198 } | |
3199 if(orientation>1){ | |
3200 b->buf += b->stride>>1; | |
3201 b->buf_y_offset = b->stride_line >> 1; | |
3202 } | |
2645 | 3203 |
2646 if(level) | 3204 if(level) |
2647 b->parent= &s->plane[plane_index].band[level-1][orientation]; | 3205 b->parent= &s->plane[plane_index].band[level-1][orientation]; |
2648 b->x = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t)); | 3206 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
2649 b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM)); | |
2650 } | 3207 } |
2651 w= (w+1)>>1; | 3208 w= (w+1)>>1; |
2652 h= (h+1)>>1; | 3209 h= (h+1)>>1; |
2653 } | 3210 } |
2654 } | 3211 } |
2950 for(plane_index=0; plane_index<3; plane_index++){ | 3507 for(plane_index=0; plane_index<3; plane_index++){ |
2951 for(level=s->spatial_decomposition_count-1; level>=0; level--){ | 3508 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
2952 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 3509 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
2953 SubBand *b= &s->plane[plane_index].band[level][orientation]; | 3510 SubBand *b= &s->plane[plane_index].band[level][orientation]; |
2954 | 3511 |
2955 av_freep(&b->x); | 3512 av_freep(&b->x_coeff); |
2956 av_freep(&b->coeff); | |
2957 } | 3513 } |
2958 } | 3514 } |
2959 } | 3515 } |
2960 } | 3516 } |
2961 | 3517 |
2968 return 0; | 3524 return 0; |
2969 } | 3525 } |
2970 | 3526 |
2971 static int decode_init(AVCodecContext *avctx) | 3527 static int decode_init(AVCodecContext *avctx) |
2972 { | 3528 { |
2973 // SnowContext *s = avctx->priv_data; | 3529 SnowContext *s = avctx->priv_data; |
3530 int block_size; | |
2974 | 3531 |
2975 common_init(avctx); | 3532 common_init(avctx); |
3533 | |
3534 block_size = MB_SIZE >> s->block_max_depth; | |
3535 /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */ | |
3536 /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */ | |
3537 /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially affect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. */ | |
3538 // slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer); | |
3539 slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer); | |
2976 | 3540 |
2977 return 0; | 3541 return 0; |
2978 } | 3542 } |
2979 | 3543 |
2980 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ | 3544 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ |
3001 for(plane_index=0; plane_index<3; plane_index++){ | 3565 for(plane_index=0; plane_index<3; plane_index++){ |
3002 Plane *p= &s->plane[plane_index]; | 3566 Plane *p= &s->plane[plane_index]; |
3003 int w= p->width; | 3567 int w= p->width; |
3004 int h= p->height; | 3568 int h= p->height; |
3005 int x, y; | 3569 int x, y; |
3570 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ | |
3571 SubBand * correlate_band; | |
3006 | 3572 |
3007 if(s->avctx->debug&2048){ | 3573 if(s->avctx->debug&2048){ |
3008 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | 3574 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); |
3009 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); | 3575 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); |
3010 | 3576 |
3013 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | 3579 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; |
3014 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | 3580 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; |
3015 } | 3581 } |
3016 } | 3582 } |
3017 } | 3583 } |
3018 for(level=0; level<s->spatial_decomposition_count; level++){ | 3584 |
3019 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | 3585 { START_TIMER |
3020 SubBand *b= &p->band[level][orientation]; | 3586 for(level=0; level<s->spatial_decomposition_count; level++){ |
3021 | 3587 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
3022 decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation); | 3588 SubBand *b= &p->band[level][orientation]; |
3023 if(orientation==0){ | 3589 unpack_coeffs(s, b, b->parent, orientation); |
3024 correlate(s, b, b->buf, b->stride, 1, 0); | 3590 } |
3025 dequantize(s, b, b->buf, b->stride); | 3591 } |
3026 assert(b->buf == s->spatial_dwt_buffer); | 3592 STOP_TIMER("unpack coeffs"); |
3027 } | 3593 } |
3028 } | 3594 |
3029 } | 3595 /* Handle level 0, orientation 0 specially. It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */ |
3596 correlate_band = &p->band[0][0]; | |
3597 decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]); | |
3598 correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0); | |
3599 dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride); | |
3030 | 3600 |
3031 {START_TIMER | 3601 {START_TIMER |
3032 const int mb_h= s->b_height << s->block_max_depth; | 3602 const int mb_h= s->b_height << s->block_max_depth; |
3033 const int block_size = MB_SIZE >> s->block_max_depth; | 3603 const int block_size = MB_SIZE >> s->block_max_depth; |
3034 const int block_w = plane_index ? block_size/2 : block_size; | 3604 const int block_w = plane_index ? block_size/2 : block_size; |
3035 int mb_y; | 3605 int mb_y; |
3036 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | 3606 dwt_compose_t cs[MAX_DECOMPOSITIONS]; |
3037 int yd=0, yq=0; | 3607 int yd=0, yq=0; |
3038 | 3608 int y; |
3039 ff_spatial_idwt_init(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | 3609 int end_y; |
3610 | |
3611 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
3040 for(mb_y=0; mb_y<=mb_h; mb_y++){ | 3612 for(mb_y=0; mb_y<=mb_h; mb_y++){ |
3613 | |
3614 const int slice_starty = block_w*mb_y; | |
3041 const int slice_h = block_w*(mb_y+1); | 3615 const int slice_h = block_w*(mb_y+1); |
3042 for(; yd<slice_h; yd+=4) | 3616 |
3043 ff_spatial_idwt_slice(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | 3617 { |
3044 | 3618 START_TIMER |
3619 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3620 for(orientation=level ? 1 : 1; orientation<4; orientation++){ | |
3621 SubBand *b= &p->band[level][orientation]; | |
3622 int start_y; | |
3623 int end_y; | |
3624 int our_mb_start = mb_y; | |
3625 int our_mb_end = (mb_y + 1); | |
3626 start_y = FFMIN(b->height, (mb_y ? ((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0)); | |
3627 end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5)); | |
3628 | |
3629 if (start_y != end_y) | |
3630 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
3631 } | |
3632 } | |
3633 STOP_TIMER("decode_subband_slice"); | |
3634 } | |
3635 | |
3636 { START_TIMER | |
3637 for(; yd<slice_h; yd+=4){ | |
3638 ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
3639 } | |
3640 STOP_TIMER("idwt slice");} | |
3641 | |
3642 | |
3045 if(s->qlog == LOSSLESS_QLOG){ | 3643 if(s->qlog == LOSSLESS_QLOG){ |
3046 for(; yq<slice_h && yq<h; yq++){ | 3644 for(; yq<slice_h && yq<h; yq++){ |
3645 DWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
3047 for(x=0; x<w; x++){ | 3646 for(x=0; x<w; x++){ |
3048 s->spatial_dwt_buffer[yq*w + x]<<=FRAC_BITS; | 3647 line[x] <<= FRAC_BITS; |
3049 } | 3648 } |
3050 } | 3649 } |
3051 } | 3650 } |
3052 | 3651 |
3053 predict_slice(s, s->spatial_dwt_buffer, plane_index, 1, mb_y); | 3652 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y); |
3054 } | 3653 |
3654 /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */ | |
3655 /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. */ | |
3656 if (s->keyframe || (s->avctx->debug&512)){ | |
3657 y = FFMIN(p->height, slice_starty); | |
3658 end_y = FFMIN(p->height, slice_h); | |
3659 } | |
3660 else{ | |
3661 y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1))); | |
3662 end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1))); | |
3663 } | |
3664 while(y < end_y) | |
3665 slice_buffer_release(&s->sb, y++); | |
3666 } | |
3667 | |
3668 slice_buffer_flush(&s->sb); | |
3669 | |
3055 STOP_TIMER("idwt + predict_slices")} | 3670 STOP_TIMER("idwt + predict_slices")} |
3056 } | 3671 } |
3057 | 3672 |
3058 emms_c(); | 3673 emms_c(); |
3059 | 3674 |
3075 | 3690 |
3076 static int decode_end(AVCodecContext *avctx) | 3691 static int decode_end(AVCodecContext *avctx) |
3077 { | 3692 { |
3078 SnowContext *s = avctx->priv_data; | 3693 SnowContext *s = avctx->priv_data; |
3079 | 3694 |
3695 slice_buffer_destroy(&s->sb); | |
3696 | |
3080 common_end(s); | 3697 common_end(s); |
3081 | 3698 |
3082 return 0; | 3699 return 0; |
3083 } | 3700 } |
3084 | 3701 |