comparison svq1.c @ 2012:b7c82b9ef098 libavcodec

P-frame encoding; only (0,0) motion vectors are supported so far
author michael
date Sun, 09 May 2004 15:50:08 +0000
parents 8c7e7c332b86
children f089d25c82f0
comparison
equal deleted inserted replaced
2011:8c7e7c332b86 2012:b7c82b9ef098
65 typedef struct SVQ1Context { 65 typedef struct SVQ1Context {
66 66
67 AVCodecContext *avctx; 67 AVCodecContext *avctx;
68 DSPContext dsp; 68 DSPContext dsp;
69 AVFrame picture; 69 AVFrame picture;
70 AVFrame ref_picture[2]; 70 AVFrame current_picture;
71 AVFrame last_picture;
71 PutBitContext pb; 72 PutBitContext pb;
72 GetBitContext gb; 73 GetBitContext gb;
73 74
74 PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex 75 PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex
75 76
832 init_vlc(&svq1_block_type, 2, 4, 833 init_vlc(&svq1_block_type, 2, 4,
833 &svq1_block_type_vlc[0][1], 2, 1, 834 &svq1_block_type_vlc[0][1], 2, 1,
834 &svq1_block_type_vlc[0][0], 2, 1); 835 &svq1_block_type_vlc[0][0], 2, 1);
835 836
836 init_vlc(&svq1_motion_component, 7, 65, 837 init_vlc(&svq1_motion_component, 7, 65,
837 &svq1_motion_component_vlc[0][1], 4, 2, 838 &svq1_motion_component_vlc[0][1], 2, 1,
838 &svq1_motion_component_vlc[0][0], 4, 2); 839 &svq1_motion_component_vlc[0][0], 2, 1);
839 840
840 for (i = 0; i < 6; i++) { 841 for (i = 0; i < 6; i++) {
841 init_vlc(&svq1_intra_multistage[i], 3, 8, 842 init_vlc(&svq1_intra_multistage[i], 3, 8,
842 &svq1_intra_multistage_vlc[i][0][1], 2, 1, 843 &svq1_intra_multistage_vlc[i][0][1], 2, 1,
843 &svq1_intra_multistage_vlc[i][0][0], 2, 1); 844 &svq1_intra_multistage_vlc[i][0][0], 2, 1);
1309 1310
1310 return ret; 1311 return ret;
1311 } 1312 }
1312 #endif 1313 #endif
1313 1314
1314 static int encode_block(SVQ1Context *s, uint8_t *src, int stride, int level, int threshold, int lambda){ 1315 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
1315 int count, y, x, i, j, split, best_mean, best_score, best_count; 1316 int count, y, x, i, j, split, best_mean, best_score, best_count;
1316 int best_vector[6]; 1317 int best_vector[6];
1317 int block_sum[7]= {0, 0, 0, 0, 0, 0}; 1318 int block_sum[7]= {0, 0, 0, 0, 0, 0};
1318 int w= 2<<((level+2)>>1); 1319 int w= 2<<((level+2)>>1);
1319 int h= 2<<((level+1)>>1); 1320 int h= 2<<((level+1)>>1);
1320 int size=w*h; 1321 int size=w*h;
1321 int16_t block[7][256]; 1322 int16_t block[7][256];
1322 const int intra= 1;
1323 const int8_t *codebook_sum, *codebook; 1323 const int8_t *codebook_sum, *codebook;
1324 const uint16_t (*mean_vlc)[2]; 1324 const uint16_t (*mean_vlc)[2];
1325 const uint8_t (*multistage_vlc)[2]; 1325 const uint8_t (*multistage_vlc)[2];
1326 1326
1327 best_score=0;
1328 //FIXME optimize, this doesn't need to be done multiple times
1327 if(intra){ 1329 if(intra){
1328 codebook_sum= svq1_intra_codebook_sum[level]; 1330 codebook_sum= svq1_intra_codebook_sum[level];
1329 codebook= svq1_intra_codebooks[level]; 1331 codebook= svq1_intra_codebooks[level];
1330 mean_vlc= svq1_intra_mean_vlc; 1332 mean_vlc= svq1_intra_mean_vlc;
1331 multistage_vlc= svq1_intra_multistage_vlc[level]; 1333 multistage_vlc= svq1_intra_multistage_vlc[level];
1334 for(y=0; y<h; y++){
1335 for(x=0; x<w; x++){
1336 int v= src[x + y*stride];
1337 block[0][x + w*y]= v;
1338 best_score += v*v;
1339 block_sum[0] += v;
1340 }
1341 }
1332 }else{ 1342 }else{
1333 codebook_sum= svq1_inter_codebook_sum[level]; 1343 codebook_sum= svq1_inter_codebook_sum[level];
1334 codebook= svq1_inter_codebooks[level]; 1344 codebook= svq1_inter_codebooks[level];
1335 mean_vlc= svq1_inter_mean_vlc; 1345 mean_vlc= svq1_inter_mean_vlc + 256;
1336 multistage_vlc= svq1_inter_multistage_vlc[level]; 1346 multistage_vlc= svq1_inter_multistage_vlc[level];
1337 } 1347 for(y=0; y<h; y++){
1338 1348 for(x=0; x<w; x++){
1339 best_score=0; 1349 int v= src[x + y*stride] - ref[x + y*stride];
1340 for(y=0; y<h; y++){ 1350 block[0][x + w*y]= v;
1341 for(x=0; x<w; x++){ 1351 best_score += v*v;
1342 int v= src[x + y*stride]; 1352 block_sum[0] += v;
1343 block[0][x + w*y]= v; 1353 }
1344 best_score += v*v;
1345 block_sum[0] += v;
1346 } 1354 }
1347 } 1355 }
1348 1356
1349 best_count=0; 1357 best_count=0;
1350 best_score -= ((block_sum[0]*block_sum[0])>>(level+3)); 1358 best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
1351 best_mean= (block_sum[0] + (size>>1)) >> (level+3); 1359 best_mean= (block_sum[0] + (size>>1)) >> (level+3);
1352 1360
1353 if(level<4){ 1361 if(level<4){
1354 for(count=1; count<7; count++){ 1362 for(count=1; count<7; count++){
1355 int best_vector_score= INT_MAX; 1363 int best_vector_score= INT_MAX;
1356 int best_vector_sum=-99, best_vector_mean=-99; 1364 int best_vector_sum=-999, best_vector_mean=-999;
1357 const int stage= count-1; 1365 const int stage= count-1;
1358 const int8_t *vector; 1366 const int8_t *vector;
1359 1367
1360 for(i=0; i<16; i++){ 1368 for(i=0; i<16; i++){
1361 int sum= codebook_sum[stage*16 + i]; 1369 int sum= codebook_sum[stage*16 + i];
1368 int v= vector[j]; 1376 int v= vector[j];
1369 sqr += (v - block[stage][j])*(v - block[stage][j]); 1377 sqr += (v - block[stage][j])*(v - block[stage][j]);
1370 } 1378 }
1371 diff= block_sum[stage] - sum; 1379 diff= block_sum[stage] - sum;
1372 mean= (diff + (size>>1)) >> (level+3); 1380 mean= (diff + (size>>1)) >> (level+3);
1373 assert(mean >-50 && mean<300); 1381 assert(mean >-300 && mean<300);
1374 mean= clip(mean, 0, 255); 1382 if(intra) mean= clip(mean, 0, 255);
1383 else mean= clip(mean, -256, 255);
1375 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow 1384 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
1376 if(score < best_vector_score){ 1385 if(score < best_vector_score){
1377 best_vector_score= score; 1386 best_vector_score= score;
1378 best_vector[stage]= i; 1387 best_vector[stage]= i;
1379 best_vector_sum= sum; 1388 best_vector_sum= sum;
1380 best_vector_mean= mean; 1389 best_vector_mean= mean;
1381 } 1390 }
1382 } 1391 }
1383 assert(best_vector_mean != -99); 1392 assert(best_vector_mean != -999);
1384 vector= codebook + stage*size*16 + best_vector[stage]*size; 1393 vector= codebook + stage*size*16 + best_vector[stage]*size;
1385 for(j=0; j<size; j++){ 1394 for(j=0; j<size; j++){
1386 block[stage+1][j] = block[stage][j] - vector[j]; 1395 block[stage+1][j] = block[stage][j] - vector[j];
1387 } 1396 }
1388 block_sum[stage+1]= block_sum[stage] - best_vector_sum; 1397 block_sum[stage+1]= block_sum[stage] - best_vector_sum;
1406 PutBitContext backup[6]; 1415 PutBitContext backup[6];
1407 1416
1408 for(i=level-1; i>=0; i--){ 1417 for(i=level-1; i>=0; i--){
1409 backup[i]= s->reorder_pb[i]; 1418 backup[i]= s->reorder_pb[i];
1410 } 1419 }
1411 score += encode_block(s, src , stride, level-1, threshold>>1, lambda); 1420 score += encode_block(s, src , ref , decoded , stride, level-1, threshold>>1, lambda, intra);
1412 score += encode_block(s, src + offset, stride, level-1, threshold>>1, lambda); 1421 score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
1413 score += lambda; 1422 score += lambda;
1414 1423
1415 if(score < best_score){ 1424 if(score < best_score){
1416 best_score= score; 1425 best_score= score;
1417 split=1; 1426 split=1;
1423 } 1432 }
1424 if (level > 0) 1433 if (level > 0)
1425 put_bits(&s->reorder_pb[level], 1, split); 1434 put_bits(&s->reorder_pb[level], 1, split);
1426 1435
1427 if(!split){ 1436 if(!split){
1428 assert(best_mean >= 0 && best_mean<256); 1437 assert((best_mean >= 0 && best_mean<256) || !intra);
1438 assert(best_mean >= -256 && best_mean<256);
1429 assert(best_count >=0 && best_count<7); 1439 assert(best_count >=0 && best_count<7);
1430 assert(level<4 || best_count==0); 1440 assert(level<4 || best_count==0);
1431 1441
1432 /* output the encoding */ 1442 /* output the encoding */
1433 put_bits(&s->reorder_pb[level], 1443 put_bits(&s->reorder_pb[level],
1438 1448
1439 for (i = 0; i < best_count; i++){ 1449 for (i = 0; i < best_count; i++){
1440 assert(best_vector[i]>=0 && best_vector[i]<16); 1450 assert(best_vector[i]>=0 && best_vector[i]<16);
1441 put_bits(&s->reorder_pb[level], 4, best_vector[i]); 1451 put_bits(&s->reorder_pb[level], 4, best_vector[i]);
1442 } 1452 }
1453
1454 for(y=0; y<h; y++){
1455 for(x=0; x<w; x++){
1456 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
1457 }
1458 }
1443 } 1459 }
1444 1460
1445 return best_score; 1461 return best_score;
1446 } 1462 }
1447 1463
1448 static void svq1_encode_plane(SVQ1Context *s, unsigned char *plane, 1464 static void svq1_encode_plane(SVQ1Context *s, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
1449 int width, int height, int stride) 1465 int width, int height, int src_stride, int stride)
1450 { 1466 {
1451 unsigned char buffer0[256]; 1467 unsigned char buffer0[256];
1452 unsigned char buffer1[256]; 1468 unsigned char buffer1[256];
1453 int current_buffer; 1469 int current_buffer;
1454 unsigned char *vector; 1470 unsigned char *vector;
1459 int i, j; 1475 int i, j;
1460 int block_width, block_height; 1476 int block_width, block_height;
1461 int left_edge; 1477 int left_edge;
1462 int level; 1478 int level;
1463 int threshold[6]; 1479 int threshold[6];
1480 const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
1464 1481
1465 static int frame = 0; 1482 static int frame = 0;
1466 1483
1467 #ifdef DEBUG_SVQ1 1484 #ifdef DEBUG_SVQ1
1468 av_log(s->avctx, AV_LOG_INFO, "********* frame #%d\n", frame++); 1485 av_log(s->avctx, AV_LOG_INFO, "********* frame #%d\n", frame++);
1475 1492
1476 block_width = (width + 15) / 16; 1493 block_width = (width + 15) / 16;
1477 block_height = (height + 15) / 16; 1494 block_height = (height + 15) / 16;
1478 1495
1479 for (y = 0; y < block_height; y++) { 1496 for (y = 0; y < block_height; y++) {
1497 uint8_t src[stride*16];
1498
1499 for(i=0; i<16 && i + 16*y<height; i++){
1500 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1501 for(x=width; x<16*block_width; x++)
1502 src[i*stride+x]= src[i*stride+x-1];
1503 }
1504 for(; i<16 && i + 16*y<16*block_height; i++)
1505 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1480 1506
1481 for (x = 0; x < block_width; x++) { 1507 for (x = 0; x < block_width; x++) {
1482 uint8_t reorder_buffer[6][7*32]; 1508 uint8_t reorder_buffer[2][6][7*32];
1483 uint8_t *src= plane + y * 16 * stride + x * 16; 1509 int count[2][6];
1484 uint8_t buf[stride*16]; 1510 int offset = y * 16 * stride + x * 16;
1511 uint8_t *decoded= decoded_plane + offset;
1512 uint8_t *ref= ref_plane + offset;
1513 int score[2]={0,0}, best;
1514 uint8_t temp[16*stride];
1485 1515
1486 #ifdef DEBUG_SVQ1 1516 #ifdef DEBUG_SVQ1
1487 av_log(s->avctx, AV_LOG_INFO, "* level 5 vector @ %d, %d:\n", x * 16, y * 16); 1517 av_log(s->avctx, AV_LOG_INFO, "* level 5 vector @ %d, %d:\n", x * 16, y * 16);
1488 #endif 1518 #endif
1489
1490 /* copy the block into the current work buffer */
1491 left_edge = (y * 16 * stride) + (x * 16);
1492 1519
1493 for(i=0; i<6; i++){ 1520 for(i=0; i<6; i++){
1494 init_put_bits(&s->reorder_pb[i], reorder_buffer[i], 7*32); 1521 init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
1495 } 1522 }
1496 if(x*16 + 16 > width || y*16 + 16 > height){ 1523 if(s->picture.pict_type == P_TYPE){
1497 ff_emulated_edge_mc(buf, src, stride, 16, 16, 16*x, 16*y, width, height); 1524 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
1498 src= buf; 1525 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1526 score[0]= vlc[1]*lambda;
1499 } 1527 }
1500 s->rd_total += encode_block(s, src, stride, 5, 256, (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT)); 1528 score[0]+= encode_block(s, src+16*x, ref, temp, stride, 5, 64, lambda, 1);
1529 for(i=0; i<6; i++){
1530 count[0][i]= put_bits_count(&s->reorder_pb[i]);
1531 flush_put_bits(&s->reorder_pb[i]);
1532 init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
1533 }
1534 if(s->picture.pict_type == P_TYPE){
1535 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
1536 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1537 score[1] = vlc[1]*lambda;
1538 for(i=0; i<2; i++){
1539 vlc= svq1_motion_component_vlc[32];
1540 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1541 score[1] += vlc[1]*lambda;
1542 }
1543
1544 score[1]+= encode_block(s, src+16*x, ref, decoded, stride, 5, 64, lambda, 0);
1545 best= score[1] <= score[0];
1546 if(best==1){
1547 for(i=0; i<6; i++){
1548 count[1][i]= put_bits_count(&s->reorder_pb[i]);
1549 flush_put_bits(&s->reorder_pb[i]);
1550 }
1551 }
1552 }else
1553 best= 0;
1554
1555 s->rd_total += score[best];
1556
1501 for(i=5; i>=0; i--){ 1557 for(i=5; i>=0; i--){
1502 int count= put_bits_count(&s->reorder_pb[i]); 1558 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
1503 1559 }
1504 flush_put_bits(&s->reorder_pb[i]); 1560 if(best==0){
1505 ff_copy_bits(&s->pb, s->reorder_pb[i].buf, count); 1561 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
1506 } 1562 }
1507 1563
1508 #if 0 1564 #if 0
1509 for (i = 0; i < 256; i += 16) { 1565 for (i = 0; i < 256; i += 16) {
1510 memcpy(&buffer0[i], &plane[left_edge], 16); 1566 memcpy(&buffer0[i], &plane[left_edge], 16);
1663 int buf_size, void *data) 1719 int buf_size, void *data)
1664 { 1720 {
1665 SVQ1Context * const s = avctx->priv_data; 1721 SVQ1Context * const s = avctx->priv_data;
1666 AVFrame *pict = data; 1722 AVFrame *pict = data;
1667 AVFrame * const p= (AVFrame*)&s->picture; 1723 AVFrame * const p= (AVFrame*)&s->picture;
1668 1724 AVFrame temp;
1725 int i;
1726
1727 if(avctx->pix_fmt != PIX_FMT_YUV410P){
1728 av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
1729 return -1;
1730 }
1731
1732 if(!s->current_picture.data[0]){
1733 avctx->get_buffer(avctx, &s->current_picture);
1734 avctx->get_buffer(avctx, &s->last_picture);
1735 }
1736
1737 temp= s->current_picture;
1738 s->current_picture= s->last_picture;
1739 s->last_picture= temp;
1740
1669 init_put_bits(&s->pb, buf, buf_size); 1741 init_put_bits(&s->pb, buf, buf_size);
1670 1742
1671 *p = *pict; 1743 *p = *pict;
1672 p->pict_type = I_TYPE; 1744 p->pict_type = avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
1673 p->key_frame = p->pict_type == I_TYPE; 1745 p->key_frame = p->pict_type == I_TYPE;
1674 1746
1675 svq1_write_header(s, p->pict_type); 1747 svq1_write_header(s, p->pict_type);
1676 svq1_encode_plane(s, s->picture.data[0], s->frame_width, s->frame_height, 1748 for(i=0; i<3; i++){
1677 s->picture.linesize[0]); 1749 svq1_encode_plane(s,
1678 // if (avctx->flags & CODEC_FLAG_GRAY) { 1750 s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
1679 if (avctx->pix_fmt != PIX_FMT_YUV410P) { 1751 s->frame_width / (i?4:1), s->frame_height / (i?4:1),
1680 svq1_output_intra_constant_mean(s, s->c_block_width * 2, 1752 s->picture.linesize[i], s->current_picture.linesize[i]);
1681 s->c_block_height * 2, 128);
1682 } else {
1683 svq1_encode_plane(s, s->picture.data[1], s->frame_width / 4,
1684 s->frame_height / 4, s->picture.linesize[1]);
1685 svq1_encode_plane(s, s->picture.data[2], s->frame_width / 4,
1686 s->frame_height / 4, s->picture.linesize[2]);
1687 } 1753 }
1688 1754
1689 // align_put_bits(&s->pb); 1755 // align_put_bits(&s->pb);
1690 while(put_bits_count(&s->pb) & 31) 1756 while(put_bits_count(&s->pb) & 31)
1691 put_bits(&s->pb, 1, 0); 1757 put_bits(&s->pb, 1, 0);