Mercurial > libavcodec.hg
comparison vorbis.c @ 3570:991ef6ade276 libavcodec
misc tweaks in vorbis_residue_decode().
4% faster vorbis.
author | lorenm |
---|---|
date | Thu, 10 Aug 2006 21:55:31 +0000 |
parents | 945caa35ee9a |
children | 3bfe36a9db73 |
comparison legend: equal | deleted | inserted | replaced
3569:c42c03f3b402 | 3570:991ef6ade276 |
---|---|
1372 uint_fast16_t j_times_ptns_to_read; | 1372 uint_fast16_t j_times_ptns_to_read; |
1373 | 1373 |
1374 voffset=vr->begin; | 1374 voffset=vr->begin; |
1375 for(partition_count=0;partition_count<ptns_to_read;) { // SPEC error | 1375 for(partition_count=0;partition_count<ptns_to_read;) { // SPEC error |
1376 if (!pass) { | 1376 if (!pass) { |
1377 uint_fast32_t inverse_class = inverse[vr->classifications]; | |
1377 for(j_times_ptns_to_read=0, j=0;j<ch_used;++j) { | 1378 for(j_times_ptns_to_read=0, j=0;j<ch_used;++j) { |
1378 if (!do_not_decode[j]) { | 1379 if (!do_not_decode[j]) { |
1379 uint_fast32_t temp=get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table, | 1380 uint_fast32_t temp=get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table, |
1380 vc->codebooks[vr->classbook].nb_bits, 3); | 1381 vc->codebooks[vr->classbook].nb_bits, 3); |
1381 | 1382 |
1383 | 1384 |
1384 assert(vr->classifications > 1 && temp<=65536); //needed for inverse[] | 1385 assert(vr->classifications > 1 && temp<=65536); //needed for inverse[] |
1385 for(i=0;i<c_p_c;++i) { | 1386 for(i=0;i<c_p_c;++i) { |
1386 uint_fast32_t temp2; | 1387 uint_fast32_t temp2; |
1387 | 1388 |
1388 temp2=(((uint_fast64_t)temp) * inverse[vr->classifications])>>32; | 1389 temp2=(((uint_fast64_t)temp) * inverse_class)>>32; |
1389 if (partition_count+c_p_c-1-i < ptns_to_read) { | 1390 if (partition_count+c_p_c-1-i < ptns_to_read) { |
1390 classifs[j_times_ptns_to_read+partition_count+c_p_c-1-i]=temp-temp2*vr->classifications; | 1391 classifs[j_times_ptns_to_read+partition_count+c_p_c-1-i]=temp-temp2*vr->classifications; |
1391 } | 1392 } |
1392 temp=temp2; | 1393 temp=temp2; |
1393 } | 1394 } |
1403 uint_fast8_t vqclass=classifs[j_times_ptns_to_read+partition_count]; | 1404 uint_fast8_t vqclass=classifs[j_times_ptns_to_read+partition_count]; |
1404 int_fast16_t vqbook=vr->books[vqclass][pass]; | 1405 int_fast16_t vqbook=vr->books[vqclass][pass]; |
1405 | 1406 |
1406 if (vqbook>=0) { | 1407 if (vqbook>=0) { |
1407 uint_fast16_t coffs; | 1408 uint_fast16_t coffs; |
1408 uint_fast8_t dim= vc->codebooks[vqbook].dimensions; | 1409 unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64 |
1409 uint_fast16_t step= dim==1 ? vr->partition_size | 1410 uint_fast16_t step= dim==1 ? vr->partition_size |
1410 : FASTDIV(vr->partition_size, dim); | 1411 : FASTDIV(vr->partition_size, dim); |
1411 vorbis_codebook codebook= vc->codebooks[vqbook]; | 1412 vorbis_codebook codebook= vc->codebooks[vqbook]; |
1412 | 1413 |
1413 if (vr->type==0) { | 1414 if (vr->type==0) { |
1414 | 1415 |
1415 voffs=voffset+j*vlen; | 1416 voffs=voffset+j*vlen; |
1416 for(k=0;k<step;++k) { | 1417 for(k=0;k<step;++k) { |
1417 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions; | 1418 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; |
1418 for(l=0;l<codebook.dimensions;++l) { | 1419 for(l=0;l<dim;++l) { |
1419 vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH | 1420 vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH |
1420 } | 1421 } |
1421 } | 1422 } |
1422 } | 1423 } |
1423 else if (vr->type==1) { | 1424 else if (vr->type==1) { |
1424 voffs=voffset+j*vlen; | 1425 voffs=voffset+j*vlen; |
1425 for(k=0;k<step;++k) { | 1426 for(k=0;k<step;++k) { |
1426 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions; | 1427 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; |
1427 for(l=0;l<codebook.dimensions;++l, ++voffs) { | 1428 for(l=0;l<dim;++l, ++voffs) { |
1428 vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH | 1429 vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH |
1429 | 1430 |
1430 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs); | 1431 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs); |
1431 } | 1432 } |
1432 } | 1433 } |
1433 } | 1434 } |
1434 else if (vr->type==2 && ch==2 && (voffset&1)==0 && (codebook.dimensions&1)==0) { // most frequent case optimized | 1435 else if (vr->type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized |
1435 voffs=voffset>>1; | 1436 voffs=voffset>>1; |
1436 | 1437 |
1438 if(dim==2) { | |
1439 for(k=0;k<step;++k) { | |
1440 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2; | |
1441 vec[voffs+k ]+=codebook.codevectors[coffs ]; // FPMATH | |
1442 vec[voffs+k+vlen]+=codebook.codevectors[coffs+1]; // FPMATH | |
1443 } | |
1444 } else | |
1437 for(k=0;k<step;++k) { | 1445 for(k=0;k<step;++k) { |
1438 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions; | 1446 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; |
1439 for(l=0;l<codebook.dimensions;l+=2, voffs++) { | 1447 for(l=0;l<dim;l+=2, voffs++) { |
1440 vec[voffs ]+=codebook.codevectors[coffs+l ]; // FPMATH | 1448 vec[voffs ]+=codebook.codevectors[coffs+l ]; // FPMATH |
1441 vec[voffs+vlen]+=codebook.codevectors[coffs+l+1]; // FPMATH | 1449 vec[voffs+vlen]+=codebook.codevectors[coffs+l+1]; // FPMATH |
1442 | 1450 |
1443 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); | 1451 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); |
1444 } | 1452 } |
1447 } | 1455 } |
1448 else if (vr->type==2) { | 1456 else if (vr->type==2) { |
1449 voffs=voffset; | 1457 voffs=voffset; |
1450 | 1458 |
1451 for(k=0;k<step;++k) { | 1459 for(k=0;k<step;++k) { |
1452 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions; | 1460 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; |
1453 for(l=0;l<codebook.dimensions;++l, ++voffs) { | 1461 for(l=0;l<dim;++l, ++voffs) { |
1454 vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and % | 1462 vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and % |
1455 | 1463 |
1456 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); | 1464 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); |
1457 } | 1465 } |
1458 } | 1466 } |