vorbis.c @ 3570:991ef6ade276 (libavcodec)

misc tweaks in vorbis_residue_decode(). 4% faster vorbis.

author    lorenm
date      Thu, 10 Aug 2006 21:55:31 +0000
parents   945caa35ee9a
children  3bfe36a9db73

comparison: 3569:c42c03f3b402 to 3570:991ef6ade276
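
The first two hunks below hoist the inverse[vr->classifications] table load out of the per-channel loop into a local inverse_class. That value is a precomputed reciprocal: temp2 = (temp * inverse_class) >> 32 is the quotient of temp / vr->classifications, and the temp - temp2*vr->classifications stored into classifs[] is the remainder, which is why the code asserts vr->classifications > 1 && temp <= 65536. Below is a minimal standalone sketch of the same trick, assuming the reciprocal is built as ceil(2^32 / d); it is an illustration only, not necessarily the exact table the decoder ships with.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of division by a small constant via a precomputed reciprocal.
     * Assumption for this sketch: inv = ceil(2^32 / d), with d >= 2
     * (the decoder asserts vr->classifications > 1 and temp <= 65536). */
    static uint32_t make_inverse(uint32_t d)
    {
        assert(d >= 2);
        return (uint32_t)(((UINT64_C(1) << 32) + d - 1) / d);
    }

    int main(void)
    {
        uint32_t d   = 7;                 /* stands in for vr->classifications */
        uint32_t inv = make_inverse(d);   /* hoisted once, like inverse_class  */

        for (uint32_t n = 0; n <= 65536; ++n) {
            uint32_t q = (uint32_t)(((uint64_t)n * inv) >> 32); /* n / d without a divide */
            uint32_t r = n - q * d;                             /* remainder, as stored in classifs[] */
            assert(q == n / d && r == n % d);
        }
        printf("reciprocal division matches for all n <= 65536 (d=%u)\n", d);
        return 0;
    }

In the decoder the same quotient/remainder step runs once per classification digit, so keeping the reciprocal in a local removes a repeated table load from the innermost loop.
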
@@ -1372,10 +1372,11 @@
     uint_fast16_t j_times_ptns_to_read;
 
     voffset=vr->begin;
     for(partition_count=0;partition_count<ptns_to_read;) { // SPEC error
         if (!pass) {
+            uint_fast32_t inverse_class = inverse[vr->classifications];
             for(j_times_ptns_to_read=0, j=0;j<ch_used;++j) {
                 if (!do_not_decode[j]) {
                     uint_fast32_t temp=get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table,
                                                 vc->codebooks[vr->classbook].nb_bits, 3);
 
@@ -1383,11 +1384,11 @@
 
                     assert(vr->classifications > 1 && temp<=65536); //needed for inverse[]
                     for(i=0;i<c_p_c;++i) {
                         uint_fast32_t temp2;
 
-                        temp2=(((uint_fast64_t)temp) * inverse[vr->classifications])>>32;
+                        temp2=(((uint_fast64_t)temp) * inverse_class)>>32;
                         if (partition_count+c_p_c-1-i < ptns_to_read) {
                             classifs[j_times_ptns_to_read+partition_count+c_p_c-1-i]=temp-temp2*vr->classifications;
                         }
                         temp=temp2;
                     }
@@ -1403,42 +1404,49 @@
                     uint_fast8_t vqclass=classifs[j_times_ptns_to_read+partition_count];
                     int_fast16_t vqbook=vr->books[vqclass][pass];
 
                     if (vqbook>=0) {
                         uint_fast16_t coffs;
-                        uint_fast8_t dim= vc->codebooks[vqbook].dimensions;
+                        unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64
                         uint_fast16_t step= dim==1 ? vr->partition_size
                                                    : FASTDIV(vr->partition_size, dim);
                         vorbis_codebook codebook= vc->codebooks[vqbook];
 
                         if (vr->type==0) {
 
                             voffs=voffset+j*vlen;
                             for(k=0;k<step;++k) {
-                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
-                                for(l=0;l<codebook.dimensions;++l) {
+                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                for(l=0;l<dim;++l) {
                                     vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH
                                 }
                             }
                         }
                         else if (vr->type==1) {
                             voffs=voffset+j*vlen;
                             for(k=0;k<step;++k) {
-                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
-                                for(l=0;l<codebook.dimensions;++l, ++voffs) {
+                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                for(l=0;l<dim;++l, ++voffs) {
                                     vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH
 
                                     AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs);
                                 }
                             }
                         }
-                        else if (vr->type==2 && ch==2 && (voffset&1)==0 && (codebook.dimensions&1)==0) { // most frequent case optimized
+                        else if (vr->type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized
                             voffs=voffset>>1;
 
+                            if(dim==2) {
+                                for(k=0;k<step;++k) {
+                                    coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2;
+                                    vec[voffs+k     ]+=codebook.codevectors[coffs  ]; // FPMATH
+                                    vec[voffs+k+vlen]+=codebook.codevectors[coffs+1]; // FPMATH
+                                }
+                            } else
                             for(k=0;k<step;++k) {
-                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
-                                for(l=0;l<codebook.dimensions;l+=2, voffs++) {
+                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                for(l=0;l<dim;l+=2, voffs++) {
                                     vec[voffs     ]+=codebook.codevectors[coffs+l  ]; // FPMATH
                                     vec[voffs+vlen]+=codebook.codevectors[coffs+l+1]; // FPMATH
 
                                     AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l);
                                 }
@@ -1447,12 +1455,12 @@
                         }
                         else if (vr->type==2) {
                             voffs=voffset;
 
                             for(k=0;k<step;++k) {
-                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
-                                for(l=0;l<codebook.dimensions;++l, ++voffs) {
+                                coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+                                for(l=0;l<dim;++l, ++voffs) {
                                     vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and %
 
                                     AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l);
                                 }
                             }
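
The largest new piece is the dim==2 branch in the stereo type 2 path: for two-dimensional codebooks it drops the inner loop over l and writes exactly one sample into each half of vec per VLC read. The sketch below is a condensed, self-contained model of that change; STEP, VLEN, and the k*dim offsets are stand-ins for values that really come from the codebook and the bitstream. It only checks that the unrolled form accumulates the same samples as the generic even-dim loop it bypasses.

    #include <assert.h>
    #include <stdio.h>

    /* Condensed model of the two residue type 2 stereo loops from the hunk above.
     * All names and sizes here are illustrative stand-ins, not the decoder's data. */
    #define STEP 4          /* partition_size / dim, as computed via FASTDIV       */
    #define VLEN 16         /* per-channel output length; second channel at +VLEN  */

    static void add_generic(float *vec, const float *cv, int dim, int voffs)
    {
        /* generic even-dim path: inner loop splits each codevector across channels */
        for (int k = 0; k < STEP; ++k) {
            int coffs = k * dim;            /* stands in for the get_vlc2() lookup */
            for (int l = 0; l < dim; l += 2, voffs++) {
                vec[voffs       ] += cv[coffs + l    ];
                vec[voffs + VLEN] += cv[coffs + l + 1];
            }
        }
    }

    static void add_dim2(float *vec, const float *cv, int voffs)
    {
        /* new dim==2 special case: no inner loop, one write per channel per k */
        for (int k = 0; k < STEP; ++k) {
            int coffs = k * 2;
            vec[voffs + k       ] += cv[coffs    ];
            vec[voffs + k + VLEN] += cv[coffs + 1];
        }
    }

    int main(void)
    {
        float a[2 * VLEN] = {0}, b[2 * VLEN] = {0};
        float cv[2 * STEP];
        for (int i = 0; i < 2 * STEP; ++i)
            cv[i] = (float)i * 0.25f;

        add_generic(a, cv, 2, 0);
        add_dim2(b, cv, 0);
        for (int i = 0; i < 2 * VLEN; ++i)
            assert(a[i] == b[i]);          /* both variants add the same samples */
        puts("dim==2 unrolled path matches the generic loop");
        return 0;
    }

The (dim&1)==0 test on the enclosing branch is what makes the pairwise split across vec[voffs] and vec[voffs+vlen] valid; the new branch just specializes the smallest even dim.
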