comparison vorbis_dec.c @ 7764:a34931a8fea9 libavcodec

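Optimize vorbis_residue_decode() so that the residue type (vr->type) is passed as a compile-time constant (vr_type) to an always-inlined internal decoder, selected by a small dispatching wrapper. Based on a patch by Siarhei Siamashka.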
author michael
date Sun, 31 Aug 2008 22:49:42 +0000
parents 8226017a65ae
children c0745c5b1bb4
comparison
equal deleted inserted replaced
7763:73b10d25cb8d 7764:a34931a8fea9
1222 return 0; 1222 return 0;
1223 } 1223 }
1224 1224
1225 // Read and decode residue 1225 // Read and decode residue
1226 1226
1227 static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen) { 1227 static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen, int vr_type) {
1228 GetBitContext *gb=&vc->gb; 1228 GetBitContext *gb=&vc->gb;
1229 uint_fast8_t c_p_c=vc->codebooks[vr->classbook].dimensions; 1229 uint_fast8_t c_p_c=vc->codebooks[vr->classbook].dimensions;
1230 uint_fast16_t n_to_read=vr->end-vr->begin; 1230 uint_fast16_t n_to_read=vr->end-vr->begin;
1231 uint_fast16_t ptns_to_read=n_to_read/vr->partition_size; 1231 uint_fast16_t ptns_to_read=n_to_read/vr->partition_size;
1232 uint_fast8_t classifs[ptns_to_read*vc->audio_channels]; 1232 uint_fast8_t classifs[ptns_to_read*vc->audio_channels];
1233 uint_fast8_t pass; 1233 uint_fast8_t pass;
1234 uint_fast8_t ch_used; 1234 uint_fast8_t ch_used;
1235 uint_fast8_t i,j,l; 1235 uint_fast8_t i,j,l;
1236 uint_fast16_t k; 1236 uint_fast16_t k;
1237 1237
1238 if (vr->type==2) { 1238 if (vr_type==2) {
1239 for(j=1;j<ch;++j) { 1239 for(j=1;j<ch;++j) {
1240 do_not_decode[0]&=do_not_decode[j]; // FIXME - clobbering input 1240 do_not_decode[0]&=do_not_decode[j]; // FIXME - clobbering input
1241 } 1241 }
1242 if (do_not_decode[0]) return 0; 1242 if (do_not_decode[0]) return 0;
1243 ch_used=1; 1243 ch_used=1;
1290 unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64 1290 unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64
1291 uint_fast16_t step= dim==1 ? vr->partition_size 1291 uint_fast16_t step= dim==1 ? vr->partition_size
1292 : FASTDIV(vr->partition_size, dim); 1292 : FASTDIV(vr->partition_size, dim);
1293 vorbis_codebook codebook= vc->codebooks[vqbook]; 1293 vorbis_codebook codebook= vc->codebooks[vqbook];
1294 1294
1295 if (vr->type==0) { 1295 if (vr_type==0) {
1296 1296
1297 voffs=voffset+j*vlen; 1297 voffs=voffset+j*vlen;
1298 for(k=0;k<step;++k) { 1298 for(k=0;k<step;++k) {
1299 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; 1299 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
1300 for(l=0;l<dim;++l) { 1300 for(l=0;l<dim;++l) {
1301 vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH 1301 vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH
1302 } 1302 }
1303 } 1303 }
1304 } 1304 }
1305 else if (vr->type==1) { 1305 else if (vr_type==1) {
1306 voffs=voffset+j*vlen; 1306 voffs=voffset+j*vlen;
1307 for(k=0;k<step;++k) { 1307 for(k=0;k<step;++k) {
1308 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; 1308 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
1309 for(l=0;l<dim;++l, ++voffs) { 1309 for(l=0;l<dim;++l, ++voffs) {
1310 vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH 1310 vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH
1311 1311
1312 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs); 1312 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs);
1313 } 1313 }
1314 } 1314 }
1315 } 1315 }
1316 else if (vr->type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized 1316 else if (vr_type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized
1317 voffs=voffset>>1; 1317 voffs=voffset>>1;
1318 1318
1319 if(dim==2) { 1319 if(dim==2) {
1320 for(k=0;k<step;++k) { 1320 for(k=0;k<step;++k) {
1321 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2; 1321 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2;
1340 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); 1340 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l);
1341 } 1341 }
1342 } 1342 }
1343 1343
1344 } 1344 }
1345 else if (vr->type==2) { 1345 else if (vr_type==2) {
1346 voffs=voffset; 1346 voffs=voffset;
1347 1347
1348 for(k=0;k<step;++k) { 1348 for(k=0;k<step;++k) {
1349 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; 1349 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
1350 for(l=0;l<dim;++l, ++voffs) { 1350 for(l=0;l<dim;++l, ++voffs) {
1351 vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and % 1351 vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and %
1352 1352
1353 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); 1353 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l);
1354 } 1354 }
1355 } 1355 }
1356 } else {
1357 av_log(vc->avccontext, AV_LOG_ERROR, " Invalid residue type while residue decode?! \n");
1358 return 1;
1359 } 1356 }
1360 } 1357 }
1361 } 1358 }
1362 j_times_ptns_to_read+=ptns_to_read; 1359 j_times_ptns_to_read+=ptns_to_read;
1363 } 1360 }
1365 voffset+=vr->partition_size; 1362 voffset+=vr->partition_size;
1366 } 1363 }
1367 } 1364 }
1368 } 1365 }
1369 return 0; 1366 return 0;
1367 }
1368
1369 static inline int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen)
1370 {
1371 if (vr->type==2)
1372 return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 2);
1373 else if (vr->type==1)
1374 return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 1);
1375 else if (vr->type==0)
1376 return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 0);
1377 else {
1378 av_log(vc->avccontext, AV_LOG_ERROR, " Invalid residue type while residue decode?! \n");
1379 return 1;
1380 }
1370 } 1381 }
1371 1382
1372 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize) 1383 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
1373 { 1384 {
1374 int i; 1385 int i;