Mercurial > libavcodec.hg
comparison vorbis_dec.c @ 7764:a34931a8fea9 libavcodec
Optimize vorbis_residue_decode() so that vr->type is a constant.
Based on a patch by Siarhei Siamashka.
author | michael |
---|---|
date | Sun, 31 Aug 2008 22:49:42 +0000 |
parents | 8226017a65ae |
children | c0745c5b1bb4 |
comparison
Legend: equal, deleted, inserted, replaced
7763:73b10d25cb8d | 7764:a34931a8fea9 |
---|---|
1222 return 0; | 1222 return 0; |
1223 } | 1223 } |
1224 | 1224 |
1225 // Read and decode residue | 1225 // Read and decode residue |
1226 | 1226 |
1227 static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen) { | 1227 static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen, int vr_type) { |
1228 GetBitContext *gb=&vc->gb; | 1228 GetBitContext *gb=&vc->gb; |
1229 uint_fast8_t c_p_c=vc->codebooks[vr->classbook].dimensions; | 1229 uint_fast8_t c_p_c=vc->codebooks[vr->classbook].dimensions; |
1230 uint_fast16_t n_to_read=vr->end-vr->begin; | 1230 uint_fast16_t n_to_read=vr->end-vr->begin; |
1231 uint_fast16_t ptns_to_read=n_to_read/vr->partition_size; | 1231 uint_fast16_t ptns_to_read=n_to_read/vr->partition_size; |
1232 uint_fast8_t classifs[ptns_to_read*vc->audio_channels]; | 1232 uint_fast8_t classifs[ptns_to_read*vc->audio_channels]; |
1233 uint_fast8_t pass; | 1233 uint_fast8_t pass; |
1234 uint_fast8_t ch_used; | 1234 uint_fast8_t ch_used; |
1235 uint_fast8_t i,j,l; | 1235 uint_fast8_t i,j,l; |
1236 uint_fast16_t k; | 1236 uint_fast16_t k; |
1237 | 1237 |
1238 if (vr->type==2) { | 1238 if (vr_type==2) { |
1239 for(j=1;j<ch;++j) { | 1239 for(j=1;j<ch;++j) { |
1240 do_not_decode[0]&=do_not_decode[j]; // FIXME - clobbering input | 1240 do_not_decode[0]&=do_not_decode[j]; // FIXME - clobbering input |
1241 } | 1241 } |
1242 if (do_not_decode[0]) return 0; | 1242 if (do_not_decode[0]) return 0; |
1243 ch_used=1; | 1243 ch_used=1; |
1290 unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64 | 1290 unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64 |
1291 uint_fast16_t step= dim==1 ? vr->partition_size | 1291 uint_fast16_t step= dim==1 ? vr->partition_size |
1292 : FASTDIV(vr->partition_size, dim); | 1292 : FASTDIV(vr->partition_size, dim); |
1293 vorbis_codebook codebook= vc->codebooks[vqbook]; | 1293 vorbis_codebook codebook= vc->codebooks[vqbook]; |
1294 | 1294 |
1295 if (vr->type==0) { | 1295 if (vr_type==0) { |
1296 | 1296 |
1297 voffs=voffset+j*vlen; | 1297 voffs=voffset+j*vlen; |
1298 for(k=0;k<step;++k) { | 1298 for(k=0;k<step;++k) { |
1299 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; | 1299 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; |
1300 for(l=0;l<dim;++l) { | 1300 for(l=0;l<dim;++l) { |
1301 vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH | 1301 vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH |
1302 } | 1302 } |
1303 } | 1303 } |
1304 } | 1304 } |
1305 else if (vr->type==1) { | 1305 else if (vr_type==1) { |
1306 voffs=voffset+j*vlen; | 1306 voffs=voffset+j*vlen; |
1307 for(k=0;k<step;++k) { | 1307 for(k=0;k<step;++k) { |
1308 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; | 1308 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; |
1309 for(l=0;l<dim;++l, ++voffs) { | 1309 for(l=0;l<dim;++l, ++voffs) { |
1310 vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH | 1310 vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH |
1311 | 1311 |
1312 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs); | 1312 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs); |
1313 } | 1313 } |
1314 } | 1314 } |
1315 } | 1315 } |
1316 else if (vr->type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized | 1316 else if (vr_type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized |
1317 voffs=voffset>>1; | 1317 voffs=voffset>>1; |
1318 | 1318 |
1319 if(dim==2) { | 1319 if(dim==2) { |
1320 for(k=0;k<step;++k) { | 1320 for(k=0;k<step;++k) { |
1321 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2; | 1321 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2; |
1340 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); | 1340 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); |
1341 } | 1341 } |
1342 } | 1342 } |
1343 | 1343 |
1344 } | 1344 } |
1345 else if (vr->type==2) { | 1345 else if (vr_type==2) { |
1346 voffs=voffset; | 1346 voffs=voffset; |
1347 | 1347 |
1348 for(k=0;k<step;++k) { | 1348 for(k=0;k<step;++k) { |
1349 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; | 1349 coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim; |
1350 for(l=0;l<dim;++l, ++voffs) { | 1350 for(l=0;l<dim;++l, ++voffs) { |
1351 vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and % | 1351 vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and % |
1352 | 1352 |
1353 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); | 1353 AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l); |
1354 } | 1354 } |
1355 } | 1355 } |
1356 } else { | |
1357 av_log(vc->avccontext, AV_LOG_ERROR, " Invalid residue type while residue decode?! \n"); | |
1358 return 1; | |
1359 } | 1356 } |
1360 } | 1357 } |
1361 } | 1358 } |
1362 j_times_ptns_to_read+=ptns_to_read; | 1359 j_times_ptns_to_read+=ptns_to_read; |
1363 } | 1360 } |
1365 voffset+=vr->partition_size; | 1362 voffset+=vr->partition_size; |
1366 } | 1363 } |
1367 } | 1364 } |
1368 } | 1365 } |
1369 return 0; | 1366 return 0; |
| | 1367 } |
| | 1368 |
| | 1369 static inline int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fast8_t ch, uint_fast8_t *do_not_decode, float *vec, uint_fast16_t vlen) |
| | 1370 { |
| | 1371 if (vr->type==2) |
| | 1372 return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 2); |
| | 1373 else if (vr->type==1) |
| | 1374 return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 1); |
| | 1375 else if (vr->type==0) |
| | 1376 return vorbis_residue_decode_internal(vc, vr, ch, do_not_decode, vec, vlen, 0); |
| | 1377 else { |
| | 1378 av_log(vc->avccontext, AV_LOG_ERROR, " Invalid residue type while residue decode?! \n"); |
| | 1379 return 1; |
| | 1380 } |
1370 } | 1381 } |
1371 | 1382 |
1372 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize) | 1383 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize) |
1373 { | 1384 { |
1374 int i; | 1385 int i; |