Mercurial > libavcodec.hg
comparison vorbis.c @ 3568:945caa35ee9a libavcodec
SSE and 3DNow! implementations of float->int conversion and MDCT windowing.
Vorbis decoding is 15% faster.
author | lorenm |
---|---|
date | Thu, 10 Aug 2006 19:06:25 +0000 |
parents | 5ea82888103e |
children | 991ef6ade276 |
comparison
equal
deleted
inserted
replaced
3567:1f8730f62765 | 3568:945caa35ee9a |
---|---|
190 av_free(vc->mappings[i].magnitude); | 190 av_free(vc->mappings[i].magnitude); |
191 av_free(vc->mappings[i].angle); | 191 av_free(vc->mappings[i].angle); |
192 av_free(vc->mappings[i].mux); | 192 av_free(vc->mappings[i].mux); |
193 } | 193 } |
194 av_freep(&vc->mappings); | 194 av_freep(&vc->mappings); |
195 | |
196 if(vc->exp_bias){ | |
197 av_freep(&vc->swin); | |
198 av_freep(&vc->lwin); | |
199 } | |
195 } | 200 } |
196 | 201 |
197 // Parse setup header ------------------------------------------------- | 202 // Parse setup header ------------------------------------------------- |
198 | 203 |
199 // Process codebooks part | 204 // Process codebooks part |
886 return 4; | 891 return 4; |
887 } | 892 } |
888 vc->swin=vwin[bl0-6]; | 893 vc->swin=vwin[bl0-6]; |
889 vc->lwin=vwin[bl1-6]; | 894 vc->lwin=vwin[bl1-6]; |
890 | 895 |
896 if(vc->exp_bias){ | |
897 int i; | |
898 float *win; | |
899 win = av_malloc(vc->blocksize_0/2 * sizeof(float)); | |
900 for(i=0; i<vc->blocksize_0/2; i++) | |
901 win[i] = vc->swin[i] * (1<<15); | |
902 vc->swin = win; | |
903 win = av_malloc(vc->blocksize_1/2 * sizeof(float)); | |
904 for(i=0; i<vc->blocksize_1/2; i++) | |
905 win[i] = vc->lwin[i] * (1<<15); | |
906 vc->lwin = win; | |
907 } | |
908 | |
891 if ((get_bits1(gb)) == 0) { | 909 if ((get_bits1(gb)) == 0) { |
892 av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (framing flag not set). \n"); | 910 av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (framing flag not set). \n"); |
893 return 2; | 911 return 2; |
894 } | 912 } |
895 | 913 |
928 GetBitContext *gb = &(vc->gb); | 946 GetBitContext *gb = &(vc->gb); |
929 int i, j, hdr_type; | 947 int i, j, hdr_type; |
930 | 948 |
931 vc->avccontext = avccontext; | 949 vc->avccontext = avccontext; |
932 dsputil_init(&vc->dsp, avccontext); | 950 dsputil_init(&vc->dsp, avccontext); |
951 | |
952 if(vc->dsp.float_to_int16 == ff_float_to_int16_c) { | |
953 vc->add_bias = 385; | |
954 vc->exp_bias = 0; | |
955 } else { | |
956 vc->add_bias = 0; | |
957 vc->exp_bias = 15<<23; | |
958 } | |
933 | 959 |
934 if (!headers_len) { | 960 if (!headers_len) { |
935 av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n"); | 961 av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n"); |
936 return -1; | 962 return -1; |
937 } | 963 } |
1470 } | 1496 } |
1471 } | 1497 } |
1472 } | 1498 } |
1473 | 1499 |
1474 // Decode the audio packet using the functions above | 1500 // Decode the audio packet using the functions above |
1475 #define BIAS 385 | |
1476 | 1501 |
1477 static int vorbis_parse_audio_packet(vorbis_context *vc) { | 1502 static int vorbis_parse_audio_packet(vorbis_context *vc) { |
1478 GetBitContext *gb=&vc->gb; | 1503 GetBitContext *gb=&vc->gb; |
1479 | 1504 |
1480 uint_fast8_t previous_window=0,next_window=0; | 1505 uint_fast8_t previous_window=0,next_window=0; |
1488 float *ch_floor_ptr=vc->channel_floors; | 1513 float *ch_floor_ptr=vc->channel_floors; |
1489 uint_fast8_t res_chan[vc->audio_channels]; | 1514 uint_fast8_t res_chan[vc->audio_channels]; |
1490 uint_fast8_t res_num=0; | 1515 uint_fast8_t res_num=0; |
1491 int_fast16_t retlen=0; | 1516 int_fast16_t retlen=0; |
1492 uint_fast16_t saved_start=0; | 1517 uint_fast16_t saved_start=0; |
1518 float fadd_bias = vc->add_bias; | |
1493 | 1519 |
1494 if (get_bits1(gb)) { | 1520 if (get_bits1(gb)) { |
1495 av_log(vc->avccontext, AV_LOG_ERROR, "Not a Vorbis I audio packet.\n"); | 1521 av_log(vc->avccontext, AV_LOG_ERROR, "Not a Vorbis I audio packet.\n"); |
1496 return -1; // packet type not audio | 1522 return -1; // packet type not audio |
1497 } | 1523 } |
1574 | 1600 |
1575 // Dotproduct | 1601 // Dotproduct |
1576 | 1602 |
1577 for(j=0, ch_floor_ptr=vc->channel_floors;j<vc->audio_channels;++j,ch_floor_ptr+=blocksize/2) { | 1603 for(j=0, ch_floor_ptr=vc->channel_floors;j<vc->audio_channels;++j,ch_floor_ptr+=blocksize/2) { |
1578 ch_res_ptr=vc->channel_residues+res_chan[j]*blocksize/2; | 1604 ch_res_ptr=vc->channel_residues+res_chan[j]*blocksize/2; |
1579 | 1605 vc->dsp.vector_fmul(ch_floor_ptr, ch_res_ptr, blocksize/2); |
1580 for(i=0;i<blocksize/2;++i) { | |
1581 ch_floor_ptr[i]*=ch_res_ptr[i]; //FPMATH | |
1582 } | |
1583 } | 1606 } |
1584 | 1607 |
1585 // MDCT, overlap/add, save data for next overlapping FPMATH | 1608 // MDCT, overlap/add, save data for next overlapping FPMATH |
1586 | 1609 |
1587 for(j=0;j<vc->audio_channels;++j) { | 1610 for(j=0;j<vc->audio_channels;++j) { |
1598 | 1621 |
1599 saved_start=vc->saved_start; | 1622 saved_start=vc->saved_start; |
1600 | 1623 |
1601 vc->mdct0.fft.imdct_calc(vc->modes[mode_number].blockflag ? &vc->mdct1 : &vc->mdct0, buf, ch_floor_ptr, buf_tmp); | 1624 vc->mdct0.fft.imdct_calc(vc->modes[mode_number].blockflag ? &vc->mdct1 : &vc->mdct0, buf, ch_floor_ptr, buf_tmp); |
1602 | 1625 |
1626 //FIXME process channels together, to allow faster simd vector_fmul_add_add? | |
1603 if (vc->modes[mode_number].blockflag) { | 1627 if (vc->modes[mode_number].blockflag) { |
1604 // -- overlap/add | 1628 // -- overlap/add |
1605 if (previous_window) { | 1629 if (previous_window) { |
1606 for(k=j, i=0;i<vc->blocksize_1/2;++i, k+=step) { | 1630 vc->dsp.vector_fmul_add_add(ret+j, buf, lwin, saved, vc->add_bias, vc->blocksize_1/2, step); |
1607 ret[k]=saved[i]+buf[i]*lwin[i]+BIAS; | |
1608 } | |
1609 retlen=vc->blocksize_1/2; | 1631 retlen=vc->blocksize_1/2; |
1610 } else { | 1632 } else { |
1611 buf += (vc->blocksize_1-vc->blocksize_0)/4; | 1633 int len = (vc->blocksize_1-vc->blocksize_0)/4; |
1612 for(k=j, i=0;i<vc->blocksize_0/2;++i, k+=step) { | 1634 buf += len; |
1613 ret[k]=saved[i]+buf[i]*swin[i]+BIAS; | 1635 vc->dsp.vector_fmul_add_add(ret+j, buf, swin, saved, vc->add_bias, vc->blocksize_0/2, step); |
1614 } | 1636 k = vc->blocksize_0/2*step + j; |
1615 buf += vc->blocksize_0/2; | 1637 buf += vc->blocksize_0/2; |
1616 for(i=0;i<(vc->blocksize_1-vc->blocksize_0)/4;++i, k+=step) { | 1638 if(vc->exp_bias){ |
1617 ret[k]=buf[i]+BIAS; | 1639 for(i=0; i<len; i++, k+=step) |
1640 ((uint32_t*)ret)[k] = ((uint32_t*)buf)[i] + vc->exp_bias; // ret[k]=buf[i]*(1<<bias) | |
1641 } else { | |
1642 for(i=0; i<len; i++, k+=step) | |
1643 ret[k] = buf[i] + fadd_bias; | |
1618 } | 1644 } |
1619 buf=vc->buf; | 1645 buf=vc->buf; |
1620 retlen=vc->blocksize_0/2+(vc->blocksize_1-vc->blocksize_0)/4; | 1646 retlen=vc->blocksize_0/2+len; |
1621 } | 1647 } |
1622 // -- save | 1648 // -- save |
1623 if (next_window) { | 1649 if (next_window) { |
1624 buf += vc->blocksize_1/2; | 1650 buf += vc->blocksize_1/2; |
1625 lwin += vc->blocksize_1/2-1; | 1651 vc->dsp.vector_fmul_reverse(saved, buf, lwin, vc->blocksize_1/2); |
1626 for(i=0;i<vc->blocksize_1/2;++i) { | |
1627 saved[i]=buf[i]*lwin[-i]; | |
1628 } | |
1629 saved_start=0; | 1652 saved_start=0; |
1630 } else { | 1653 } else { |
1631 saved_start=(vc->blocksize_1-vc->blocksize_0)/4; | 1654 saved_start=(vc->blocksize_1-vc->blocksize_0)/4; |
1632 buf += vc->blocksize_1/2; | 1655 buf += vc->blocksize_1/2; |
1633 for(i=0;i<saved_start;++i) { | 1656 for(i=0; i<saved_start; i++) |
1634 saved[i]=buf[i]; | 1657 ((uint32_t*)saved)[i] = ((uint32_t*)buf)[i] + vc->exp_bias; |
1635 } | 1658 vc->dsp.vector_fmul_reverse(saved+saved_start, buf+saved_start, swin, vc->blocksize_0/2); |
1636 swin += vc->blocksize_0/2-1; | |
1637 for(i=0;i<vc->blocksize_0/2;++i) { | |
1638 saved[saved_start+i]=buf[saved_start+i]*swin[-i]; | |
1639 } | |
1640 } | 1659 } |
1641 } else { | 1660 } else { |
1642 // --overlap/add | 1661 // --overlap/add |
1643 for(k=j, i=0;i<saved_start;++i, k+=step) { | 1662 if(vc->add_bias) { |
1644 ret[k]=saved[i]+BIAS; | 1663 for(k=j, i=0;i<saved_start;++i, k+=step) |
1645 } | 1664 ret[k] = saved[i] + fadd_bias; |
1646 for(i=0;i<vc->blocksize_0/2;++i, k+=step) { | 1665 } else { |
1647 ret[k]=saved[saved_start+i]+buf[i]*swin[i]+BIAS; | 1666 for(k=j, i=0;i<saved_start;++i, k+=step) |
1648 } | 1667 ret[k] = saved[i]; |
1668 } | |
1669 vc->dsp.vector_fmul_add_add(ret+k, buf, swin, saved+saved_start, vc->add_bias, vc->blocksize_0/2, step); | |
1649 retlen=saved_start+vc->blocksize_0/2; | 1670 retlen=saved_start+vc->blocksize_0/2; |
1650 // -- save | 1671 // -- save |
1651 buf += vc->blocksize_0/2; | 1672 buf += vc->blocksize_0/2; |
1652 swin += vc->blocksize_0/2-1; | 1673 vc->dsp.vector_fmul_reverse(saved, buf, swin, vc->blocksize_0/2); |
1653 for(i=0;i<vc->blocksize_0/2;++i) { | |
1654 saved[i]=buf[i]*swin[-i]; | |
1655 } | |
1656 saved_start=0; | 1674 saved_start=0; |
1657 } | 1675 } |
1658 } | 1676 } |
1659 vc->saved_start=saved_start; | 1677 vc->saved_start=saved_start; |
1660 | 1678 |
1693 return buf_size ; | 1711 return buf_size ; |
1694 } | 1712 } |
1695 | 1713 |
1696 AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len); | 1714 AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len); |
1697 | 1715 |
1698 for(i=0;i<len;++i) { | 1716 vc->dsp.float_to_int16(data, vc->ret, len); |
1699 int_fast32_t tmp= ((int32_t*)vc->ret)[i]; | |
1700 if(tmp & 0xf0000){ | |
1701 // tmp= (0x43c0ffff - tmp)>>31; //ask gcc devs why this is slower | |
1702 if(tmp > 0x43c0ffff) tmp= 0xFFFF; | |
1703 else tmp= 0; | |
1704 } | |
1705 ((int16_t*)data)[i]=tmp - 0x8000; | |
1706 } | |
1707 | |
1708 *data_size=len*2; | 1717 *data_size=len*2; |
1709 | 1718 |
1710 return buf_size ; | 1719 return buf_size ; |
1711 } | 1720 } |
1712 | 1721 |