Mercurial > libavcodec.hg
comparison mpegaudiodec.c @ 11707:eb9e142ea51f libavcodec
float based mp1/mp2/mp3 decoders.
author | michael |
---|---|
date | Tue, 11 May 2010 19:52:42 +0000 |
parents | a66b535972b7 |
children | 8d2e66ba12cf |
comparison
equal
deleted
inserted
replaced
11706:15e7486e5c7a | 11707:eb9e142ea51f |
---|---|
37 #include "mpegaudio.h" | 37 #include "mpegaudio.h" |
38 #include "mpegaudiodecheader.h" | 38 #include "mpegaudiodecheader.h" |
39 | 39 |
40 #include "mathops.h" | 40 #include "mathops.h" |
41 | 41 |
42 #if CONFIG_FLOAT | |
43 # define SHR(a,b) ((a)*(1.0/(1<<(b)))) | |
44 # define compute_antialias compute_antialias_float | |
45 # define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5)) | |
46 # define FIXR(x) (x) | |
47 # define FIXHR(x) (x) | |
48 # define MULH3(x, y, s) ((s)*(y)*(x)) | |
49 # define MULLx(x, y, s) ((y)*(x)) | |
50 # define RENAME(a) a ## _float | |
51 #else | |
52 # define SHR(a,b) ((a)>>(b)) | |
53 # define compute_antialias compute_antialias_integer | |
42 /* WARNING: only correct for positive numbers */ | 54 /* WARNING: only correct for positive numbers */ |
43 #define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) | 55 # define FIXR_OLD(a) ((int)((a) * FRAC_ONE + 0.5)) |
44 | 56 # define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) |
45 #define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) | 57 # define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) |
58 # define MULH3(x, y, s) MULH((s)*(x), y) | |
59 # define MULLx(x, y, s) MULL(x,y,s) | |
60 # define RENAME(a) a | |
61 #endif | |
46 | 62 |
47 /****************/ | 63 /****************/ |
48 | 64 |
49 #define HEADER_SIZE 4 | 65 #define HEADER_SIZE 4 |
50 | 66 |
71 }; | 87 }; |
72 /* computed from band_size_long */ | 88 /* computed from band_size_long */ |
73 static uint16_t band_index_long[9][23]; | 89 static uint16_t band_index_long[9][23]; |
74 #include "mpegaudio_tablegen.h" | 90 #include "mpegaudio_tablegen.h" |
75 /* intensity stereo coef table */ | 91 /* intensity stereo coef table */ |
76 static int32_t is_table[2][16]; | 92 static INTFLOAT is_table[2][16]; |
77 static int32_t is_table_lsf[2][2][16]; | 93 static INTFLOAT is_table_lsf[2][2][16]; |
78 static int32_t csa_table[8][4]; | 94 static int32_t csa_table[8][4]; |
79 static float csa_table_float[8][4]; | 95 static float csa_table_float[8][4]; |
80 static int32_t mdct_win[8][36]; | 96 static INTFLOAT mdct_win[8][36]; |
81 | 97 |
82 /* lower 2 bits: modulo 3, higher bits: shift */ | 98 /* lower 2 bits: modulo 3, higher bits: shift */ |
83 static uint16_t scale_factor_modshift[64]; | 99 static uint16_t scale_factor_modshift[64]; |
84 /* [i][j]: 2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */ | 100 /* [i][j]: 2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */ |
85 static int32_t scale_factor_mult[15][3]; | 101 static int32_t scale_factor_mult[15][3]; |
86 /* mult table for layer 2 group quantization */ | 102 /* mult table for layer 2 group quantization */ |
87 | 103 |
88 #define SCALE_GEN(v) \ | 104 #define SCALE_GEN(v) \ |
89 { FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) } | 105 { FIXR_OLD(1.0 * (v)), FIXR_OLD(0.7937005259 * (v)), FIXR_OLD(0.6299605249 * (v)) } |
90 | 106 |
91 static const int32_t scale_factor_mult2[3][3] = { | 107 static const int32_t scale_factor_mult2[3][3] = { |
92 SCALE_GEN(4.0 / 3.0), /* 3 steps */ | 108 SCALE_GEN(4.0 / 3.0), /* 3 steps */ |
93 SCALE_GEN(4.0 / 5.0), /* 5 steps */ | 109 SCALE_GEN(4.0 / 5.0), /* 5 steps */ |
94 SCALE_GEN(4.0 / 9.0), /* 9 steps */ | 110 SCALE_GEN(4.0 / 9.0), /* 9 steps */ |
95 }; | 111 }; |
96 | 112 |
97 DECLARE_ALIGNED(16, MPA_INT, ff_mpa_synth_window)[512]; | 113 DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512]; |
98 | 114 |
99 /** | 115 /** |
100 * Convert region offsets to region sizes and truncate | 116 * Convert region offsets to region sizes and truncate |
101 * size to big_values. | 117 * size to big_values. |
102 */ | 118 */ |
291 s->avctx = avctx; | 307 s->avctx = avctx; |
292 | 308 |
293 avctx->sample_fmt= OUT_FMT; | 309 avctx->sample_fmt= OUT_FMT; |
294 s->error_recognition= avctx->error_recognition; | 310 s->error_recognition= avctx->error_recognition; |
295 | 311 |
296 if(avctx->antialias_algo != FF_AA_FLOAT) | |
297 s->compute_antialias= compute_antialias_integer; | |
298 else | |
299 s->compute_antialias= compute_antialias_float; | |
300 | |
301 if (!init && !avctx->parse_only) { | 312 if (!init && !avctx->parse_only) { |
302 int offset; | 313 int offset; |
303 | 314 |
304 /* scale factors table for layer 1/2 */ | 315 /* scale factors table for layer 1/2 */ |
305 for(i=0;i<64;i++) { | 316 for(i=0;i<64;i++) { |
313 /* scale factor multiply for layer 1 */ | 324 /* scale factor multiply for layer 1 */ |
314 for(i=0;i<15;i++) { | 325 for(i=0;i<15;i++) { |
315 int n, norm; | 326 int n, norm; |
316 n = i + 2; | 327 n = i + 2; |
317 norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); | 328 norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1); |
318 scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm, FRAC_BITS); | 329 scale_factor_mult[i][0] = MULLx(norm, FIXR(1.0 * 2.0), FRAC_BITS); |
319 scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm, FRAC_BITS); | 330 scale_factor_mult[i][1] = MULLx(norm, FIXR(0.7937005259 * 2.0), FRAC_BITS); |
320 scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm, FRAC_BITS); | 331 scale_factor_mult[i][2] = MULLx(norm, FIXR(0.6299605249 * 2.0), FRAC_BITS); |
321 dprintf(avctx, "%d: norm=%x s=%x %x %x\n", | 332 dprintf(avctx, "%d: norm=%x s=%x %x %x\n", |
322 i, norm, | 333 i, norm, |
323 scale_factor_mult[i][0], | 334 scale_factor_mult[i][0], |
324 scale_factor_mult[i][1], | 335 scale_factor_mult[i][1], |
325 scale_factor_mult[i][2]); | 336 scale_factor_mult[i][2]); |
326 } | 337 } |
327 | 338 |
328 ff_mpa_synth_init(ff_mpa_synth_window); | 339 RENAME(ff_mpa_synth_init)(RENAME(ff_mpa_synth_window)); |
329 | 340 |
330 /* huffman decode tables */ | 341 /* huffman decode tables */ |
331 offset = 0; | 342 offset = 0; |
332 for(i=1;i<16;i++) { | 343 for(i=1;i<16;i++) { |
333 const HuffTable *h = &mpa_huff_tables[i]; | 344 const HuffTable *h = &mpa_huff_tables[i]; |
383 int_pow_init(); | 394 int_pow_init(); |
384 mpegaudio_tableinit(); | 395 mpegaudio_tableinit(); |
385 | 396 |
386 for(i=0;i<7;i++) { | 397 for(i=0;i<7;i++) { |
387 float f; | 398 float f; |
388 int v; | 399 INTFLOAT v; |
389 if (i != 6) { | 400 if (i != 6) { |
390 f = tan((double)i * M_PI / 12.0); | 401 f = tan((double)i * M_PI / 12.0); |
391 v = FIXR(f / (1.0 + f)); | 402 v = FIXR(f / (1.0 + f)); |
392 } else { | 403 } else { |
393 v = FIXR(1.0); | 404 v = FIXR(1.0); |
518 #define BF(a, b, c, s)\ | 529 #define BF(a, b, c, s)\ |
519 {\ | 530 {\ |
520 tmp0 = tab[a] + tab[b];\ | 531 tmp0 = tab[a] + tab[b];\ |
521 tmp1 = tab[a] - tab[b];\ | 532 tmp1 = tab[a] - tab[b];\ |
522 tab[a] = tmp0;\ | 533 tab[a] = tmp0;\ |
523 tab[b] = MULH(tmp1<<(s), c);\ | 534 tab[b] = MULH3(tmp1, c, 1<<(s));\ |
524 } | 535 } |
525 | 536 |
526 #define BF1(a, b, c, d)\ | 537 #define BF1(a, b, c, d)\ |
527 {\ | 538 {\ |
528 BF(a, b, COS4_0, 1);\ | 539 BF(a, b, COS4_0, 1);\ |
541 } | 552 } |
542 | 553 |
543 #define ADD(a, b) tab[a] += tab[b] | 554 #define ADD(a, b) tab[a] += tab[b] |
544 | 555 |
545 /* DCT32 without 1/sqrt(2) coef zero scaling. */ | 556 /* DCT32 without 1/sqrt(2) coef zero scaling. */ |
546 static void dct32(int32_t *out, int32_t *tab) | 557 static void dct32(INTFLOAT *out, INTFLOAT *tab) |
547 { | 558 { |
548 int tmp0, tmp1; | 559 INTFLOAT tmp0, tmp1; |
549 | 560 |
550 /* pass 1 */ | 561 /* pass 1 */ |
551 BF( 0, 31, COS0_0 , 1); | 562 BF( 0, 31, COS0_0 , 1); |
552 BF(15, 16, COS0_15, 5); | 563 BF(15, 16, COS0_15, 5); |
553 /* pass 2 */ | 564 /* pass 2 */ |
699 out[23] = tab[29] + tab[19]; | 710 out[23] = tab[29] + tab[19]; |
700 out[15] = tab[30] + tab[17]; | 711 out[15] = tab[30] + tab[17]; |
701 out[31] = tab[31]; | 712 out[31] = tab[31]; |
702 } | 713 } |
703 | 714 |
704 #if FRAC_BITS <= 15 | 715 #if CONFIG_FLOAT |
716 static inline float round_sample(float *sum) | |
717 { | |
718 float sum1=*sum; | |
719 *sum = 0; | |
720 return sum1; | |
721 } | |
722 | |
723 /* signed 16x16 -> 32 multiply add accumulate */ | |
724 #define MACS(rt, ra, rb) rt+=(ra)*(rb) | |
725 | |
726 /* signed 16x16 -> 32 multiply */ | |
727 #define MULS(ra, rb) ((ra)*(rb)) | |
728 | |
729 #define MLSS(rt, ra, rb) rt-=(ra)*(rb) | |
730 | |
731 #elif FRAC_BITS <= 15 | |
705 | 732 |
706 static inline int round_sample(int *sum) | 733 static inline int round_sample(int *sum) |
707 { | 734 { |
708 int sum1; | 735 int sum1; |
709 sum1 = (*sum) >> OUT_SHIFT; | 736 sum1 = (*sum) >> OUT_SHIFT; |
746 op(sum, (w)[7 * 64], (p)[7 * 64]); \ | 773 op(sum, (w)[7 * 64], (p)[7 * 64]); \ |
747 } | 774 } |
748 | 775 |
749 #define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \ | 776 #define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \ |
750 { \ | 777 { \ |
751 int tmp;\ | 778 INTFLOAT tmp;\ |
752 tmp = p[0 * 64];\ | 779 tmp = p[0 * 64];\ |
753 op1(sum1, (w1)[0 * 64], tmp);\ | 780 op1(sum1, (w1)[0 * 64], tmp);\ |
754 op2(sum2, (w2)[0 * 64], tmp);\ | 781 op2(sum2, (w2)[0 * 64], tmp);\ |
755 tmp = p[1 * 64];\ | 782 tmp = p[1 * 64];\ |
756 op1(sum1, (w1)[1 * 64], tmp);\ | 783 op1(sum1, (w1)[1 * 64], tmp);\ |
773 tmp = p[7 * 64];\ | 800 tmp = p[7 * 64];\ |
774 op1(sum1, (w1)[7 * 64], tmp);\ | 801 op1(sum1, (w1)[7 * 64], tmp);\ |
775 op2(sum2, (w2)[7 * 64], tmp);\ | 802 op2(sum2, (w2)[7 * 64], tmp);\ |
776 } | 803 } |
777 | 804 |
778 void av_cold ff_mpa_synth_init(MPA_INT *window) | 805 void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window) |
779 { | 806 { |
780 int i; | 807 int i; |
781 | 808 |
782 /* max = 18760, max sum over all 16 coefs : 44736 */ | 809 /* max = 18760, max sum over all 16 coefs : 44736 */ |
783 for(i=0;i<257;i++) { | 810 for(i=0;i<257;i++) { |
784 int v; | 811 INTFLOAT v; |
785 v = ff_mpa_enwindow[i]; | 812 v = ff_mpa_enwindow[i]; |
786 #if WFRAC_BITS < 16 | 813 #if CONFIG_FLOAT |
814 v *= 1.0 / (1LL<<(16 + FRAC_BITS)); | |
815 #elif WFRAC_BITS < 16 | |
787 v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS); | 816 v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS); |
788 #endif | 817 #endif |
789 window[i] = v; | 818 window[i] = v; |
790 if ((i & 63) != 0) | 819 if ((i & 63) != 0) |
791 v = -v; | 820 v = -v; |
795 } | 824 } |
796 | 825 |
797 /* 32 sub band synthesis filter. Input: 32 sub band samples, Output: | 826 /* 32 sub band synthesis filter. Input: 32 sub band samples, Output: |
798 32 samples. */ | 827 32 samples. */ |
799 /* XXX: optimize by avoiding ring buffer usage */ | 828 /* XXX: optimize by avoiding ring buffer usage */ |
800 void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, | 829 void RENAME(ff_mpa_synth_filter)(MPA_INT *synth_buf_ptr, int *synth_buf_offset, |
801 MPA_INT *window, int *dither_state, | 830 MPA_INT *window, int *dither_state, |
802 OUT_INT *samples, int incr, | 831 OUT_INT *samples, int incr, |
803 int32_t sb_samples[SBLIMIT]) | 832 INTFLOAT sb_samples[SBLIMIT]) |
804 { | 833 { |
805 register MPA_INT *synth_buf; | 834 register MPA_INT *synth_buf; |
806 register const MPA_INT *w, *w2, *p; | 835 register const MPA_INT *w, *w2, *p; |
807 int j, offset; | 836 int j, offset; |
808 OUT_INT *samples2; | 837 OUT_INT *samples2; |
809 #if FRAC_BITS <= 15 | 838 #if CONFIG_FLOAT |
839 float sum, sum2; | |
840 #elif FRAC_BITS <= 15 | |
810 int32_t tmp[32]; | 841 int32_t tmp[32]; |
811 int sum, sum2; | 842 int sum, sum2; |
812 #else | 843 #else |
813 int64_t sum, sum2; | 844 int64_t sum, sum2; |
814 #endif | 845 #endif |
815 | 846 |
816 offset = *synth_buf_offset; | 847 offset = *synth_buf_offset; |
817 synth_buf = synth_buf_ptr + offset; | 848 synth_buf = synth_buf_ptr + offset; |
818 | 849 |
819 #if FRAC_BITS <= 15 | 850 #if FRAC_BITS <= 15 |
851 assert(!CONFIG_FLOAT); | |
820 dct32(tmp, sb_samples); | 852 dct32(tmp, sb_samples); |
821 for(j=0;j<32;j++) { | 853 for(j=0;j<32;j++) { |
822 /* NOTE: can cause a loss in precision if very high amplitude | 854 /* NOTE: can cause a loss in precision if very high amplitude |
823 sound */ | 855 sound */ |
824 synth_buf[j] = av_clip_int16(tmp[j]); | 856 synth_buf[j] = av_clip_int16(tmp[j]); |
826 #else | 858 #else |
827 dct32(synth_buf, sb_samples); | 859 dct32(synth_buf, sb_samples); |
828 #endif | 860 #endif |
829 | 861 |
830 /* copy to avoid wrap */ | 862 /* copy to avoid wrap */ |
831 memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT)); | 863 memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf)); |
832 | 864 |
833 samples2 = samples + 31 * incr; | 865 samples2 = samples + 31 * incr; |
834 w = window; | 866 w = window; |
835 w2 = window + 31; | 867 w2 = window + 31; |
836 | 868 |
871 } | 903 } |
872 | 904 |
873 #define C3 FIXHR(0.86602540378443864676/2) | 905 #define C3 FIXHR(0.86602540378443864676/2) |
874 | 906 |
875 /* 0.5 / cos(pi*(2*i+1)/36) */ | 907 /* 0.5 / cos(pi*(2*i+1)/36) */ |
876 static const int icos36[9] = { | 908 static const INTFLOAT icos36[9] = { |
877 FIXR(0.50190991877167369479), | 909 FIXR(0.50190991877167369479), |
878 FIXR(0.51763809020504152469), //0 | 910 FIXR(0.51763809020504152469), //0 |
879 FIXR(0.55168895948124587824), | 911 FIXR(0.55168895948124587824), |
880 FIXR(0.61038729438072803416), | 912 FIXR(0.61038729438072803416), |
881 FIXR(0.70710678118654752439), //1 | 913 FIXR(0.70710678118654752439), //1 |
884 FIXR(1.93185165257813657349), //2 | 916 FIXR(1.93185165257813657349), //2 |
885 FIXR(5.73685662283492756461), | 917 FIXR(5.73685662283492756461), |
886 }; | 918 }; |
887 | 919 |
888 /* 0.25 / cos(pi*(2*i+1)/36), i.e. icos36[i] / 2 */ | 920 /* 0.25 / cos(pi*(2*i+1)/36), i.e. icos36[i] / 2 */ |
889 static const int icos36h[9] = { | 921 static const INTFLOAT icos36h[9] = { |
890 FIXHR(0.50190991877167369479/2), | 922 FIXHR(0.50190991877167369479/2), |
891 FIXHR(0.51763809020504152469/2), //0 | 923 FIXHR(0.51763809020504152469/2), //0 |
892 FIXHR(0.55168895948124587824/2), | 924 FIXHR(0.55168895948124587824/2), |
893 FIXHR(0.61038729438072803416/2), | 925 FIXHR(0.61038729438072803416/2), |
894 FIXHR(0.70710678118654752439/2), //1 | 926 FIXHR(0.70710678118654752439/2), //1 |
898 // FIXHR(5.73685662283492756461), | 930 // FIXHR(5.73685662283492756461), |
899 }; | 931 }; |
900 | 932 |
901 /* 12 points IMDCT. We compute it "by hand" by factorizing obvious | 933 /* 12 points IMDCT. We compute it "by hand" by factorizing obvious |
902 cases. */ | 934 cases. */ |
903 static void imdct12(int *out, int *in) | 935 static void imdct12(INTFLOAT *out, INTFLOAT *in) |
904 { | 936 { |
905 int in0, in1, in2, in3, in4, in5, t1, t2; | 937 INTFLOAT in0, in1, in2, in3, in4, in5, t1, t2; |
906 | 938 |
907 in0= in[0*3]; | 939 in0= in[0*3]; |
908 in1= in[1*3] + in[0*3]; | 940 in1= in[1*3] + in[0*3]; |
909 in2= in[2*3] + in[1*3]; | 941 in2= in[2*3] + in[1*3]; |
910 in3= in[3*3] + in[2*3]; | 942 in3= in[3*3] + in[2*3]; |
911 in4= in[4*3] + in[3*3]; | 943 in4= in[4*3] + in[3*3]; |
912 in5= in[5*3] + in[4*3]; | 944 in5= in[5*3] + in[4*3]; |
913 in5 += in3; | 945 in5 += in3; |
914 in3 += in1; | 946 in3 += in1; |
915 | 947 |
916 in2= MULH(2*in2, C3); | 948 in2= MULH3(in2, C3, 2); |
917 in3= MULH(4*in3, C3); | 949 in3= MULH3(in3, C3, 4); |
918 | 950 |
919 t1 = in0 - in4; | 951 t1 = in0 - in4; |
920 t2 = MULH(2*(in1 - in5), icos36h[4]); | 952 t2 = MULH3(in1 - in5, icos36h[4], 2); |
921 | 953 |
922 out[ 7]= | 954 out[ 7]= |
923 out[10]= t1 + t2; | 955 out[10]= t1 + t2; |
924 out[ 1]= | 956 out[ 1]= |
925 out[ 4]= t1 - t2; | 957 out[ 4]= t1 - t2; |
926 | 958 |
927 in0 += in4>>1; | 959 in0 += SHR(in4, 1); |
928 in4 = in0 + in2; | 960 in4 = in0 + in2; |
929 in5 += 2*in1; | 961 in5 += 2*in1; |
930 in1 = MULH(in5 + in3, icos36h[1]); | 962 in1 = MULH3(in5 + in3, icos36h[1], 1); |
931 out[ 8]= | 963 out[ 8]= |
932 out[ 9]= in4 + in1; | 964 out[ 9]= in4 + in1; |
933 out[ 2]= | 965 out[ 2]= |
934 out[ 3]= in4 - in1; | 966 out[ 3]= in4 - in1; |
935 | 967 |
936 in0 -= in2; | 968 in0 -= in2; |
937 in5 = MULH(2*(in5 - in3), icos36h[7]); | 969 in5 = MULH3(in5 - in3, icos36h[7], 2); |
938 out[ 0]= | 970 out[ 0]= |
939 out[ 5]= in0 - in5; | 971 out[ 5]= in0 - in5; |
940 out[ 6]= | 972 out[ 6]= |
941 out[11]= in0 + in5; | 973 out[11]= in0 + in5; |
942 } | 974 } |
951 #define C7 FIXHR(0.34202014332566873304/2) | 983 #define C7 FIXHR(0.34202014332566873304/2) |
952 #define C8 FIXHR(0.17364817766693034885/2) | 984 #define C8 FIXHR(0.17364817766693034885/2) |
953 | 985 |
954 | 986 |
955 /* using Lee like decomposition followed by hand coded 9 points DCT */ | 987 /* using Lee like decomposition followed by hand coded 9 points DCT */ |
956 static void imdct36(int *out, int *buf, int *in, int *win) | 988 static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) |
957 { | 989 { |
958 int i, j, t0, t1, t2, t3, s0, s1, s2, s3; | 990 int i, j; |
959 int tmp[18], *tmp1, *in1; | 991 INTFLOAT t0, t1, t2, t3, s0, s1, s2, s3; |
992 INTFLOAT tmp[18], *tmp1, *in1; | |
960 | 993 |
961 for(i=17;i>=1;i--) | 994 for(i=17;i>=1;i--) |
962 in[i] += in[i-1]; | 995 in[i] += in[i-1]; |
963 for(i=17;i>=3;i-=2) | 996 for(i=17;i>=3;i-=2) |
964 in[i] += in[i-2]; | 997 in[i] += in[i-2]; |
965 | 998 |
966 for(j=0;j<2;j++) { | 999 for(j=0;j<2;j++) { |
967 tmp1 = tmp + j; | 1000 tmp1 = tmp + j; |
968 in1 = in + j; | 1001 in1 = in + j; |
969 #if 0 | 1002 |
970 //more accurate but slower | |
971 int64_t t0, t1, t2, t3; | |
972 t2 = in1[2*4] + in1[2*8] - in1[2*2]; | 1003 t2 = in1[2*4] + in1[2*8] - in1[2*2]; |
973 | 1004 |
974 t3 = (in1[2*0] + (int64_t)(in1[2*6]>>1))<<32; | 1005 t3 = in1[2*0] + SHR(in1[2*6],1); |
975 t1 = in1[2*0] - in1[2*6]; | 1006 t1 = in1[2*0] - in1[2*6]; |
976 tmp1[ 6] = t1 - (t2>>1); | 1007 tmp1[ 6] = t1 - SHR(t2,1); |
977 tmp1[16] = t1 + t2; | 1008 tmp1[16] = t1 + t2; |
978 | 1009 |
979 t0 = MUL64(2*(in1[2*2] + in1[2*4]), C2); | 1010 t0 = MULH3(in1[2*2] + in1[2*4] , C2, 2); |
980 t1 = MUL64( in1[2*4] - in1[2*8] , -2*C8); | 1011 t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1); |
981 t2 = MUL64(2*(in1[2*2] + in1[2*8]), -C4); | 1012 t2 = MULH3(in1[2*2] + in1[2*8] , -C4, 2); |
982 | |
983 tmp1[10] = (t3 - t0 - t2) >> 32; | |
984 tmp1[ 2] = (t3 + t0 + t1) >> 32; | |
985 tmp1[14] = (t3 + t2 - t1) >> 32; | |
986 | |
987 tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3); | |
988 t2 = MUL64(2*(in1[2*1] + in1[2*5]), C1); | |
989 t3 = MUL64( in1[2*5] - in1[2*7] , -2*C7); | |
990 t0 = MUL64(2*in1[2*3], C3); | |
991 | |
992 t1 = MUL64(2*(in1[2*1] + in1[2*7]), -C5); | |
993 | |
994 tmp1[ 0] = (t2 + t3 + t0) >> 32; | |
995 tmp1[12] = (t2 + t1 - t0) >> 32; | |
996 tmp1[ 8] = (t3 - t1 - t0) >> 32; | |
997 #else | |
998 t2 = in1[2*4] + in1[2*8] - in1[2*2]; | |
999 | |
1000 t3 = in1[2*0] + (in1[2*6]>>1); | |
1001 t1 = in1[2*0] - in1[2*6]; | |
1002 tmp1[ 6] = t1 - (t2>>1); | |
1003 tmp1[16] = t1 + t2; | |
1004 | |
1005 t0 = MULH(2*(in1[2*2] + in1[2*4]), C2); | |
1006 t1 = MULH( in1[2*4] - in1[2*8] , -2*C8); | |
1007 t2 = MULH(2*(in1[2*2] + in1[2*8]), -C4); | |
1008 | 1013 |
1009 tmp1[10] = t3 - t0 - t2; | 1014 tmp1[10] = t3 - t0 - t2; |
1010 tmp1[ 2] = t3 + t0 + t1; | 1015 tmp1[ 2] = t3 + t0 + t1; |
1011 tmp1[14] = t3 + t2 - t1; | 1016 tmp1[14] = t3 + t2 - t1; |
1012 | 1017 |
1013 tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3); | 1018 tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2); |
1014 t2 = MULH(2*(in1[2*1] + in1[2*5]), C1); | 1019 t2 = MULH3(in1[2*1] + in1[2*5], C1, 2); |
1015 t3 = MULH( in1[2*5] - in1[2*7] , -2*C7); | 1020 t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1); |
1016 t0 = MULH(2*in1[2*3], C3); | 1021 t0 = MULH3(in1[2*3], C3, 2); |
1017 | 1022 |
1018 t1 = MULH(2*(in1[2*1] + in1[2*7]), -C5); | 1023 t1 = MULH3(in1[2*1] + in1[2*7], -C5, 2); |
1019 | 1024 |
1020 tmp1[ 0] = t2 + t3 + t0; | 1025 tmp1[ 0] = t2 + t3 + t0; |
1021 tmp1[12] = t2 + t1 - t0; | 1026 tmp1[12] = t2 + t1 - t0; |
1022 tmp1[ 8] = t3 - t1 - t0; | 1027 tmp1[ 8] = t3 - t1 - t0; |
1023 #endif | |
1024 } | 1028 } |
1025 | 1029 |
1026 i = 0; | 1030 i = 0; |
1027 for(j=0;j<4;j++) { | 1031 for(j=0;j<4;j++) { |
1028 t0 = tmp[i]; | 1032 t0 = tmp[i]; |
1030 s0 = t1 + t0; | 1034 s0 = t1 + t0; |
1031 s2 = t1 - t0; | 1035 s2 = t1 - t0; |
1032 | 1036 |
1033 t2 = tmp[i + 1]; | 1037 t2 = tmp[i + 1]; |
1034 t3 = tmp[i + 3]; | 1038 t3 = tmp[i + 3]; |
1035 s1 = MULH(2*(t3 + t2), icos36h[j]); | 1039 s1 = MULH3(t3 + t2, icos36h[j], 2); |
1036 s3 = MULL(t3 - t2, icos36[8 - j], FRAC_BITS); | 1040 s3 = MULLx(t3 - t2, icos36[8 - j], FRAC_BITS); |
1037 | 1041 |
1038 t0 = s0 + s1; | 1042 t0 = s0 + s1; |
1039 t1 = s0 - s1; | 1043 t1 = s0 - s1; |
1040 out[(9 + j)*SBLIMIT] = MULH(t1, win[9 + j]) + buf[9 + j]; | 1044 out[(9 + j)*SBLIMIT] = MULH3(t1, win[9 + j], 1) + buf[9 + j]; |
1041 out[(8 - j)*SBLIMIT] = MULH(t1, win[8 - j]) + buf[8 - j]; | 1045 out[(8 - j)*SBLIMIT] = MULH3(t1, win[8 - j], 1) + buf[8 - j]; |
1042 buf[9 + j] = MULH(t0, win[18 + 9 + j]); | 1046 buf[9 + j] = MULH3(t0, win[18 + 9 + j], 1); |
1043 buf[8 - j] = MULH(t0, win[18 + 8 - j]); | 1047 buf[8 - j] = MULH3(t0, win[18 + 8 - j], 1); |
1044 | 1048 |
1045 t0 = s2 + s3; | 1049 t0 = s2 + s3; |
1046 t1 = s2 - s3; | 1050 t1 = s2 - s3; |
1047 out[(9 + 8 - j)*SBLIMIT] = MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j]; | 1051 out[(9 + 8 - j)*SBLIMIT] = MULH3(t1, win[9 + 8 - j], 1) + buf[9 + 8 - j]; |
1048 out[( j)*SBLIMIT] = MULH(t1, win[ j]) + buf[ j]; | 1052 out[( j)*SBLIMIT] = MULH3(t1, win[ j], 1) + buf[ j]; |
1049 buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]); | 1053 buf[9 + 8 - j] = MULH3(t0, win[18 + 9 + 8 - j], 1); |
1050 buf[ + j] = MULH(t0, win[18 + j]); | 1054 buf[ + j] = MULH3(t0, win[18 + j], 1); |
1051 i += 4; | 1055 i += 4; |
1052 } | 1056 } |
1053 | 1057 |
1054 s0 = tmp[16]; | 1058 s0 = tmp[16]; |
1055 s1 = MULH(2*tmp[17], icos36h[4]); | 1059 s1 = MULH3(tmp[17], icos36h[4], 2); |
1056 t0 = s0 + s1; | 1060 t0 = s0 + s1; |
1057 t1 = s0 - s1; | 1061 t1 = s0 - s1; |
1058 out[(9 + 4)*SBLIMIT] = MULH(t1, win[9 + 4]) + buf[9 + 4]; | 1062 out[(9 + 4)*SBLIMIT] = MULH3(t1, win[9 + 4], 1) + buf[9 + 4]; |
1059 out[(8 - 4)*SBLIMIT] = MULH(t1, win[8 - 4]) + buf[8 - 4]; | 1063 out[(8 - 4)*SBLIMIT] = MULH3(t1, win[8 - 4], 1) + buf[8 - 4]; |
1060 buf[9 + 4] = MULH(t0, win[18 + 9 + 4]); | 1064 buf[9 + 4] = MULH3(t0, win[18 + 9 + 4], 1); |
1061 buf[8 - 4] = MULH(t0, win[18 + 8 - 4]); | 1065 buf[8 - 4] = MULH3(t0, win[18 + 8 - 4], 1); |
1062 } | 1066 } |
1063 | 1067 |
1064 /* return the number of decoded frames */ | 1068 /* return the number of decoded frames */ |
1065 static int mp_decode_layer1(MPADecodeContext *s) | 1069 static int mp_decode_layer1(MPADecodeContext *s) |
1066 { | 1070 { |
1312 } | 1316 } |
1313 } | 1317 } |
1314 return 3 * 12; | 1318 return 3 * 12; |
1315 } | 1319 } |
1316 | 1320 |
1321 //FIXME optimize this shit
1317 static inline void lsf_sf_expand(int *slen, | 1322 static inline void lsf_sf_expand(int *slen, |
1318 int sf, int n1, int n2, int n3) | 1323 int sf, int n1, int n2, int n3) |
1319 { | 1324 { |
1320 if (n3) { | 1325 if (n3) { |
1321 slen[3] = sf % n3; | 1326 slen[3] = sf % n3; |
1422 continue; | 1427 continue; |
1423 } | 1428 } |
1424 | 1429 |
1425 /* read huffcode and compute each couple */ | 1430 /* read huffcode and compute each couple */ |
1426 for(;j>0;j--) { | 1431 for(;j>0;j--) { |
1427 int exponent, x, y, v; | 1432 int exponent, x, y; |
1433 INTFLOAT v; | |
1428 int pos= get_bits_count(&s->gb); | 1434 int pos= get_bits_count(&s->gb); |
1429 | 1435 |
1430 if (pos >= end_pos){ | 1436 if (pos >= end_pos){ |
1431 // av_log(NULL, AV_LOG_ERROR, "pos: %d %d %d %d\n", pos, end_pos, end_pos2, s_index); | 1437 // av_log(NULL, AV_LOG_ERROR, "pos: %d %d %d %d\n", pos, end_pos, end_pos2, s_index); |
1432 switch_buffer(s, &pos, &end_pos, &end_pos2); | 1438 switch_buffer(s, &pos, &end_pos, &end_pos2); |
1449 i, g->region_size[i] - j, x, y, exponent); | 1455 i, g->region_size[i] - j, x, y, exponent); |
1450 if(y&16){ | 1456 if(y&16){ |
1451 x = y >> 5; | 1457 x = y >> 5; |
1452 y = y & 0x0f; | 1458 y = y & 0x0f; |
1453 if (x < 15){ | 1459 if (x < 15){ |
1454 v = expval_table[ exponent ][ x ]; | 1460 v = RENAME(expval_table)[ exponent ][ x ]; |
1455 // v = expval_table[ (exponent&3) ][ x ] >> FFMIN(0 - (exponent>>2), 31); | 1461 // v = RENAME(expval_table)[ (exponent&3) ][ x ] >> FFMIN(0 - (exponent>>2), 31); |
1456 }else{ | 1462 }else{ |
1457 x += get_bitsz(&s->gb, linbits); | 1463 x += get_bitsz(&s->gb, linbits); |
1458 v = l3_unscale(x, exponent); | 1464 v = l3_unscale(x, exponent); |
1459 } | 1465 } |
1460 if (get_bits1(&s->gb)) | 1466 if (get_bits1(&s->gb)) |
1461 v = -v; | 1467 v = -v; |
1462 g->sb_hybrid[s_index] = v; | 1468 g->sb_hybrid[s_index] = v; |
1463 if (y < 15){ | 1469 if (y < 15){ |
1464 v = expval_table[ exponent ][ y ]; | 1470 v = RENAME(expval_table)[ exponent ][ y ]; |
1465 }else{ | 1471 }else{ |
1466 y += get_bitsz(&s->gb, linbits); | 1472 y += get_bitsz(&s->gb, linbits); |
1467 v = l3_unscale(y, exponent); | 1473 v = l3_unscale(y, exponent); |
1468 } | 1474 } |
1469 if (get_bits1(&s->gb)) | 1475 if (get_bits1(&s->gb)) |
1472 }else{ | 1478 }else{ |
1473 x = y >> 5; | 1479 x = y >> 5; |
1474 y = y & 0x0f; | 1480 y = y & 0x0f; |
1475 x += y; | 1481 x += y; |
1476 if (x < 15){ | 1482 if (x < 15){ |
1477 v = expval_table[ exponent ][ x ]; | 1483 v = RENAME(expval_table)[ exponent ][ x ]; |
1478 }else{ | 1484 }else{ |
1479 x += get_bitsz(&s->gb, linbits); | 1485 x += get_bitsz(&s->gb, linbits); |
1480 v = l3_unscale(x, exponent); | 1486 v = l3_unscale(x, exponent); |
1481 } | 1487 } |
1482 if (get_bits1(&s->gb)) | 1488 if (get_bits1(&s->gb)) |
1519 g->sb_hybrid[s_index+1]= | 1525 g->sb_hybrid[s_index+1]= |
1520 g->sb_hybrid[s_index+2]= | 1526 g->sb_hybrid[s_index+2]= |
1521 g->sb_hybrid[s_index+3]= 0; | 1527 g->sb_hybrid[s_index+3]= 0; |
1522 while(code){ | 1528 while(code){ |
1523 static const int idxtab[16]={3,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0}; | 1529 static const int idxtab[16]={3,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0}; |
1524 int v; | 1530 INTFLOAT v; |
1525 int pos= s_index+idxtab[code]; | 1531 int pos= s_index+idxtab[code]; |
1526 code ^= 8>>idxtab[code]; | 1532 code ^= 8>>idxtab[code]; |
1527 v = exp_table[ exponents[pos] ]; | 1533 v = RENAME(exp_table)[ exponents[pos] ]; |
1528 // v = exp_table[ (exponents[pos]&3) ] >> FFMIN(0 - (exponents[pos]>>2), 31); | 1534 // v = RENAME(exp_table)[ (exponents[pos]&3) ] >> FFMIN(0 - (exponents[pos]>>2), 31); |
1529 if(get_bits1(&s->gb)) | 1535 if(get_bits1(&s->gb)) //FIXME try to flip the sign bit in int32_t, same above |
1530 v = -v; | 1536 v = -v; |
1531 g->sb_hybrid[pos] = v; | 1537 g->sb_hybrid[pos] = v; |
1532 } | 1538 } |
1533 s_index+=4; | 1539 s_index+=4; |
1534 } | 1540 } |
1555 would be faster to do it in parsing, but the code would be far more | 1561 would be faster to do it in parsing, but the code would be far more |
1556 complicated */ | 1562 complicated */ |
1557 static void reorder_block(MPADecodeContext *s, GranuleDef *g) | 1563 static void reorder_block(MPADecodeContext *s, GranuleDef *g) |
1558 { | 1564 { |
1559 int i, j, len; | 1565 int i, j, len; |
1560 int32_t *ptr, *dst, *ptr1; | 1566 INTFLOAT *ptr, *dst, *ptr1; |
1561 int32_t tmp[576]; | 1567 INTFLOAT tmp[576]; |
1562 | 1568 |
1563 if (g->block_type != 2) | 1569 if (g->block_type != 2) |
1564 return; | 1570 return; |
1565 | 1571 |
1566 if (g->switch_point) { | 1572 if (g->switch_point) { |
1592 | 1598 |
1593 static void compute_stereo(MPADecodeContext *s, | 1599 static void compute_stereo(MPADecodeContext *s, |
1594 GranuleDef *g0, GranuleDef *g1) | 1600 GranuleDef *g0, GranuleDef *g1) |
1595 { | 1601 { |
1596 int i, j, k, l; | 1602 int i, j, k, l; |
1597 int32_t v1, v2; | 1603 int sf_max, sf, len, non_zero_found; |
1598 int sf_max, tmp0, tmp1, sf, len, non_zero_found; | 1604 INTFLOAT (*is_tab)[16], *tab0, *tab1, tmp0, tmp1, v1, v2; |
1599 int32_t (*is_tab)[16]; | |
1600 int32_t *tab0, *tab1; | |
1601 int non_zero_found_short[3]; | 1605 int non_zero_found_short[3]; |
1602 | 1606 |
1603 /* intensity stereo */ | 1607 /* intensity stereo */ |
1604 if (s->mode_ext & MODE_EXT_I_STEREO) { | 1608 if (s->mode_ext & MODE_EXT_I_STEREO) { |
1605 if (!s->lsf) { | 1609 if (!s->lsf) { |
1639 | 1643 |
1640 v1 = is_tab[0][sf]; | 1644 v1 = is_tab[0][sf]; |
1641 v2 = is_tab[1][sf]; | 1645 v2 = is_tab[1][sf]; |
1642 for(j=0;j<len;j++) { | 1646 for(j=0;j<len;j++) { |
1643 tmp0 = tab0[j]; | 1647 tmp0 = tab0[j]; |
1644 tab0[j] = MULL(tmp0, v1, FRAC_BITS); | 1648 tab0[j] = MULLx(tmp0, v1, FRAC_BITS); |
1645 tab1[j] = MULL(tmp0, v2, FRAC_BITS); | 1649 tab1[j] = MULLx(tmp0, v2, FRAC_BITS); |
1646 } | 1650 } |
1647 } else { | 1651 } else { |
1648 found1: | 1652 found1: |
1649 if (s->mode_ext & MODE_EXT_MS_STEREO) { | 1653 if (s->mode_ext & MODE_EXT_MS_STEREO) { |
1650 /* lower part of the spectrum : do ms stereo | 1654 /* lower part of the spectrum : do ms stereo |
1651 if enabled */ | 1655 if enabled */ |
1652 for(j=0;j<len;j++) { | 1656 for(j=0;j<len;j++) { |
1653 tmp0 = tab0[j]; | 1657 tmp0 = tab0[j]; |
1654 tmp1 = tab1[j]; | 1658 tmp1 = tab1[j]; |
1655 tab0[j] = MULL(tmp0 + tmp1, ISQRT2, FRAC_BITS); | 1659 tab0[j] = MULLx(tmp0 + tmp1, ISQRT2, FRAC_BITS); |
1656 tab1[j] = MULL(tmp0 - tmp1, ISQRT2, FRAC_BITS); | 1660 tab1[j] = MULLx(tmp0 - tmp1, ISQRT2, FRAC_BITS); |
1657 } | 1661 } |
1658 } | 1662 } |
1659 } | 1663 } |
1660 } | 1664 } |
1661 } | 1665 } |
1683 goto found2; | 1687 goto found2; |
1684 v1 = is_tab[0][sf]; | 1688 v1 = is_tab[0][sf]; |
1685 v2 = is_tab[1][sf]; | 1689 v2 = is_tab[1][sf]; |
1686 for(j=0;j<len;j++) { | 1690 for(j=0;j<len;j++) { |
1687 tmp0 = tab0[j]; | 1691 tmp0 = tab0[j]; |
1688 tab0[j] = MULL(tmp0, v1, FRAC_BITS); | 1692 tab0[j] = MULLx(tmp0, v1, FRAC_BITS); |
1689 tab1[j] = MULL(tmp0, v2, FRAC_BITS); | 1693 tab1[j] = MULLx(tmp0, v2, FRAC_BITS); |
1690 } | 1694 } |
1691 } else { | 1695 } else { |
1692 found2: | 1696 found2: |
1693 if (s->mode_ext & MODE_EXT_MS_STEREO) { | 1697 if (s->mode_ext & MODE_EXT_MS_STEREO) { |
1694 /* lower part of the spectrum : do ms stereo | 1698 /* lower part of the spectrum : do ms stereo |
1695 if enabled */ | 1699 if enabled */ |
1696 for(j=0;j<len;j++) { | 1700 for(j=0;j<len;j++) { |
1697 tmp0 = tab0[j]; | 1701 tmp0 = tab0[j]; |
1698 tmp1 = tab1[j]; | 1702 tmp1 = tab1[j]; |
1699 tab0[j] = MULL(tmp0 + tmp1, ISQRT2, FRAC_BITS); | 1703 tab0[j] = MULLx(tmp0 + tmp1, ISQRT2, FRAC_BITS); |
1700 tab1[j] = MULL(tmp0 - tmp1, ISQRT2, FRAC_BITS); | 1704 tab1[j] = MULLx(tmp0 - tmp1, ISQRT2, FRAC_BITS); |
1701 } | 1705 } |
1702 } | 1706 } |
1703 } | 1707 } |
1704 } | 1708 } |
1705 } else if (s->mode_ext & MODE_EXT_MS_STEREO) { | 1709 } else if (s->mode_ext & MODE_EXT_MS_STEREO) { |
1758 } | 1762 } |
1759 | 1763 |
1760 static void compute_antialias_float(MPADecodeContext *s, | 1764 static void compute_antialias_float(MPADecodeContext *s, |
1761 GranuleDef *g) | 1765 GranuleDef *g) |
1762 { | 1766 { |
1763 int32_t *ptr; | 1767 float *ptr; |
1764 int n, i; | 1768 int n, i; |
1765 | 1769 |
1766 /* we antialias only "long" bands */ | 1770 /* we antialias only "long" bands */ |
1767 if (g->block_type == 2) { | 1771 if (g->block_type == 2) { |
1768 if (!g->switch_point) | 1772 if (!g->switch_point) |
1778 float tmp0, tmp1; | 1782 float tmp0, tmp1; |
1779 float *csa = &csa_table_float[0][0]; | 1783 float *csa = &csa_table_float[0][0]; |
1780 #define FLOAT_AA(j)\ | 1784 #define FLOAT_AA(j)\ |
1781 tmp0= ptr[-1-j];\ | 1785 tmp0= ptr[-1-j];\ |
1782 tmp1= ptr[ j];\ | 1786 tmp1= ptr[ j];\ |
1783 ptr[-1-j] = lrintf(tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j]);\ | 1787 ptr[-1-j] = tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j];\ |
1784 ptr[ j] = lrintf(tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]); | 1788 ptr[ j] = tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]; |
1785 | 1789 |
1786 FLOAT_AA(0) | 1790 FLOAT_AA(0) |
1787 FLOAT_AA(1) | 1791 FLOAT_AA(1) |
1788 FLOAT_AA(2) | 1792 FLOAT_AA(2) |
1789 FLOAT_AA(3) | 1793 FLOAT_AA(3) |
1796 } | 1800 } |
1797 } | 1801 } |
1798 | 1802 |
1799 static void compute_imdct(MPADecodeContext *s, | 1803 static void compute_imdct(MPADecodeContext *s, |
1800 GranuleDef *g, | 1804 GranuleDef *g, |
1801 int32_t *sb_samples, | 1805 INTFLOAT *sb_samples, |
1802 int32_t *mdct_buf) | 1806 INTFLOAT *mdct_buf) |
1803 { | 1807 { |
1804 int32_t *ptr, *win, *win1, *buf, *out_ptr, *ptr1; | 1808 INTFLOAT *win, *win1, *out_ptr, *ptr, *buf, *ptr1; |
1805 int32_t out2[12]; | 1809 INTFLOAT out2[12]; |
1806 int i, j, mdct_long_end, v, sblimit; | 1810 int i, j, mdct_long_end, sblimit; |
1807 | 1811 |
1808 /* find last non zero block */ | 1812 /* find last non zero block */ |
1809 ptr = g->sb_hybrid + 576; | 1813 ptr = g->sb_hybrid + 576; |
1810 ptr1 = g->sb_hybrid + 2 * 18; | 1814 ptr1 = g->sb_hybrid + 2 * 18; |
1811 while (ptr >= ptr1) { | 1815 while (ptr >= ptr1) { |
1816 int32_t *p; | |
1812 ptr -= 6; | 1817 ptr -= 6; |
1813 v = ptr[0] | ptr[1] | ptr[2] | ptr[3] | ptr[4] | ptr[5]; | 1818 p= (int32_t*)ptr; |
1814 if (v != 0) | 1819 if(p[0] | p[1] | p[2] | p[3] | p[4] | p[5]) |
1815 break; | 1820 break; |
1816 } | 1821 } |
1817 sblimit = ((ptr - g->sb_hybrid) / 18) + 1; | 1822 sblimit = ((ptr - g->sb_hybrid) / 18) + 1; |
1818 | 1823 |
1819 if (g->block_type == 2) { | 1824 if (g->block_type == 2) { |
1852 *out_ptr = buf[i]; | 1857 *out_ptr = buf[i]; |
1853 out_ptr += SBLIMIT; | 1858 out_ptr += SBLIMIT; |
1854 } | 1859 } |
1855 imdct12(out2, ptr + 0); | 1860 imdct12(out2, ptr + 0); |
1856 for(i=0;i<6;i++) { | 1861 for(i=0;i<6;i++) { |
1857 *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*1]; | 1862 *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*1]; |
1858 buf[i + 6*2] = MULH(out2[i + 6], win[i + 6]); | 1863 buf[i + 6*2] = MULH3(out2[i + 6], win[i + 6], 1); |
1859 out_ptr += SBLIMIT; | 1864 out_ptr += SBLIMIT; |
1860 } | 1865 } |
1861 imdct12(out2, ptr + 1); | 1866 imdct12(out2, ptr + 1); |
1862 for(i=0;i<6;i++) { | 1867 for(i=0;i<6;i++) { |
1863 *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*2]; | 1868 *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*2]; |
1864 buf[i + 6*0] = MULH(out2[i + 6], win[i + 6]); | 1869 buf[i + 6*0] = MULH3(out2[i + 6], win[i + 6], 1); |
1865 out_ptr += SBLIMIT; | 1870 out_ptr += SBLIMIT; |
1866 } | 1871 } |
1867 imdct12(out2, ptr + 2); | 1872 imdct12(out2, ptr + 2); |
1868 for(i=0;i<6;i++) { | 1873 for(i=0;i<6;i++) { |
1869 buf[i + 6*0] = MULH(out2[i], win[i]) + buf[i + 6*0]; | 1874 buf[i + 6*0] = MULH3(out2[i ], win[i ], 1) + buf[i + 6*0]; |
1870 buf[i + 6*1] = MULH(out2[i + 6], win[i + 6]); | 1875 buf[i + 6*1] = MULH3(out2[i + 6], win[i + 6], 1); |
1871 buf[i + 6*2] = 0; | 1876 buf[i + 6*2] = 0; |
1872 } | 1877 } |
1873 ptr += 18; | 1878 ptr += 18; |
1874 buf += 18; | 1879 buf += 18; |
1875 } | 1880 } |
1890 static int mp_decode_layer3(MPADecodeContext *s) | 1895 static int mp_decode_layer3(MPADecodeContext *s) |
1891 { | 1896 { |
1892 int nb_granules, main_data_begin, private_bits; | 1897 int nb_granules, main_data_begin, private_bits; |
1893 int gr, ch, blocksplit_flag, i, j, k, n, bits_pos; | 1898 int gr, ch, blocksplit_flag, i, j, k, n, bits_pos; |
1894 GranuleDef *g; | 1899 GranuleDef *g; |
1895 int16_t exponents[576]; | 1900 int16_t exponents[576]; //FIXME try INTFLOAT |
1896 | 1901 |
1897 /* read side info */ | 1902 /* read side info */ |
1898 if (s->lsf) { | 1903 if (s->lsf) { |
1899 main_data_begin = get_bits(&s->gb, 8); | 1904 main_data_begin = get_bits(&s->gb, 8); |
1900 private_bits = get_bits(&s->gb, s->nb_channels); | 1905 private_bits = get_bits(&s->gb, s->nb_channels); |
2120 | 2125 |
2121 for(ch=0;ch<s->nb_channels;ch++) { | 2126 for(ch=0;ch<s->nb_channels;ch++) { |
2122 g = &s->granules[ch][gr]; | 2127 g = &s->granules[ch][gr]; |
2123 | 2128 |
2124 reorder_block(s, g); | 2129 reorder_block(s, g); |
2125 s->compute_antialias(s, g); | 2130 compute_antialias(s, g); |
2126 compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]); | 2131 compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]); |
2127 } | 2132 } |
2128 } /* gr */ | 2133 } /* gr */ |
2129 if(get_bits_count(&s->gb)<0) | 2134 if(get_bits_count(&s->gb)<0) |
2130 skip_bits_long(&s->gb, -get_bits_count(&s->gb)); | 2135 skip_bits_long(&s->gb, -get_bits_count(&s->gb)); |
2189 | 2194 |
2190 /* apply the synthesis filter */ | 2195 /* apply the synthesis filter */ |
2191 for(ch=0;ch<s->nb_channels;ch++) { | 2196 for(ch=0;ch<s->nb_channels;ch++) { |
2192 samples_ptr = samples + ch; | 2197 samples_ptr = samples + ch; |
2193 for(i=0;i<nb_frames;i++) { | 2198 for(i=0;i<nb_frames;i++) { |
2194 ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]), | 2199 RENAME(ff_mpa_synth_filter)(s->synth_buf[ch], &(s->synth_buf_offset[ch]), |
2195 ff_mpa_synth_window, &s->dither_state, | 2200 RENAME(ff_mpa_synth_window), &s->dither_state, |
2196 samples_ptr, s->nb_channels, | 2201 samples_ptr, s->nb_channels, |
2197 s->sb_samples[ch][i]); | 2202 s->sb_samples[ch][i]); |
2198 samples_ptr += 32 * s->nb_channels; | 2203 samples_ptr += 32 * s->nb_channels; |
2199 } | 2204 } |
2200 } | 2205 } |
2384 /* Create a separate codec/context for each frame (first is already ok). | 2389 /* Create a separate codec/context for each frame (first is already ok). |
2385 * Each frame is 1 or 2 channels - up to 5 frames allowed | 2390 * Each frame is 1 or 2 channels - up to 5 frames allowed |
2386 */ | 2391 */ |
2387 for (i = 1; i < s->frames; i++) { | 2392 for (i = 1; i < s->frames; i++) { |
2388 s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); | 2393 s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); |
2389 s->mp3decctx[i]->compute_antialias = s->mp3decctx[0]->compute_antialias; | |
2390 s->mp3decctx[i]->adu_mode = 1; | 2394 s->mp3decctx[i]->adu_mode = 1; |
2391 s->mp3decctx[i]->avctx = avctx; | 2395 s->mp3decctx[i]->avctx = avctx; |
2392 } | 2396 } |
2393 | 2397 |
2394 return 0; | 2398 return 0; |
2478 *data_size = out_size; | 2482 *data_size = out_size; |
2479 return buf_size; | 2483 return buf_size; |
2480 } | 2484 } |
2481 #endif /* CONFIG_MP3ON4_DECODER */ | 2485 #endif /* CONFIG_MP3ON4_DECODER */ |
2482 | 2486 |
2487 #if !CONFIG_FLOAT | |
2483 #if CONFIG_MP1_DECODER | 2488 #if CONFIG_MP1_DECODER |
2484 AVCodec mp1_decoder = | 2489 AVCodec mp1_decoder = |
2485 { | 2490 { |
2486 "mp1", | 2491 "mp1", |
2487 AVMEDIA_TYPE_AUDIO, | 2492 AVMEDIA_TYPE_AUDIO, |
2557 decode_frame_mp3on4, | 2562 decode_frame_mp3on4, |
2558 .flush= flush, | 2563 .flush= flush, |
2559 .long_name= NULL_IF_CONFIG_SMALL("MP3onMP4"), | 2564 .long_name= NULL_IF_CONFIG_SMALL("MP3onMP4"), |
2560 }; | 2565 }; |
2561 #endif | 2566 #endif |
2567 #endif |