Mercurial > libavcodec.hg
comparison flacenc.c @ 5724:96d1b6c30aad libavcodec
Unroll encode_residual_lpc(). Speedup varies between 1.2x and 1.8x depending on the LPC order.
author | lorenm |
---|---|
date | Thu, 27 Sep 2007 02:42:00 +0000 |
parents | d1707b860013 |
children | 81fff8db79cc |
comparison
equal
deleted
inserted
replaced
5723:49a5d44423ef | 5724:96d1b6c30aad |
---|---|
832 for(i=order; i<n; i++) | 832 for(i=order; i<n; i++) |
833 res[i]= smp[i] - 4*smp[i-1] + 6*smp[i-2] - 4*smp[i-3] + smp[i-4]; | 833 res[i]= smp[i] - 4*smp[i-1] + 6*smp[i-2] - 4*smp[i-3] + smp[i-4]; |
834 } | 834 } |
835 } | 835 } |
836 | 836 |
837 #define LPC1(x) {\ | |
838 int s = smp[i-(x)+1];\ | |
839 p1 += c*s;\ | |
840 c = coefs[(x)-2];\ | |
841 p0 += c*s;\ | |
842 } | |
843 | |
844 static av_always_inline void encode_residual_lpc_unrolled( | |
845 int32_t *res, const int32_t *smp, int n, | |
846 int order, const int32_t *coefs, int shift, int big) | |
847 { | |
848 int i; | |
849 for(i=order; i<n; i+=2) { | |
850 int c = coefs[order-1]; | |
851 int p0 = c * smp[i-order]; | |
852 int p1 = 0; | |
853 if(big) { | |
854 switch(order) { | |
855 case 32: LPC1(32) | |
856 case 31: LPC1(31) | |
857 case 30: LPC1(30) | |
858 case 29: LPC1(29) | |
859 case 28: LPC1(28) | |
860 case 27: LPC1(27) | |
861 case 26: LPC1(26) | |
862 case 25: LPC1(25) | |
863 case 24: LPC1(24) | |
864 case 23: LPC1(23) | |
865 case 22: LPC1(22) | |
866 case 21: LPC1(21) | |
867 case 20: LPC1(20) | |
868 case 19: LPC1(19) | |
869 case 18: LPC1(18) | |
870 case 17: LPC1(17) | |
871 case 16: LPC1(16) | |
872 case 15: LPC1(15) | |
873 case 14: LPC1(14) | |
874 case 13: LPC1(13) | |
875 case 12: LPC1(12) | |
876 case 11: LPC1(11) | |
877 case 10: LPC1(10) | |
878 case 9: LPC1( 9) | |
879 LPC1( 8) | |
880 LPC1( 7) | |
881 LPC1( 6) | |
882 LPC1( 5) | |
883 LPC1( 4) | |
884 LPC1( 3) | |
885 LPC1( 2) | |
886 } | |
887 } else { | |
888 switch(order) { | |
889 case 8: LPC1( 8) | |
890 case 7: LPC1( 7) | |
891 case 6: LPC1( 6) | |
892 case 5: LPC1( 5) | |
893 case 4: LPC1( 4) | |
894 case 3: LPC1( 3) | |
895 case 2: LPC1( 2) | |
896 } | |
897 } | |
898 p1 += c * smp[i]; | |
899 res[i ] = smp[i ] - (p0 >> shift); | |
900 res[i+1] = smp[i+1] - (p1 >> shift); | |
901 } | |
902 } | |
903 | |
837 static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n, | 904 static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n, |
838 int order, const int32_t *coefs, int shift) | 905 int order, const int32_t *coefs, int shift) |
839 { | 906 { |
840 int i, j; | 907 int i; |
841 | |
842 for(i=0; i<order; i++) { | 908 for(i=0; i<order; i++) { |
843 res[i] = smp[i]; | 909 res[i] = smp[i]; |
844 } | 910 } |
911 #ifdef CONFIG_SMALL | |
845 for(i=order; i<n; i+=2) { | 912 for(i=order; i<n; i+=2) { |
913 int j; | |
846 int32_t c = coefs[0]; | 914 int32_t c = coefs[0]; |
847 int32_t p0 = 0, p1 = c*smp[i]; | 915 int32_t p0 = 0, p1 = c*smp[i]; |
848 for(j=1; j<order; j++) { | 916 for(j=1; j<order; j++) { |
849 int32_t s = smp[i-j]; | 917 int32_t s = smp[i-j]; |
850 p0 += c*s; | 918 p0 += c*s; |
851 c = coefs[j]; | 919 c = coefs[j]; |
852 p1 += c*s; | 920 p1 += c*s; |
853 } | 921 } |
854 p0 += c*smp[i-order]; | 922 p0 += c*smp[i-order]; |
855 res[i+0] = smp[i+0] - (p0 >> shift); | 923 res[i+0] = smp[i+0] - (p0 >> shift); |
856 res[i+1] = smp[i+1] - (p1 >> shift); | 924 res[i+1] = smp[i+1] - (p1 >> shift); |
857 } | 925 } |
926 #else | |
927 switch(order) { | |
928 case 1: encode_residual_lpc_unrolled(res, smp, n, 1, coefs, shift, 0); break; | |
929 case 2: encode_residual_lpc_unrolled(res, smp, n, 2, coefs, shift, 0); break; | |
930 case 3: encode_residual_lpc_unrolled(res, smp, n, 3, coefs, shift, 0); break; | |
931 case 4: encode_residual_lpc_unrolled(res, smp, n, 4, coefs, shift, 0); break; | |
932 case 5: encode_residual_lpc_unrolled(res, smp, n, 5, coefs, shift, 0); break; | |
933 case 6: encode_residual_lpc_unrolled(res, smp, n, 6, coefs, shift, 0); break; | |
934 case 7: encode_residual_lpc_unrolled(res, smp, n, 7, coefs, shift, 0); break; | |
935 case 8: encode_residual_lpc_unrolled(res, smp, n, 8, coefs, shift, 0); break; | |
936 default: encode_residual_lpc_unrolled(res, smp, n, order, coefs, shift, 1); break; | |
937 } | |
938 #endif | |
858 } | 939 } |
859 | 940 |
860 static int encode_residual(FlacEncodeContext *ctx, int ch) | 941 static int encode_residual(FlacEncodeContext *ctx, int ch) |
861 { | 942 { |
862 int i, n; | 943 int i, n; |