comparison flacenc.c @ 5724:96d1b6c30aad libavcodec

Unroll encode_residual_lpc(). The speedup varies between 1.2x and 1.8x, depending on the LPC order.
author lorenm
date Thu, 27 Sep 2007 02:42:00 +0000
parents d1707b860013
children 81fff8db79cc
comparison
equal deleted inserted replaced
5723:49a5d44423ef 5724:96d1b6c30aad
832 for(i=order; i<n; i++) 832 for(i=order; i<n; i++)
833 res[i]= smp[i] - 4*smp[i-1] + 6*smp[i-2] - 4*smp[i-3] + smp[i-4]; 833 res[i]= smp[i] - 4*smp[i-1] + 6*smp[i-2] - 4*smp[i-3] + smp[i-4];
834 } 834 }
835 } 835 }
836 836
837 #define LPC1(x) {\
838 int s = smp[i-(x)+1];\
839 p1 += c*s;\
840 c = coefs[(x)-2];\
841 p0 += c*s;\
842 }
843
844 static av_always_inline void encode_residual_lpc_unrolled(
845 int32_t *res, const int32_t *smp, int n,
846 int order, const int32_t *coefs, int shift, int big)
847 {
848 int i;
849 for(i=order; i<n; i+=2) {
850 int c = coefs[order-1];
851 int p0 = c * smp[i-order];
852 int p1 = 0;
853 if(big) {
854 switch(order) {
855 case 32: LPC1(32)
856 case 31: LPC1(31)
857 case 30: LPC1(30)
858 case 29: LPC1(29)
859 case 28: LPC1(28)
860 case 27: LPC1(27)
861 case 26: LPC1(26)
862 case 25: LPC1(25)
863 case 24: LPC1(24)
864 case 23: LPC1(23)
865 case 22: LPC1(22)
866 case 21: LPC1(21)
867 case 20: LPC1(20)
868 case 19: LPC1(19)
869 case 18: LPC1(18)
870 case 17: LPC1(17)
871 case 16: LPC1(16)
872 case 15: LPC1(15)
873 case 14: LPC1(14)
874 case 13: LPC1(13)
875 case 12: LPC1(12)
876 case 11: LPC1(11)
877 case 10: LPC1(10)
878 case 9: LPC1( 9)
879 LPC1( 8)
880 LPC1( 7)
881 LPC1( 6)
882 LPC1( 5)
883 LPC1( 4)
884 LPC1( 3)
885 LPC1( 2)
886 }
887 } else {
888 switch(order) {
889 case 8: LPC1( 8)
890 case 7: LPC1( 7)
891 case 6: LPC1( 6)
892 case 5: LPC1( 5)
893 case 4: LPC1( 4)
894 case 3: LPC1( 3)
895 case 2: LPC1( 2)
896 }
897 }
898 p1 += c * smp[i];
899 res[i ] = smp[i ] - (p0 >> shift);
900 res[i+1] = smp[i+1] - (p1 >> shift);
901 }
902 }
903
837 static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n, 904 static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n,
838 int order, const int32_t *coefs, int shift) 905 int order, const int32_t *coefs, int shift)
839 { 906 {
840 int i, j; 907 int i;
841
842 for(i=0; i<order; i++) { 908 for(i=0; i<order; i++) {
843 res[i] = smp[i]; 909 res[i] = smp[i];
844 } 910 }
911 #ifdef CONFIG_SMALL
845 for(i=order; i<n; i+=2) { 912 for(i=order; i<n; i+=2) {
913 int j;
846 int32_t c = coefs[0]; 914 int32_t c = coefs[0];
847 int32_t p0 = 0, p1 = c*smp[i]; 915 int32_t p0 = 0, p1 = c*smp[i];
848 for(j=1; j<order; j++) { 916 for(j=1; j<order; j++) {
849 int32_t s = smp[i-j]; 917 int32_t s = smp[i-j];
850 p0 += c*s; 918 p0 += c*s;
851 c = coefs[j]; 919 c = coefs[j];
852 p1 += c*s; 920 p1 += c*s;
853 } 921 }
854 p0 += c*smp[i-order]; 922 p0 += c*smp[i-order];
855 res[i+0] = smp[i+0] - (p0 >> shift); 923 res[i+0] = smp[i+0] - (p0 >> shift);
856 res[i+1] = smp[i+1] - (p1 >> shift); 924 res[i+1] = smp[i+1] - (p1 >> shift);
857 } 925 }
926 #else
927 switch(order) {
928 case 1: encode_residual_lpc_unrolled(res, smp, n, 1, coefs, shift, 0); break;
929 case 2: encode_residual_lpc_unrolled(res, smp, n, 2, coefs, shift, 0); break;
930 case 3: encode_residual_lpc_unrolled(res, smp, n, 3, coefs, shift, 0); break;
931 case 4: encode_residual_lpc_unrolled(res, smp, n, 4, coefs, shift, 0); break;
932 case 5: encode_residual_lpc_unrolled(res, smp, n, 5, coefs, shift, 0); break;
933 case 6: encode_residual_lpc_unrolled(res, smp, n, 6, coefs, shift, 0); break;
934 case 7: encode_residual_lpc_unrolled(res, smp, n, 7, coefs, shift, 0); break;
935 case 8: encode_residual_lpc_unrolled(res, smp, n, 8, coefs, shift, 0); break;
936 default: encode_residual_lpc_unrolled(res, smp, n, order, coefs, shift, 1); break;
937 }
938 #endif
858 } 939 }
859 940
860 static int encode_residual(FlacEncodeContext *ctx, int ch) 941 static int encode_residual(FlacEncodeContext *ctx, int ch)
861 { 942 {
862 int i, n; 943 int i, n;