comparison celp_filters.c @ 10689:d124d9b688d0 libavcodec

Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
author vitor
date Wed, 16 Dec 2009 17:09:33 +0000
parents 8d536f190e6e
children 63451af5f8f9
comparison
legend: equal | deleted | inserted | replaced
10688:750102456f00 10689:d124d9b688d0
91 int buffer_length, 91 int buffer_length,
92 int filter_length) 92 int filter_length)
93 { 93 {
94 int i,n; 94 int i,n;
95 95
96 for (n = 0; n < buffer_length; n++) { 96 float out0, out1, out2, out3;
97 float old_out0, old_out1, old_out2, old_out3;
98 float a,b,c;
99
100 a = filter_coeffs[0];
101 b = filter_coeffs[1];
102 c = filter_coeffs[2];
103 b -= filter_coeffs[0] * filter_coeffs[0];
104 c -= filter_coeffs[1] * filter_coeffs[0];
105 c -= filter_coeffs[0] * b;
106
107 old_out0 = out[-4];
108 old_out1 = out[-3];
109 old_out2 = out[-2];
110 old_out3 = out[-1];
111 for (n = 0; n <= buffer_length - 4; n+=4) {
112 float tmp0,tmp1,tmp2,tmp3;
113 float val;
114
115 out0 = in[0];
116 out1 = in[1];
117 out2 = in[2];
118 out3 = in[3];
119
120 out0 -= filter_coeffs[2] * old_out1;
121 out1 -= filter_coeffs[2] * old_out2;
122 out2 -= filter_coeffs[2] * old_out3;
123
124 out0 -= filter_coeffs[1] * old_out2;
125 out1 -= filter_coeffs[1] * old_out3;
126
127 out0 -= filter_coeffs[0] * old_out3;
128
129 val = filter_coeffs[3];
130
131 out0 -= val * old_out0;
132 out1 -= val * old_out1;
133 out2 -= val * old_out2;
134 out3 -= val * old_out3;
135
136 old_out3 = out[-5];
137
138 for (i = 5; i <= filter_length; i += 2) {
139 val = filter_coeffs[i-1];
140
141 out0 -= val * old_out3;
142 out1 -= val * old_out0;
143 out2 -= val * old_out1;
144 out3 -= val * old_out2;
145
146 old_out2 = out[-i-1];
147
148 val = filter_coeffs[i];
149
150 out0 -= val * old_out2;
151 out1 -= val * old_out3;
152 out2 -= val * old_out0;
153 out3 -= val * old_out1;
154
155 FFSWAP(float, old_out0, old_out2);
156 old_out1 = old_out3;
157 old_out3 = out[-i-2];
158 }
159
160 tmp0 = out0;
161 tmp1 = out1;
162 tmp2 = out2;
163 tmp3 = out3;
164
165 out3 -= a * tmp2;
166 out2 -= a * tmp1;
167 out1 -= a * tmp0;
168
169 out3 -= b * tmp1;
170 out2 -= b * tmp0;
171
172 out3 -= c * tmp0;
173
174
175 out[0] = out0;
176 out[1] = out1;
177 out[2] = out2;
178 out[3] = out3;
179
180 old_out0 = out0;
181 old_out1 = out1;
182 old_out2 = out2;
183 old_out3 = out3;
184
185 out += 4;
186 in += 4;
187 }
188
189 out -= n;
190 in -= n;
191 for (; n < buffer_length; n++) {
97 out[n] = in[n]; 192 out[n] = in[n];
98 for (i = 1; i <= filter_length; i++) 193 for (i = 1; i <= filter_length; i++)
99 out[n] -= filter_coeffs[i-1] * out[n-i]; 194 out[n] -= filter_coeffs[i-1] * out[n-i];
100 } 195 }
101 } 196 }