annotate celp_filters.c @ 10689:d124d9b688d0 libavcodec

Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
author vitor
date Wed, 16 Dec 2009 17:09:33 +0000
parents 8d536f190e6e
children 63451af5f8f9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
1 /*
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
2 * various filters for ACELP-based codecs
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
3 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
4 * Copyright (c) 2008 Vladimir Voroshilov
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
5 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
6 * This file is part of FFmpeg.
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
7 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
12 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
16 * Lesser General Public License for more details.
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
17 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
21 */
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
22
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
23 #include <inttypes.h>
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
24
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
25 #include "avcodec.h"
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
26 #include "celp_filters.h"
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
27
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
/**
 * Circular convolution of a (typically sparse) vector with a filter.
 *
 * fc_out is cleared first. Every non-zero sample fc_in[i] then adds a copy
 * of the filter to fc_out, rotated so that filter[0] lands on position i
 * and wrapping around at len; each product is shifted right by 15 bits
 * before it is accumulated.
 *
 * @param fc_out  destination vector, len samples
 * @param fc_in   source vector, len samples
 * @param filter  filter vector, len samples
 * @param len     number of samples in each of the three vectors
 */
void ff_celp_convolve_circ(int16_t* fc_out,
                           const int16_t* fc_in,
                           const int16_t* filter,
                           int len)
{
    int pulse_pos, dst;

    memset(fc_out, 0, len * sizeof(int16_t));

    /* Walking the input in the outer loop means a zero sample costs only
     * one comparison; the inner loop runs solely for actual pulses. */
    for (pulse_pos = 0; pulse_pos < len; pulse_pos++) {
        const int pulse = fc_in[pulse_pos];

        if (!pulse)
            continue;

        for (dst = 0; dst < len; dst++) {
            /* Filter tap that lands on dst once the filter has been
             * rotated by pulse_pos positions. */
            int tap = dst - pulse_pos;

            if (tap < 0)
                tap += len;

            fc_out[dst] += (pulse * filter[tap]) >> 15;
        }
    }
}
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
49
10045
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
/**
 * Add a scaled, circularly shifted vector to another vector.
 *
 * For every k the result is
 *     out[k] = in[k] + fac * lagged[(k - lag) wrapped into 0..n-1]
 *
 * @param out     destination vector
 * @param in      vector the scaled contribution is added to
 * @param lagged  vector that is read with a circular offset of lag
 * @param lag     circular offset applied to lagged
 * @param fac     scale factor applied to the lagged samples
 * @param n       vector length
 */
void ff_celp_circ_addf(float *out, const float *in,
                       const float *lagged, int lag, float fac, int n)
{
    const int wrap = n - lag;
    int k = 0;

    /* The first lag outputs read from the end of lagged (wrapped part). */
    while (k < lag) {
        out[k] = in[k] + fac * lagged[k + wrap];
        k++;
    }

    /* The remaining outputs read lagged from its start onwards. */
    while (k < n) {
        out[k] = in[k] + fac * lagged[k - lag];
        k++;
    }
}
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
59
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
/**
 * Fixed-point LP synthesis filter.
 *
 * For each n in [0, buffer_length) the function accumulates
 *     rounder - sum(filter_coeffs[i-1] * out[n-i], i = 1..filter_length),
 * shifts the total right by 12 bits, adds in[n] and stores the result in
 * out[n]. The samples out[-filter_length] .. out[-1] are read as filter
 * history, so they must be valid when the function is called.
 *
 * @param out               output buffer, preceded by the filter history
 * @param filter_coeffs     filter_length coefficients
 * @param in                input samples
 * @param buffer_length     number of samples to produce
 * @param filter_length     number of filter coefficients
 * @param stop_on_overflow  non-zero: return as soon as a result does not
 *                          fit into 16 bits; zero: saturate it instead
 * @param rounder           initial value of the accumulator
 *
 * @return 1 if processing stopped because of an overflow (the overflowing
 *         sample and everything after it are left unwritten), 0 otherwise
 */
int ff_celp_lp_synthesis_filter(int16_t *out,
                                const int16_t* filter_coeffs,
                                const int16_t* in,
                                int buffer_length,
                                int filter_length,
                                int stop_on_overflow,
                                int rounder)
{
    int pos;

    for (pos = 0; pos < buffer_length; pos++) {
        int acc = rounder;
        int tap;

        /* Feedback from the previous filter_length output samples. */
        for (tap = 0; tap < filter_length; tap++)
            acc -= filter_coeffs[tap] * out[pos - 1 - tap];

        acc = (acc >> 12) + in[pos];

        /* Outside the signed 16-bit range: either bail out or clamp. */
        if (acc < -32768 || acc > 32767) {
            if (stop_on_overflow)
                return 1;
            acc = acc < 0 ? -32768 : 32767;
        }

        out[pos] = acc;
    }

    return 0;
}
8091
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
87
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
/**
 * Floating-point LP synthesis filter.
 *
 * Computes, for n in [0, buffer_length):
 *     out[n] = in[n] - sum(filter_coeffs[i-1] * out[n-i], i = 1..filter_length)
 *
 * The samples out[-filter_length] .. out[-1] are read as filter history and
 * must be valid when the function is called; nothing in front of them is
 * accessed.
 *
 * When filter_length is even and at least 4, outputs are produced four at a
 * time by an unrolled path; any leftover samples, and every other filter
 * length, are handled by the plain recursion at the end of the function.
 *
 * @param out            output buffer, preceded by the filter history
 * @param filter_coeffs  filter_length coefficients
 * @param in             input samples
 * @param buffer_length  number of samples to produce
 * @param filter_length  number of filter coefficients
 */
void ff_celp_lp_synthesis_filterf(float *out,
                                  const float* filter_coeffs,
                                  const float* in,
                                  int buffer_length,
                                  int filter_length)
{
    int i, n = 0;

    /* The unrolled path hard-codes the first four taps and consumes the
     * remaining ones in pairs, so it is only valid for even lengths >= 4. */
    if (filter_length >= 4 && !(filter_length & 1)) {
        float out0, out1, out2, out3;
        float old_out0, old_out1, old_out2, old_out3;
        float a, b, c;
        float *dst       = out;
        const float *src = in;

        /* Within one group of four, each output depends on the outputs
         * computed just before it. a, b and c are the first three taps
         * with that dependency expanded, so that all four outputs can be
         * finished from the partial sums tmp0..tmp2 alone. */
        a = filter_coeffs[0];
        b = filter_coeffs[1];
        c = filter_coeffs[2];
        b -= filter_coeffs[0] * filter_coeffs[0];
        c -= filter_coeffs[1] * filter_coeffs[0];
        c -= filter_coeffs[0] * b;

        old_out0 = dst[-4];
        old_out1 = dst[-3];
        old_out2 = dst[-2];
        old_out3 = dst[-1];

        for (; n <= buffer_length - 4; n += 4) {
            float tmp0, tmp1, tmp2;
            float swap_tmp;
            float val;

            out0 = src[0];
            out1 = src[1];
            out2 = src[2];
            out3 = src[3];

            /* Taps 1..3: only the terms that reach back into samples
             * produced before this group of four. */
            out0 -= filter_coeffs[2] * old_out1;
            out1 -= filter_coeffs[2] * old_out2;
            out2 -= filter_coeffs[2] * old_out3;

            out0 -= filter_coeffs[1] * old_out2;
            out1 -= filter_coeffs[1] * old_out3;

            out0 -= filter_coeffs[0] * old_out3;

            /* Tap 4 */
            val = filter_coeffs[3];

            out0 -= val * old_out0;
            out1 -= val * old_out1;
            out2 -= val * old_out2;
            out3 -= val * old_out3;

            /* Taps 5..filter_length, two per iteration. Each history
             * sample is loaded right before its first use, so the oldest
             * sample ever read is dst[-filter_length]. */
            for (i = 5; i < filter_length; i += 2) {
                old_out3 = dst[-i];
                val = filter_coeffs[i-1];

                out0 -= val * old_out3;
                out1 -= val * old_out0;
                out2 -= val * old_out1;
                out3 -= val * old_out2;

                old_out2 = dst[-i-1];
                val = filter_coeffs[i];

                out0 -= val * old_out2;
                out1 -= val * old_out3;
                out2 -= val * old_out0;
                out3 -= val * old_out1;

                /* Rotate the four-sample history window back by two. */
                swap_tmp = old_out0;
                old_out0 = old_out2;
                old_out2 = swap_tmp;
                old_out1 = old_out3;
            }

            tmp0 = out0;
            tmp1 = out1;
            tmp2 = out2;

            /* Resolve the dependencies inside the group. */
            out3 -= a * tmp2;
            out2 -= a * tmp1;
            out1 -= a * tmp0;

            out3 -= b * tmp1;
            out2 -= b * tmp0;

            out3 -= c * tmp0;

            dst[0] = out0;
            dst[1] = out1;
            dst[2] = out2;
            dst[3] = out3;

            old_out0 = out0;
            old_out1 = out1;
            old_out2 = out2;
            old_out3 = out3;

            dst += 4;
            src += 4;
        }
    }

    /* Plain recursion: leftover samples of the unrolled path, or the whole
     * buffer when the filter length does not qualify for it. */
    for (; n < buffer_length; n++) {
        out[n] = in[n];
        for (i = 1; i <= filter_length; i++)
            out[n] -= filter_coeffs[i-1] * out[n-i];
    }
}
9509
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
197
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
/**
 * Floating-point LP zero synthesis filter.
 *
 * Computes, for n in [0, buffer_length):
 *     out[n] = in[n] + sum(filter_coeffs[i-1] * in[n-i], i = 1..filter_length)
 *
 * The samples in[-filter_length] .. in[-1] are read as filter history, so
 * they must be valid when the function is called.
 *
 * @param out            output buffer
 * @param filter_coeffs  filter_length coefficients
 * @param in             input samples, preceded by the filter history
 * @param buffer_length  number of samples to produce
 * @param filter_length  number of filter coefficients
 */
void ff_celp_lp_zero_synthesis_filterf(float *out,
                                       const float* filter_coeffs,
                                       const float* in,
                                       int buffer_length,
                                       int filter_length)
{
    int pos;

    for (pos = 0; pos < buffer_length; pos++) {
        int tap;

        /* Start from the current input sample, then add the weighted
         * previous inputs, most recent one first. */
        out[pos] = in[pos];

        for (tap = 0; tap < filter_length; tap++)
            out[pos] += filter_coeffs[tap] * in[pos - 1 - tap];
    }
}