annotate celp_filters.c @ 12498:c997f09d1e10 libavcodec

Move hadamard_diff{,16}_{mmx,mmx2,sse2,ssse3}() from inline asm to yasm, which will hopefully solve the Win64/FATE failures caused by these functions.
author rbultje
date Fri, 17 Sep 2010 01:56:06 +0000
parents fb56293a501a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
1 /*
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
2 * various filters for ACELP-based codecs
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
3 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
4 * Copyright (c) 2008 Vladimir Voroshilov
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
5 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
6 * This file is part of FFmpeg.
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
7 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
8 * FFmpeg is free software; you can redistribute it and/or
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
9 * modify it under the terms of the GNU Lesser General Public
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
10 * License as published by the Free Software Foundation; either
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
11 * version 2.1 of the License, or (at your option) any later version.
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
12 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
13 * FFmpeg is distributed in the hope that it will be useful,
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
16 * Lesser General Public License for more details.
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
17 *
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
18 * You should have received a copy of the GNU Lesser General Public
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
19 * License along with FFmpeg; if not, write to the Free Software
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
21 */
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
22
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
23 #include <inttypes.h>
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
24
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
25 #include "avcodec.h"
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
26 #include "celp_filters.h"
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
27
10690
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
28 void ff_celp_convolve_circ(int16_t* fc_out, const int16_t* fc_in,
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
29 const int16_t* filter, int len)
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
30 {
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
31 int i, k;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
32
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
33 memset(fc_out, 0, len * sizeof(int16_t));
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
34
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
35 /* Since there are few pulses over an entire subframe (i.e. almost
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
36 all fc_in[i] are zero) it is faster to loop over fc_in first. */
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
37 for (i = 0; i < len; i++) {
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
38 if (fc_in[i]) {
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
39 for (k = 0; k < i; k++)
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
40 fc_out[k] += (fc_in[i] * filter[len + k - i]) >> 15;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
41
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
42 for (k = i; k < len; k++)
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
43 fc_out[k] += (fc_in[i] * filter[ k - i]) >> 15;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
44 }
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
45 }
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
46 }
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
47
10045
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
48 void ff_celp_circ_addf(float *out, const float *in,
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
49 const float *lagged, int lag, float fac, int n)
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
50 {
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
51 int k;
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
52 for (k = 0; k < lag; k++)
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
53 out[k] = in[k] + fac * lagged[n + k - lag];
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
54 for (; k < n; k++)
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
55 out[k] = in[k] + fac * lagged[ k - lag];
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
56 }
d35904b4fe3f Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents: 10013
diff changeset
57
10690
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
58 int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
59 const int16_t *in, int buffer_length,
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
60 int filter_length, int stop_on_overflow,
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
61 int rounder)
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
62 {
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
63 int i,n;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
64
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
65 for (n = 0; n < buffer_length; n++) {
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
66 int sum = rounder;
10503
8d536f190e6e And on the sixth day, God gave us the <= operand, which makes the loop
rbultje
parents: 10045
diff changeset
67 for (i = 1; i <= filter_length; i++)
9446
1a3865d1b049 Fix possibly harmful outbound addressing. Patch by Kenan Gillet.
reynaldo
parents: 9017
diff changeset
68 sum -= filter_coeffs[i-1] * out[n-i];
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
69
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
70 sum = (sum >> 12) + in[n];
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
71
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
72 if (sum + 0x8000 > 0xFFFFU) {
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
73 if (stop_on_overflow)
8049
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
74 return 1;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
75 sum = (sum >> 31) ^ 32767;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
76 }
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
77 out[n] = sum;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
78 }
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
79
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
80 return 0;
611a21e4b01b Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff changeset
81 }
8091
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
82
10690
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
83 void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
84 const float* in, int buffer_length,
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
85 int filter_length)
8091
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
86 {
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
87 int i,n;
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
88
11329
fb56293a501a Fix spelling in comment
vitor
parents: 11328
diff changeset
89 #if 0 // Unoptimized code path for improved readability
11328
51d5b29fc4fc Add commented-out unoptimized code to improve readability
vitor
parents: 10690
diff changeset
90 for (n = 0; n < buffer_length; n++) {
51d5b29fc4fc Add commented-out unoptimized code to improve readability
vitor
parents: 10690
diff changeset
91 out[n] = in[n];
51d5b29fc4fc Add commented-out unoptimized code to improve readability
vitor
parents: 10690
diff changeset
92 for (i = 1; i <= filter_length; i++)
51d5b29fc4fc Add commented-out unoptimized code to improve readability
vitor
parents: 10690
diff changeset
93 out[n] -= filter_coeffs[i-1] * out[n-i];
51d5b29fc4fc Add commented-out unoptimized code to improve readability
vitor
parents: 10690
diff changeset
94 }
51d5b29fc4fc Add commented-out unoptimized code to improve readability
vitor
parents: 10690
diff changeset
95 #else
10689
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
96 float out0, out1, out2, out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
97 float old_out0, old_out1, old_out2, old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
98 float a,b,c;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
99
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
100 a = filter_coeffs[0];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
101 b = filter_coeffs[1];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
102 c = filter_coeffs[2];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
103 b -= filter_coeffs[0] * filter_coeffs[0];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
104 c -= filter_coeffs[1] * filter_coeffs[0];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
105 c -= filter_coeffs[0] * b;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
106
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
107 old_out0 = out[-4];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
108 old_out1 = out[-3];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
109 old_out2 = out[-2];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
110 old_out3 = out[-1];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
111 for (n = 0; n <= buffer_length - 4; n+=4) {
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
112 float tmp0,tmp1,tmp2,tmp3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
113 float val;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
114
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
115 out0 = in[0];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
116 out1 = in[1];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
117 out2 = in[2];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
118 out3 = in[3];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
119
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
120 out0 -= filter_coeffs[2] * old_out1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
121 out1 -= filter_coeffs[2] * old_out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
122 out2 -= filter_coeffs[2] * old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
123
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
124 out0 -= filter_coeffs[1] * old_out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
125 out1 -= filter_coeffs[1] * old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
126
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
127 out0 -= filter_coeffs[0] * old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
128
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
129 val = filter_coeffs[3];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
130
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
131 out0 -= val * old_out0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
132 out1 -= val * old_out1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
133 out2 -= val * old_out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
134 out3 -= val * old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
135
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
136 old_out3 = out[-5];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
137
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
138 for (i = 5; i <= filter_length; i += 2) {
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
139 val = filter_coeffs[i-1];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
140
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
141 out0 -= val * old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
142 out1 -= val * old_out0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
143 out2 -= val * old_out1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
144 out3 -= val * old_out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
145
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
146 old_out2 = out[-i-1];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
147
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
148 val = filter_coeffs[i];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
149
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
150 out0 -= val * old_out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
151 out1 -= val * old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
152 out2 -= val * old_out0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
153 out3 -= val * old_out1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
154
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
155 FFSWAP(float, old_out0, old_out2);
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
156 old_out1 = old_out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
157 old_out3 = out[-i-2];
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
158 }
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
159
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
160 tmp0 = out0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
161 tmp1 = out1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
162 tmp2 = out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
163 tmp3 = out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
164
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
165 out3 -= a * tmp2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
166 out2 -= a * tmp1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
167 out1 -= a * tmp0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
168
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
169 out3 -= b * tmp1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
170 out2 -= b * tmp0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
171
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
172 out3 -= c * tmp0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
173
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
174
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
175 out[0] = out0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
176 out[1] = out1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
177 out[2] = out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
178 out[3] = out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
179
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
180 old_out0 = out0;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
181 old_out1 = out1;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
182 old_out2 = out2;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
183 old_out3 = out3;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
184
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
185 out += 4;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
186 in += 4;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
187 }
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
188
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
189 out -= n;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
190 in -= n;
d124d9b688d0 Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
vitor
parents: 10503
diff changeset
191 for (; n < buffer_length; n++) {
8091
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
192 out[n] = in[n];
10503
8d536f190e6e And on the sixth day, God gave us the <= operand, which makes the loop
rbultje
parents: 10045
diff changeset
193 for (i = 1; i <= filter_length; i++)
9446
1a3865d1b049 Fix possibly harmful outbound addressing. Patch by Kenan Gillet.
reynaldo
parents: 9017
diff changeset
194 out[n] -= filter_coeffs[i-1] * out[n-i];
8091
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
195 }
11328
51d5b29fc4fc Add commented-out unoptimized code to improve readability
vitor
parents: 10690
diff changeset
196 #endif
8091
4c95f44c4c23 Add a LPC filter
vitor
parents: 8049
diff changeset
197 }
9509
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
198
10690
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
199 void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs,
63451af5f8f9 Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents: 10689
diff changeset
200 const float *in, int buffer_length,
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
201 int filter_length)
9509
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
202 {
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
203 int i,n;
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
204
10003
24952f1a8979 cosmetics: K&R coding style
diego
parents: 9509
diff changeset
205 for (n = 0; n < buffer_length; n++) {
9509
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
206 out[n] = in[n];
10503
8d536f190e6e And on the sixth day, God gave us the <= operand, which makes the loop
rbultje
parents: 10045
diff changeset
207 for (i = 1; i <= filter_length; i++)
10013
454cb6aa43a3 Correct the sign of the arithmetic in ff_celp_lp_zero_synthesis_filterf()
superdump
parents: 10003
diff changeset
208 out[n] += filter_coeffs[i-1] * in[n-i];
9509
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
209 }
2838045383c5 Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents: 9447
diff changeset
210 }