Mercurial > libavcodec.hg
annotate celp_filters.c @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than
regular MMX code. Examples of this are the Core1 CPU. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | fb56293a501a |
children |
rev | line source |
---|---|
8049
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
1 /* |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
2 * various filters for ACELP-based codecs |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
3 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
4 * Copyright (c) 2008 Vladimir Voroshilov |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
5 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
6 * This file is part of FFmpeg. |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
7 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
12 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
17 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
21 */ |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
22 |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
23 #include <inttypes.h> |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
24 |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
25 #include "avcodec.h" |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
26 #include "celp_filters.h" |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
27 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * Circularly convolve a sparse fixed-codebook vector with a filter.
 *
 * fc_out is cleared first; then every non-zero sample of fc_in adds a
 * copy of the filter, rotated by that sample's position and scaled by
 * the sample value (product shifted right by 15 bits), into fc_out.
 *
 * @param fc_out  output vector, len samples (overwritten)
 * @param fc_in   input vector, len samples
 * @param filter  filter vector, len samples
 * @param len     number of samples in each vector
 */
void ff_celp_convolve_circ(int16_t* fc_out, const int16_t* fc_in,
                           const int16_t* filter, int len)
{
    int pos, j;

    memset(fc_out, 0, len * sizeof(int16_t));

    /* Most fc_in[] entries are expected to be zero, so iterate over the
       pulses and skip the empty positions instead of looping over the
       output samples first. */
    for (pos = 0; pos < len; pos++) {
        if (!fc_in[pos])
            continue;

        /* Part of the filter that wraps around to the start of fc_out. */
        for (j = 0; j < pos; j++)
            fc_out[j] += (fc_in[pos] * filter[len + j - pos]) >> 15;

        /* Part of the filter that fits without wrapping. */
        for (; j < len; j++)
            fc_out[j] += (fc_in[pos] * filter[j - pos]) >> 15;
    }
}
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
47 |
10045
d35904b4fe3f
Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents:
10013
diff
changeset
|
/**
 * Add a scaled, circularly shifted vector to another vector.
 *
 * For every k in [0, n):  out[k] = in[k] + fac * lagged[(k - lag) mod n],
 * where the first lag outputs take their lagged sample from the end of
 * the lagged vector.
 *
 * @param out     result vector, n samples
 * @param in      vector the shifted data is added to, n samples
 * @param lagged  vector that is rotated by lag and scaled by fac, n samples
 * @param lag     rotation amount in samples
 * @param fac     scale factor applied to the lagged vector
 * @param n       number of samples
 */
void ff_celp_circ_addf(float *out, const float *in,
                       const float *lagged, int lag, float fac, int n)
{
    int idx = 0;

    /* Leading samples: the lagged index wraps around to the tail. */
    while (idx < lag) {
        out[idx] = in[idx] + fac * lagged[n + idx - lag];
        idx++;
    }

    /* Remaining samples: the lagged index is in range without wrapping. */
    while (idx < n) {
        out[idx] = in[idx] + fac * lagged[idx - lag];
        idx++;
    }
}
d35904b4fe3f
Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents:
10013
diff
changeset
|
57 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * LP synthesis filter, 16-bit fixed-point version.
 *
 * For every n in [0, buffer_length):
 *   out[n] = in[n] + ((rounder - sum(i = 1..filter_length)
 *                                filter_coeffs[i-1] * out[n-i]) >> 12)
 * saturated to the int16_t range.
 *
 * @param out               output buffer; out[-filter_length .. -1] is read
 *                          as the filter history and must be valid
 * @param filter_coeffs     filter_length coefficients (the >> 12 suggests
 *                          12 fractional bits -- confirm against callers)
 * @param in                input signal, buffer_length samples
 * @param buffer_length     number of samples to produce
 * @param filter_length     number of filter taps
 * @param stop_on_overflow  if non-zero, abort as soon as a sample would
 *                          need saturation
 * @param rounder           value the accumulator starts from before the
 *                          12-bit shift
 *
 * @return 1 if a sample overflowed and stop_on_overflow is set (samples
 *         before it have already been written), 0 otherwise
 */
int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
                                const int16_t *in, int buffer_length,
                                int filter_length, int stop_on_overflow,
                                int rounder)
{
    int i, n;

    for (n = 0; n < buffer_length; n++) {
        /* Accumulate in unsigned arithmetic: each int16 * int16 product
           fits in an int, but the running sum of several of them may not,
           and signed overflow would be undefined behaviour. Unsigned
           wrap-around is well defined and yields the same value whenever
           the true sum is representable. */
        unsigned acc = rounder;
        int sum;

        for (i = 1; i <= filter_length; i++)
            acc -= (unsigned)(filter_coeffs[i - 1] * out[n - i]);

        sum = ((int)acc >> 12) + in[n];

        /* True when sum lies outside [-32768, 32767]. */
        if (sum + 0x8000 > 0xFFFFU) {
            if (stop_on_overflow)
                return 1;
            /* Saturate: 32767 for positive sums, -32768 for negative. */
            sum = (sum >> 31) ^ 32767;
        }
        out[n] = sum;
    }

    return 0;
}
8091 | 82 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * LP synthesis filter, floating-point version.
 *
 * Evaluates the recurrence
 *
 *     out[n] = in[n] - sum(i = 1..filter_length) filter_coeffs[i-1] * out[n-i]
 *
 * for n in [0, buffer_length). The bulk of the work is done four output
 * samples at a time: the contribution of samples produced inside the
 * current group of four is folded into three precomputed factors (a, b, c),
 * so that the four accumulators can be updated independently. Any samples
 * left over after the last full group are computed with the plain
 * recurrence above. The operation order differs from the plain recurrence,
 * so results may differ from it by floating-point rounding.
 *
 * @param out            output buffer; out[-filter_length .. -1] is read as
 *                       the filter history and must be valid
 * @param filter_coeffs  filter coefficients, filter_length entries
 * @param in             input signal, buffer_length samples
 * @param buffer_length  number of samples to produce
 * @param filter_length  number of taps; the unrolled path reads
 *                       filter_coeffs[0..3] and out[-4..-1] directly and
 *                       consumes the remaining taps in pairs, so this must
 *                       be even and at least 4
 */
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
                                  const float* in, int buffer_length,
                                  int filter_length)
{
    int i, n;
    float out0, out1, out2, out3;
    float old_out0, old_out1, old_out2, old_out3;
    float a, b, c;

    /* Factors that express out[1..3] of a group in terms of the partial
       sums tmp0..tmp2 (see the end of the main loop):
         a = f0
         b = f1 - f0*f0
         c = f2 - f1*f0 - f0*b                                          */
    a = filter_coeffs[0];
    b = filter_coeffs[1];
    c = filter_coeffs[2];
    b -= filter_coeffs[0] * filter_coeffs[0];
    c -= filter_coeffs[1] * filter_coeffs[0];
    c -= filter_coeffs[0] * b;

    old_out0 = out[-4];
    old_out1 = out[-3];
    old_out2 = out[-2];
    old_out3 = out[-1];
    for (n = 0; n <= buffer_length - 4; n += 4) {
        float tmp0, tmp1, tmp2;
        float val;

        out0 = in[0];
        out1 = in[1];
        out2 = in[2];
        out3 = in[3];

        /* Taps 1..3: only the terms that reach back into the previous
           group; the in-group terms are applied later through a, b, c. */
        out0 -= filter_coeffs[2] * old_out1;
        out1 -= filter_coeffs[2] * old_out2;
        out2 -= filter_coeffs[2] * old_out3;

        out0 -= filter_coeffs[1] * old_out2;
        out1 -= filter_coeffs[1] * old_out3;

        out0 -= filter_coeffs[0] * old_out3;

        /* Tap 4 touches exactly the four previous outputs. */
        val = filter_coeffs[3];

        out0 -= val * old_out0;
        out1 -= val * old_out1;
        out2 -= val * old_out2;
        out3 -= val * old_out3;

        /* Taps 5..filter_length, two per iteration. The four old_out
           variables act as a sliding window over the history. The oldest
           sample needed is loaded at the top of each iteration, so nothing
           older than out[-filter_length] is ever read. */
        for (i = 5; i < filter_length; i += 2) {
            float swap_tmp;

            old_out3 = out[-i];
            val = filter_coeffs[i-1];

            out0 -= val * old_out3;
            out1 -= val * old_out0;
            out2 -= val * old_out1;
            out3 -= val * old_out2;

            old_out2 = out[-i-1];

            val = filter_coeffs[i];

            out0 -= val * old_out2;
            out1 -= val * old_out3;
            out2 -= val * old_out0;
            out3 -= val * old_out1;

            /* Slide the window two samples further into the past. */
            swap_tmp = old_out2;
            old_out2 = old_out0;
            old_out0 = swap_tmp;
            old_out1 = old_out3;
        }

        tmp0 = out0;
        tmp1 = out1;
        tmp2 = out2;

        /* Resolve the dependencies between the four new samples. */
        out3 -= a * tmp2;
        out2 -= a * tmp1;
        out1 -= a * tmp0;

        out3 -= b * tmp1;
        out2 -= b * tmp0;

        out3 -= c * tmp0;


        out[0] = out0;
        out[1] = out1;
        out[2] = out2;
        out[3] = out3;

        old_out0 = out0;
        old_out1 = out1;
        old_out2 = out2;
        old_out3 = out3;

        out += 4;
        in  += 4;
    }

    /* Rewind the pointers and finish the remaining (< 4) samples with the
       straightforward recurrence. */
    out -= n;
    in  -= n;
    for (; n < buffer_length; n++) {
        out[n] = in[n];
        for (i = 1; i <= filter_length; i++)
            out[n] -= filter_coeffs[i-1] * out[n-i];
    }
}
9509
2838045383c5
Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents:
9447
diff
changeset
|
198 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * LP zero synthesis filter, floating-point version.
 *
 * For every n in [0, buffer_length):
 *   out[n] = in[n] + sum(i = 1..filter_length) filter_coeffs[i-1] * in[n-i]
 *
 * @param out            output buffer, buffer_length samples
 * @param filter_coeffs  filter coefficients, filter_length entries
 * @param in             input signal; in[-filter_length .. -1] is read as
 *                       history and must be valid
 * @param buffer_length  number of samples to produce
 * @param filter_length  number of filter taps
 */
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs,
                                       const float *in, int buffer_length,
                                       int filter_length)
{
    int sample, tap;

    for (sample = 0; sample < buffer_length; sample++) {
        const float *cur = in  + sample;
        float       *dst = out + sample;

        *dst = *cur;
        /* Add the weighted past input samples, nearest one first. */
        for (tap = 0; tap < filter_length; tap++)
            *dst += filter_coeffs[tap] * cur[-1 - tap];
    }
}