Mercurial > libavcodec.hg
annotate celp_filters.c @ 11104:bb877c9cb102 libavcodec
Detect spatial direct MBs partitioned smaller than 16x16 that can be partitioned
as 16x16 (except ones changing interlacing relative to the colocated MB).
20 cycles slower during MV generation
175 cycles faster during MC
author | michael |
---|---|
date | Mon, 08 Feb 2010 16:23:05 +0000 |
parents | 63451af5f8f9 |
children | 51d5b29fc4fc |
rev | line source |
---|---|
8049
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
1 /* |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
2 * various filters for ACELP-based codecs |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
3 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
4 * Copyright (c) 2008 Vladimir Voroshilov |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
5 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
6 * This file is part of FFmpeg. |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
7 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
12 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
17 * |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
21 */ |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
22 |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
23 #include <inttypes.h> |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
24 |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
25 #include "avcodec.h" |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
26 #include "celp_filters.h" |
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
27 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * Circularly convolve a sparse pulse vector with a filter.
 *
 * Every non-zero fc_in[i] adds a copy of filter, rotated by i positions
 * (wrapping around at len), multiplied by fc_in[i] and shifted right by
 * 15 bits, into fc_out.
 *
 * @param fc_out  result vector of len elements; cleared before accumulation
 * @param fc_in   input vector of len elements
 * @param filter  filter taps, len elements
 * @param len     number of elements in each of the three vectors
 */
void ff_celp_convolve_circ(int16_t* fc_out, const int16_t* fc_in,
                           const int16_t* filter, int len)
{
    int pos, dst;

    memset(fc_out, 0, len * sizeof(int16_t));

    /* Only a few entries of fc_in are expected to be non-zero over a
       subframe, so walk fc_in in the outer loop and skip zeros early. */
    for (pos = 0; pos < len; pos++) {
        const int pulse = fc_in[pos];

        if (!pulse)
            continue;

        for (dst = 0; dst < len; dst++) {
            /* Outputs located before the pulse use the taps that wrapped
               around the end of the filter. */
            const int tap = dst < pos ? len + dst - pos : dst - pos;

            fc_out[dst] += (pulse * filter[tap]) >> 15;
        }
    }
}
611a21e4b01b
Split off celp_filters.[ch] from acelp_filters.[ch] for the QCELP decoder.
diego
parents:
diff
changeset
|
47 |
10045
d35904b4fe3f
Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents:
10013
diff
changeset
|
/**
 * Add a scaled, circularly shifted vector to another vector.
 *
 * Computes out[k] = in[k] + fac * lagged[(k - lag) wrapped into 0..n-1]:
 * the first lag outputs read from the end of lagged, the remaining
 * outputs read from its beginning.
 *
 * @param out     destination vector
 * @param in      vector the scaled data is added to
 * @param lagged  vector that is read with a circular shift of lag
 * @param lag     shift applied to lagged
 * @param fac     scale factor applied to lagged
 * @param n       number of elements in the vectors
 */
void ff_celp_circ_addf(float *out, const float *in,
                       const float *lagged, int lag, float fac, int n)
{
    int k   = 0;
    int src = n - lag;

    /* Head: the shift points in front of lagged[0], so read from its tail. */
    while (k < lag) {
        out[k] = in[k] + fac * lagged[src];
        k++;
        src++;
    }

    /* Remainder: plain shifted read starting at lagged[k - lag]. */
    for (src = k - lag; k < n; k++, src++)
        out[k] = in[k] + fac * lagged[src];
}
d35904b4fe3f
Add ff_celp_circ_addf() function to be used for sparse vector circular
superdump
parents:
10013
diff
changeset
|
57 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * Fixed-point LP synthesis filter.
 *
 * For every sample the accumulator starts at rounder, the products
 * filter_coeffs[i-1] * out[n-i] (i = 1..filter_length) are subtracted,
 * the accumulator is shifted right by 12 bits and in[n] is added.
 *
 * @param out               output buffer; out[-filter_length] .. out[-1]
 *                          are read as filter history and must be valid
 * @param filter_coeffs     filter_length coefficients
 * @param in                input samples, buffer_length elements
 * @param buffer_length     number of samples to produce
 * @param filter_length     number of filter coefficients
 * @param stop_on_overflow  if non-zero, abort as soon as a result does not
 *                          fit into 16 bits instead of saturating it
 * @param rounder           initial value of the accumulator
 *
 * @return 1 if a 16-bit overflow was hit while stop_on_overflow was set
 *         (samples produced before that point have already been written),
 *         0 otherwise
 */
int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
                                const int16_t *in, int buffer_length,
                                int filter_length, int stop_on_overflow,
                                int rounder)
{
    int pos;

    for (pos = 0; pos < buffer_length; pos++) {
        const int16_t *past = out + pos; /* past[-1] is the newest output */
        int acc = rounder;
        int tap;

        for (tap = 0; tap < filter_length; tap++)
            acc -= filter_coeffs[tap] * past[-1 - tap];

        acc = (acc >> 12) + in[pos];

        /* Result does not fit into a signed 16-bit sample. */
        if (acc < -32768 || acc > 32767) {
            if (stop_on_overflow)
                return 1;
            acc = acc < 0 ? -32768 : 32767;
        }
        out[pos] = acc;
    }

    return 0;
}
8091 | 82 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * Floating-point LP synthesis filter.
 *
 * Computes, for n = 0 .. buffer_length-1:
 *     out[n] = in[n] - sum(filter_coeffs[i-1] * out[n-i], i = 1..filter_length)
 *
 * When filter_length is even and at least 4, four output samples are
 * produced per iteration: the contribution of already known history is
 * accumulated first, then the dependencies between the four new samples
 * are resolved with the precomputed factors a, b and c.  Any other
 * filter_length, and the up to three samples left over after the unrolled
 * loop, are handled by the plain recursion.
 *
 * @param out            output buffer; out[-filter_length] .. out[-1] are
 *                       read as filter history and must be valid.  Nothing
 *                       older than out[-filter_length] is accessed.
 * @param filter_coeffs  filter_length coefficients
 * @param in             input samples, buffer_length elements
 * @param buffer_length  number of samples to produce
 * @param filter_length  number of filter coefficients
 */
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
                                  const float* in, int buffer_length,
                                  int filter_length)
{
    int i, n = 0;

    /* The unrolled path consumes the coefficients above the fourth one in
       pairs and unconditionally uses the first four, so it is only valid
       for even lengths >= 4. */
    if (filter_length >= 4 && !(filter_length & 1)) {
        float out0, out1, out2, out3;
        float old_out0, old_out1, old_out2, old_out3;
        float a, b, c;

        /* Factors that express out1..out3 in terms of the partial sums
           tmp0..tmp2 instead of the not yet final out0..out2. */
        a = filter_coeffs[0];
        b = filter_coeffs[1];
        c = filter_coeffs[2];
        b -= filter_coeffs[0] * filter_coeffs[0];
        c -= filter_coeffs[1] * filter_coeffs[0];
        c -= filter_coeffs[0] * b;

        old_out0 = out[-4];
        old_out1 = out[-3];
        old_out2 = out[-2];
        old_out3 = out[-1];
        for (n = 0; n <= buffer_length - 4; n += 4) {
            float tmp0, tmp1, tmp2;
            float val;

            out0 = in[0];
            out1 = in[1];
            out2 = in[2];
            out3 = in[3];

            /* Taps 1..4 applied to the four most recent history samples. */
            out0 -= filter_coeffs[2] * old_out1;
            out1 -= filter_coeffs[2] * old_out2;
            out2 -= filter_coeffs[2] * old_out3;

            out0 -= filter_coeffs[1] * old_out2;
            out1 -= filter_coeffs[1] * old_out3;

            out0 -= filter_coeffs[0] * old_out3;

            val = filter_coeffs[3];

            out0 -= val * old_out0;
            out1 -= val * old_out1;
            out2 -= val * old_out2;
            out3 -= val * old_out3;

            /* Remaining taps, two per iteration.  Each history sample is
               loaded right before its first use so that no element older
               than out[-filter_length] is ever read. */
            for (i = 5; i < filter_length; i += 2) {
                float swap;

                old_out3 = out[-i];
                val = filter_coeffs[i-1];

                out0 -= val * old_out3;
                out1 -= val * old_out0;
                out2 -= val * old_out1;
                out3 -= val * old_out2;

                old_out2 = out[-i-1];

                val = filter_coeffs[i];

                out0 -= val * old_out2;
                out1 -= val * old_out3;
                out2 -= val * old_out0;
                out3 -= val * old_out1;

                /* Rotate the history registers by two samples. */
                swap     = old_out0;
                old_out0 = old_out2;
                old_out2 = swap;
                old_out1 = old_out3;
            }

            tmp0 = out0;
            tmp1 = out1;
            tmp2 = out2;

            /* Resolve the dependencies among the four new samples. */
            out3 -= a * tmp2;
            out2 -= a * tmp1;
            out1 -= a * tmp0;

            out3 -= b * tmp1;
            out2 -= b * tmp0;

            out3 -= c * tmp0;

            out[0] = out0;
            out[1] = out1;
            out[2] = out2;
            out[3] = out3;

            old_out0 = out0;
            old_out1 = out1;
            old_out2 = out2;
            old_out3 = out3;

            out += 4;
            in  += 4;
        }

        /* Undo the pointer advance so the tail loop can index by n. */
        out -= n;
        in  -= n;
    }

    for (; n < buffer_length; n++) {
        out[n] = in[n];
        for (i = 1; i <= filter_length; i++)
            out[n] -= filter_coeffs[i-1] * out[n-i];
    }
}
9509
2838045383c5
Add LP zero synthesis filter. Patch by Kenan Gillet.
reynaldo
parents:
9447
diff
changeset
|
190 |
10690
63451af5f8f9
Cosmetics: make celp_filters.* formatting more consistent with the rest of
vitor
parents:
10689
diff
changeset
|
/**
 * Floating-point LP zero synthesis filter.
 *
 * Computes, for n = 0 .. buffer_length-1:
 *     out[n] = in[n] + sum(filter_coeffs[i-1] * in[n-i], i = 1..filter_length)
 *
 * @param out            output buffer, buffer_length elements
 * @param filter_coeffs  filter_length coefficients
 * @param in             input samples; in[-filter_length] .. in[-1] are
 *                       read as well and must be valid
 * @param buffer_length  number of samples to produce
 * @param filter_length  number of filter coefficients
 */
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs,
                                       const float *in, int buffer_length,
                                       int filter_length)
{
    int pos;

    for (pos = 0; pos < buffer_length; pos++) {
        float       *dst = out + pos;
        const float *src = in  + pos; /* src[-1] is the previous input */
        int tap;

        *dst = *src;
        for (tap = 0; tap < filter_length; tap++)
            *dst += filter_coeffs[tap] * src[-1 - tap];
    }
}