Mercurial > libavcodec.hg
annotate vorbis.c @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than
regular MMX code. Examples of this are the Core1 CPU. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | 7dd2a45249a9 |
children |
rev | line source |
---|---|
4971 | 1 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
10316
diff
changeset
|
2 * @file |
4971 | 3 * Common code for Vorbis I encoder and decoder |
4 * @author Denes Balatoni ( dbalatoni programozo hu ) | |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
5 * |
4971 | 6 * This file is part of FFmpeg. |
7 * | |
8 * FFmpeg is free software; you can redistribute it and/or | |
9 * modify it under the terms of the GNU Lesser General Public | |
10 * License as published by the Free Software Foundation; either | |
11 * version 2.1 of the License, or (at your option) any later version. | |
12 * | |
13 * FFmpeg is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
19 * License along with FFmpeg; if not, write to the Free Software | |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 */ | |
22 | |
23 #undef V_DEBUG | |
24 //#define V_DEBUG | |
25 | |
26 #define ALT_BITSTREAM_READER_LE | |
27 #include "avcodec.h" | |
9428 | 28 #include "get_bits.h" |
4971 | 29 |
30 #include "vorbis.h" | |
31 | |
32 | |
33 /* Helper functions */ | |
34 | |
/**
 * Integer n-th root: x^(1/n), rounded down.
 *
 * Returns the largest value r such that r^n <= x.
 *
 * @param x value to take the root of
 * @param n degree of the root
 * @return floor(x^(1/n)); 0 when x is 0 or when n is 0 (the root is
 *         undefined for n == 0, so 0 is returned instead of looping)
 */
unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n)
{
    unsigned int ret = 1;

    if (x == 0 || n == 0)
        return 0;
    if (n == 1)
        return x;

    // Here x >= 1 and n >= 2, so 1^n <= x always holds: start at 1 and
    // grow the candidate until (ret + 1)^n would exceed x.
    for (;;) {
        unsigned int next = ret + 1, pow = 1, i;

        for (i = 0; i < n; i++) {
            // pow * next > x  <=>  pow > x / next; testing it this way
            // means the product is never formed when it would wrap.
            if (pow > x / next)
                return ret;
            pow *= next;
        }
        ret = next;
    }
}
48 | |
// Generate vlc codes from vorbis huffman code lengths

// the two bits[p] > 32 checks should be redundant, all calling code should
// already ensure that, but since it allows overwriting the stack it seems
// reasonable to check redundantly.

/**
 * Assign a code word to every used entry of a code length table.
 *
 * Entries whose length is 0 are unused and their codes[] slot is left
 * untouched.
 *
 * @param bits  code length of each entry, num elements
 * @param codes output, num elements; receives the code word of each used entry
 * @param num   number of entries in bits and codes
 * @return 0 on success or when no entry is used (empty codebook),
 *         1 when a length exceeds 32 or the lengths describe an
 *         overspecified or underspecified tree
 */
int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num)
{
    // exit_at_level[l] is the code of the free tree node at depth l, or 0 if
    // there is none. Index 0 is never read, so its initial value is irrelevant.
    uint_fast32_t exit_at_level[33] = {
        404, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    uint_fast8_t i, j;
    uint_fast32_t code, p;

#ifdef V_DEBUG
    GetBitContext gb;
#endif

    // skip leading unused entries; test the bound first so that bits[num]
    // is never read (also makes num == 0 safe)
    for (p = 0; (p < num) && (bits[p] == 0); ++p)
        ;
    if (p == num) {
//        av_log(vc->avccontext, AV_LOG_INFO, "An empty codebook. Heh?! \n");
        return 0;
    }

    // the first used entry gets the all-zero code and opens one exit
    // on every level down to its own length
    codes[p] = 0;
    if (bits[p] > 32)
        return 1;
    for (i = 0; i < bits[p]; ++i)
        exit_at_level[i+1] = (uint_fast32_t)1 << i; // unsigned shift: i may be 31

#ifdef V_DEBUG
    av_log(NULL, AV_LOG_INFO, " %d. of %d code len %d code %d - ", p, num, bits[p], codes[p]);
    init_get_bits(&gb, (uint_fast8_t *)&codes[p], bits[p]);
    for (i = 0; i < bits[p]; ++i)
        av_log(NULL, AV_LOG_INFO, "%s", get_bits1(&gb) ? "1" : "0");
    av_log(NULL, AV_LOG_INFO, "\n");
#endif

    ++p;

    for (; p < num; ++p) {
        if (bits[p] > 32)
            return 1;
        if (bits[p] == 0)
            continue;
        // find corresponding exit(node which the tree can grow further from)
        for (i = bits[p]; i > 0; --i)
            if (exit_at_level[i])
                break;
        if (!i) // overspecified tree
            return 1;
        code = exit_at_level[i];
        exit_at_level[i] = 0;
        // construct code (append 0s to end) and introduce new exits
        for (j = i + 1; j <= bits[p]; ++j)
            exit_at_level[j] = code + ((uint_fast32_t)1 << (j - 1)); // unsigned shift: j - 1 may be 31
        codes[p] = code;

#ifdef V_DEBUG
        av_log(NULL, AV_LOG_INFO, " %d. code len %d code %d - ", p, bits[p], codes[p]);
        init_get_bits(&gb, (uint_fast8_t *)&codes[p], bits[p]);
        for (i = 0; i < bits[p]; ++i)
            av_log(NULL, AV_LOG_INFO, "%s", get_bits1(&gb) ? "1" : "0");
        av_log(NULL, AV_LOG_INFO, "\n");
#endif

    }

    //no exits should be left (underspecified tree - i.e. unused valid vlcs - not allowed by SPEC)
    for (p = 1; p < 33; p++)
        if (exit_at_level[p])
            return 1;

    return 0;
}
125 | |
10309 | 126 void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values) |
127 { | |
4971 | 128 int i; |
129 list[0].sort = 0; | |
130 list[1].sort = 1; | |
131 for (i = 2; i < values; i++) { | |
132 int j; | |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
133 list[i].low = 0; |
4971 | 134 list[i].high = 1; |
135 list[i].sort = i; | |
136 for (j = 2; j < i; j++) { | |
137 int tmp = list[j].x; | |
138 if (tmp < list[i].x) { | |
10310 | 139 if (tmp > list[list[i].low].x) |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
140 list[i].low = j; |
4971 | 141 } else { |
10310 | 142 if (tmp < list[list[i].high].x) |
143 list[i].high = j; | |
4971 | 144 } |
145 } | |
146 } | |
147 for (i = 0; i < values - 1; i++) { | |
148 int j; | |
149 for (j = i + 1; j < values; j++) { | |
150 if (list[list[i].sort].x > list[list[j].sort].x) { | |
151 int tmp = list[i].sort; | |
152 list[i].sort = list[j].sort; | |
153 list[j].sort = tmp; | |
154 } | |
155 } | |
156 } | |
157 } | |
158 | |
10309 | 159 static inline void render_line_unrolled(intptr_t x, intptr_t y, int x1, |
160 intptr_t sy, int ady, int adx, | |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
161 float *buf) |
10309 | 162 { |
10102
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
163 int err = -adx; |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
164 x -= x1 - 1; |
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
165 buf += x1 - 1; |
10102
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
166 while (++x < 0) { |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
167 err += ady; |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
168 if (err >= 0) { |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
169 err += ady - adx; |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
170 y += sy; |
10102
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
171 buf[x++] = ff_vorbis_floor1_inverse_db_table[y]; |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
172 } |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
173 buf[x] = ff_vorbis_floor1_inverse_db_table[y]; |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
174 } |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
175 if (x <= 0) { |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
176 if (err + ady >= 0) |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
177 y += sy; |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
178 buf[x] = ff_vorbis_floor1_inverse_db_table[y]; |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
179 } |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
180 } |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
181 |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
182 static void render_line(int x0, int y0, int x1, int y1, float *buf) |
10309 | 183 { |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
184 int dy = y1 - y0; |
4971 | 185 int adx = x1 - x0; |
10102
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
186 int ady = FFABS(dy); |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
187 int sy = dy < 0 ? -1 : 1; |
10102
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
188 buf[x0] = ff_vorbis_floor1_inverse_db_table[y0]; |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
189 if (ady*2 <= adx) { // optimized common case |
10102
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
190 render_line_unrolled(x0, y0, x1, sy, ady, adx, buf); |
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
191 } else { |
10103 | 192 int base = dy / adx; |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
193 int x = x0; |
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
194 int y = y0; |
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
195 int err = -adx; |
10103 | 196 ady -= FFABS(base) * adx; |
197 while (++x < x1) { | |
198 y += base; | |
199 err += ady; | |
200 if (err >= 0) { | |
201 err -= adx; | |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
202 y += sy; |
10103 | 203 } |
204 buf[x] = ff_vorbis_floor1_inverse_db_table[y]; | |
4971 | 205 } |
10102
76eeb9e3599b
1.5x faster ff_vorbis_floor1_render_list, 5% faster vorbis decoding on Core2.
lorenm
parents:
9934
diff
changeset
|
206 } |
4971 | 207 } |
208 | |
10309 | 209 void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, |
10316
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
210 uint_fast16_t *y_list, int *flag, |
7bfcc66f807d
whitespace cosmetics, prettyprinting, K&R coding style
diego
parents:
10310
diff
changeset
|
211 int multiplier, float *out, int samples) |
10309 | 212 { |
4971 | 213 int lx, ly, i; |
214 lx = 0; | |
215 ly = y_list[0] * multiplier; | |
216 for (i = 1; i < values; i++) { | |
217 int pos = list[i].sort; | |
218 if (flag[pos]) { | |
6340
575e0a847f0c
30% faster ff_vorbis_floor1_render_list, 3% faster overall
lorenm
parents:
5215
diff
changeset
|
219 int x1 = list[pos].x; |
575e0a847f0c
30% faster ff_vorbis_floor1_render_list, 3% faster overall
lorenm
parents:
5215
diff
changeset
|
220 int y1 = y_list[pos] * multiplier; |
575e0a847f0c
30% faster ff_vorbis_floor1_render_list, 3% faster overall
lorenm
parents:
5215
diff
changeset
|
221 if (lx < samples) |
575e0a847f0c
30% faster ff_vorbis_floor1_render_list, 3% faster overall
lorenm
parents:
5215
diff
changeset
|
222 render_line(lx, ly, FFMIN(x1,samples), y1, out); |
575e0a847f0c
30% faster ff_vorbis_floor1_render_list, 3% faster overall
lorenm
parents:
5215
diff
changeset
|
223 lx = x1; |
575e0a847f0c
30% faster ff_vorbis_floor1_render_list, 3% faster overall
lorenm
parents:
5215
diff
changeset
|
224 ly = y1; |
4971 | 225 } |
10310 | 226 if (lx >= samples) |
227 break; | |
4971 | 228 } |
10310 | 229 if (lx < samples) |
230 render_line(lx, ly, samples, ly, out); | |
4971 | 231 } |