Mercurial > libavcodec.hg
comparison bitstream.c @ 11547:aba20ba60384 libavcodec
Optimize init_vlc().
Reduce worst-case time from O(N^2) to O(N*log(N)).
Speed up the average case by a factor of 10 in ffv2 (total decoding speed +4-25%),
a factor of 1.3 in ffvhuff (total +0.5%),
a factor of 1.8 in indeo5 (total +1%),
and a factor of 1.1 in mjpeg (total +0.1%).
author | lorenm |
---|---|
date | Mon, 29 Mar 2010 02:50:23 +0000 |
parents | 2d49996fe7d1 |
children | e023a196a73e |
Comparison legend: equal, deleted, inserted, replaced
11546:1d81cd330928 | 11547:aba20ba60384 |
---|---|
1 /* | 1 /* |
2 * Common bit i/o utils | 2 * Common bit i/o utils |
3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
5 * Copyright (c) 2010 Loren Merritt | |
5 * | 6 * |
6 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> | 7 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> |
7 * | 8 * |
8 * This file is part of FFmpeg. | 9 * This file is part of FFmpeg. |
9 * | 10 * |
114 return -1; | 115 return -1; |
115 } | 116 } |
116 return index; | 117 return index; |
117 } | 118 } |
118 | 119 |
119 static int build_table(VLC *vlc, int table_nb_bits, | 120 static av_always_inline uint32_t bitswap_32(uint32_t x) { |
120 int nb_codes, | 121 return av_reverse[x&0xFF]<<24 |
121 const void *bits, int bits_wrap, int bits_size, | 122 | av_reverse[(x>>8)&0xFF]<<16 |
122 const void *codes, int codes_wrap, int codes_size, | 123 | av_reverse[(x>>16)&0xFF]<<8 |
123 const void *symbols, int symbols_wrap, int symbols_size, | 124 | av_reverse[x>>24]; |
124 uint32_t code_prefix, int n_prefix, int flags) | 125 } |
125 { | 126 |
126 int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2, symbol; | 127 typedef struct { |
128 uint8_t bits; | |
129 uint16_t symbol; | |
130 /** codeword, with the first bit-to-be-read in the msb | |
131 * (even if intended for a little-endian bitstream reader) */ | |
132 uint32_t code; | |
133 } VLCcode; | |
134 | |
135 static int compare_vlcspec(const void *a, const void *b) | |
136 { | |
137 const VLCcode *sa=a, *sb=b; | |
138 return (sa->code >> 1) - (sb->code >> 1); | |
139 } | |
140 | |
141 /** | |
142 * Build VLC decoding tables suitable for use with get_vlc(). | |
143 * | |
144 * @param vlc the context to be initted | |
145 * | |
146 * @param table_nb_bits max length of vlc codes to store directly in this table | |
147 * (Longer codes are delegated to subtables.) | |
148 * | |
149 * @param nb_codes number of elements in codes[] | |
150 * | |
151 * @param codes descriptions of the vlc codes | |
152 * These must be ordered such that codes going into the same subtable are contiguous. | |
153 * Sorting by VLCcode.code is sufficient, though not necessary. | |
154 */ | |
155 static int build_table(VLC *vlc, int table_nb_bits, int nb_codes, | |
156 VLCcode *codes, int flags) | |
157 { | |
158 int table_size, table_index, index, code_prefix, symbol, subtable_bits; | |
159 int i, j, k, n, nb, inc; | |
127 uint32_t code; | 160 uint32_t code; |
128 VLC_TYPE (*table)[2]; | 161 VLC_TYPE (*table)[2]; |
129 | 162 |
130 table_size = 1 << table_nb_bits; | 163 table_size = 1 << table_nb_bits; |
131 table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC); | 164 table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC); |
132 #ifdef DEBUG_VLC | 165 #ifdef DEBUG_VLC |
133 av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d code_prefix=%x n=%d\n", | 166 av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n", |
134 table_index, table_size, code_prefix, n_prefix); | 167 table_index, table_size); |
135 #endif | 168 #endif |
136 if (table_index < 0) | 169 if (table_index < 0) |
137 return -1; | 170 return -1; |
138 table = &vlc->table[table_index]; | 171 table = &vlc->table[table_index]; |
139 | 172 |
142 table[i][0] = -1; //codes | 175 table[i][0] = -1; //codes |
143 } | 176 } |
144 | 177 |
145 /* first pass: map codes and compute auxillary table sizes */ | 178 /* first pass: map codes and compute auxillary table sizes */ |
146 for(i=0;i<nb_codes;i++) { | 179 for(i=0;i<nb_codes;i++) { |
147 GET_DATA(n, bits, i, bits_wrap, bits_size); | 180 n = codes[i].bits; |
148 GET_DATA(code, codes, i, codes_wrap, codes_size); | 181 code = codes[i].code; |
149 /* we accept tables with holes */ | 182 symbol = codes[i].symbol; |
150 if (n <= 0) | |
151 continue; | |
152 if (!symbols) | |
153 symbol = i; | |
154 else | |
155 GET_DATA(symbol, symbols, i, symbols_wrap, symbols_size); | |
156 #if defined(DEBUG_VLC) && 0 | 183 #if defined(DEBUG_VLC) && 0 |
157 av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code); | 184 av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code); |
158 #endif | 185 #endif |
159 /* if code matches the prefix, it is in the table */ | |
160 n -= n_prefix; | |
161 if (n > 0) { | |
162 if(flags & INIT_VLC_LE) | |
163 code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (1 << n_prefix)-1); | |
164 else | |
165 code_prefix2= code >> n; | |
166 if (code_prefix2 == code_prefix) { | |
167 if (n <= table_nb_bits) { | 186 if (n <= table_nb_bits) { |
168 /* no need to add another table */ | 187 /* no need to add another table */ |
169 j = (code << (table_nb_bits - n)) & (table_size - 1); | 188 j = code >> (32 - table_nb_bits); |
170 nb = 1 << (table_nb_bits - n); | 189 nb = 1 << (table_nb_bits - n); |
190 inc = 1; | |
191 if (flags & INIT_VLC_LE) { | |
192 j = bitswap_32(code); | |
193 inc = 1 << n; | |
194 } | |
171 for(k=0;k<nb;k++) { | 195 for(k=0;k<nb;k++) { |
172 if(flags & INIT_VLC_LE) | |
173 j = (code >> n_prefix) + (k<<n); | |
174 #ifdef DEBUG_VLC | 196 #ifdef DEBUG_VLC |
175 av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n", | 197 av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n", |
176 j, i, n); | 198 j, i, n); |
177 #endif | 199 #endif |
178 if (table[j][1] /*bits*/ != 0) { | 200 if (table[j][1] /*bits*/ != 0) { |
179 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); | 201 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); |
180 return -1; | 202 return -1; |
181 } | 203 } |
182 table[j][1] = n; //bits | 204 table[j][1] = n; //bits |
183 table[j][0] = symbol; | 205 table[j][0] = symbol; |
184 j++; | 206 j += inc; |
185 } | 207 } |
186 } else { | 208 } else { |
209 /* fill auxiliary table recursively */ | |
187 n -= table_nb_bits; | 210 n -= table_nb_bits; |
188 j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1); | 211 code_prefix = code >> (32 - table_nb_bits); |
212 subtable_bits = n; | |
213 codes[i].bits = n; | |
214 codes[i].code = code << table_nb_bits; | |
215 for (k = i+1; k < nb_codes; k++) { | |
216 n = codes[k].bits - table_nb_bits; | |
217 if (n <= 0) | |
218 break; | |
219 code = codes[k].code; | |
220 if (code >> (32 - table_nb_bits) != code_prefix) | |
221 break; | |
222 codes[k].bits = n; | |
223 codes[k].code = code << table_nb_bits; | |
224 subtable_bits = FFMAX(subtable_bits, n); | |
225 } | |
226 subtable_bits = FFMIN(subtable_bits, table_nb_bits); | |
227 j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix; | |
228 table[j][1] = -subtable_bits; | |
189 #ifdef DEBUG_VLC | 229 #ifdef DEBUG_VLC |
190 av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n", | 230 av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n", |
191 j, n); | 231 j, codes[i].bits + table_nb_bits); |
192 #endif | 232 #endif |
193 /* compute table size */ | 233 index = build_table(vlc, subtable_bits, k-i, codes+i, flags); |
194 n1 = -table[j][1]; //bits | 234 if (index < 0) |
195 if (n > n1) | 235 return -1; |
196 n1 = n; | 236 /* note: realloc has been done, so reload tables */ |
197 table[j][1] = -n1; //bits | 237 table = &vlc->table[table_index]; |
238 table[j][0] = index; //code | |
239 i = k-1; | |
198 } | 240 } |
199 } | |
200 } | |
201 } | |
202 | |
203 /* second pass : fill auxillary tables recursively */ | |
204 for(i=0;i<table_size;i++) { | |
205 n = table[i][1]; //bits | |
206 if (n < 0) { | |
207 n = -n; | |
208 if (n > table_nb_bits) { | |
209 n = table_nb_bits; | |
210 table[i][1] = -n; //bits | |
211 } | |
212 index = build_table(vlc, n, nb_codes, | |
213 bits, bits_wrap, bits_size, | |
214 codes, codes_wrap, codes_size, | |
215 symbols, symbols_wrap, symbols_size, | |
216 (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i), | |
217 n_prefix + table_nb_bits, flags); | |
218 if (index < 0) | |
219 return -1; | |
220 /* note: realloc has been done, so reload tables */ | |
221 table = &vlc->table[table_index]; | |
222 table[i][0] = index; //code | |
223 } | |
224 } | 241 } |
225 return table_index; | 242 return table_index; |
226 } | 243 } |
227 | 244 |
228 | 245 |
256 const void *bits, int bits_wrap, int bits_size, | 273 const void *bits, int bits_wrap, int bits_size, |
257 const void *codes, int codes_wrap, int codes_size, | 274 const void *codes, int codes_wrap, int codes_size, |
258 const void *symbols, int symbols_wrap, int symbols_size, | 275 const void *symbols, int symbols_wrap, int symbols_size, |
259 int flags) | 276 int flags) |
260 { | 277 { |
278 VLCcode buf[nb_codes]; | |
279 int i, j; | |
280 | |
261 vlc->bits = nb_bits; | 281 vlc->bits = nb_bits; |
262 if(flags & INIT_VLC_USE_NEW_STATIC){ | 282 if(flags & INIT_VLC_USE_NEW_STATIC){ |
263 if(vlc->table_size && vlc->table_size == vlc->table_allocated){ | 283 if(vlc->table_size && vlc->table_size == vlc->table_allocated){ |
264 return 0; | 284 return 0; |
265 }else if(vlc->table_size){ | 285 }else if(vlc->table_size){ |
273 | 293 |
274 #ifdef DEBUG_VLC | 294 #ifdef DEBUG_VLC |
275 av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes); | 295 av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes); |
276 #endif | 296 #endif |
277 | 297 |
278 if (build_table(vlc, nb_bits, nb_codes, | 298 assert(symbols_size <= 2 || !symbols); |
279 bits, bits_wrap, bits_size, | 299 j = 0; |
280 codes, codes_wrap, codes_size, | 300 #define COPY(condition)\ |
281 symbols, symbols_wrap, symbols_size, | 301 for (i = 0; i < nb_codes; i++) {\ |
282 0, 0, flags) < 0) { | 302 GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\ |
303 if (!(condition))\ | |
304 continue;\ | |
305 GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\ | |
306 if (flags & INIT_VLC_LE)\ | |
307 buf[j].code = bitswap_32(buf[j].code);\ | |
308 else\ | |
309 buf[j].code <<= 32 - buf[j].bits;\ | |
310 if (symbols)\ | |
311 GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\ | |
312 else\ | |
313 buf[j].symbol = i;\ | |
314 j++;\ | |
315 } | |
316 COPY(buf[j].bits > nb_bits); | |
317 // qsort is the slowest part of init_vlc, and could probably be improved or avoided | |
318 qsort(buf, j, sizeof(VLCcode), compare_vlcspec); | |
319 COPY(buf[j].bits && buf[j].bits <= nb_bits); | |
320 nb_codes = j; | |
321 | |
322 if (build_table(vlc, nb_bits, nb_codes, buf, flags) < 0) { | |
283 av_freep(&vlc->table); | 323 av_freep(&vlc->table); |
284 return -1; | 324 return -1; |
285 } | 325 } |
286 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) | 326 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) |
287 av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated); | 327 av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated); |