comparison bitstream.c @ 11547:aba20ba60384 libavcodec

optimize init_vlc(). Reduce worst case time from O(N^2) to O(N*log(N)). Speedup average case by a factor of 10 in ffv2 (total decoding speed +4-25%), factor of 1.3 in ffvhuff (total +0.5%), factor of 1.8 in indeo5 (total +1%), factor of 1.1 in mjpeg (total +0.1%).
author lorenm
date Mon, 29 Mar 2010 02:50:23 +0000
parents 2d49996fe7d1
children e023a196a73e
comparison
equal deleted inserted replaced
11546:1d81cd330928 11547:aba20ba60384
1 /* 1 /*
2 * Common bit i/o utils 2 * Common bit i/o utils
3 * Copyright (c) 2000, 2001 Fabrice Bellard 3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 * Copyright (c) 2010 Loren Merritt
5 * 6 *
6 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> 7 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
7 * 8 *
8 * This file is part of FFmpeg. 9 * This file is part of FFmpeg.
9 * 10 *
114 return -1; 115 return -1;
115 } 116 }
116 return index; 117 return index;
117 } 118 }
118 119
119 static int build_table(VLC *vlc, int table_nb_bits, 120 static av_always_inline uint32_t bitswap_32(uint32_t x) {
120 int nb_codes, 121 return av_reverse[x&0xFF]<<24
121 const void *bits, int bits_wrap, int bits_size, 122 | av_reverse[(x>>8)&0xFF]<<16
122 const void *codes, int codes_wrap, int codes_size, 123 | av_reverse[(x>>16)&0xFF]<<8
123 const void *symbols, int symbols_wrap, int symbols_size, 124 | av_reverse[x>>24];
124 uint32_t code_prefix, int n_prefix, int flags) 125 }
125 { 126
126 int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2, symbol; 127 typedef struct {
128 uint8_t bits;
129 uint16_t symbol;
130 /** codeword, with the first bit-to-be-read in the msb
131 * (even if intended for a little-endian bitstream reader) */
132 uint32_t code;
133 } VLCcode;
134
135 static int compare_vlcspec(const void *a, const void *b)
136 {
137 const VLCcode *sa=a, *sb=b;
138 return (sa->code >> 1) - (sb->code >> 1);
139 }
140
141 /**
142 * Build VLC decoding tables suitable for use with get_vlc().
143 *
144 * @param vlc the context to be initted
145 *
146 * @param table_nb_bits max length of vlc codes to store directly in this table
147 * (Longer codes are delegated to subtables.)
148 *
149 * @param nb_codes number of elements in codes[]
150 *
151 * @param codes descriptions of the vlc codes
152 * These must be ordered such that codes going into the same subtable are contiguous.
153 * Sorting by VLCcode.code is sufficient, though not necessary.
154 */
155 static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
156 VLCcode *codes, int flags)
157 {
158 int table_size, table_index, index, code_prefix, symbol, subtable_bits;
159 int i, j, k, n, nb, inc;
127 uint32_t code; 160 uint32_t code;
128 VLC_TYPE (*table)[2]; 161 VLC_TYPE (*table)[2];
129 162
130 table_size = 1 << table_nb_bits; 163 table_size = 1 << table_nb_bits;
131 table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC); 164 table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
132 #ifdef DEBUG_VLC 165 #ifdef DEBUG_VLC
133 av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d code_prefix=%x n=%d\n", 166 av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
134 table_index, table_size, code_prefix, n_prefix); 167 table_index, table_size);
135 #endif 168 #endif
136 if (table_index < 0) 169 if (table_index < 0)
137 return -1; 170 return -1;
138 table = &vlc->table[table_index]; 171 table = &vlc->table[table_index];
139 172
142 table[i][0] = -1; //codes 175 table[i][0] = -1; //codes
143 } 176 }
144 177
145 /* first pass: map codes and compute auxillary table sizes */ 178 /* first pass: map codes and compute auxillary table sizes */
146 for(i=0;i<nb_codes;i++) { 179 for(i=0;i<nb_codes;i++) {
147 GET_DATA(n, bits, i, bits_wrap, bits_size); 180 n = codes[i].bits;
148 GET_DATA(code, codes, i, codes_wrap, codes_size); 181 code = codes[i].code;
149 /* we accept tables with holes */ 182 symbol = codes[i].symbol;
150 if (n <= 0)
151 continue;
152 if (!symbols)
153 symbol = i;
154 else
155 GET_DATA(symbol, symbols, i, symbols_wrap, symbols_size);
156 #if defined(DEBUG_VLC) && 0 183 #if defined(DEBUG_VLC) && 0
157 av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code); 184 av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
158 #endif 185 #endif
159 /* if code matches the prefix, it is in the table */
160 n -= n_prefix;
161 if (n > 0) {
162 if(flags & INIT_VLC_LE)
163 code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (1 << n_prefix)-1);
164 else
165 code_prefix2= code >> n;
166 if (code_prefix2 == code_prefix) {
167 if (n <= table_nb_bits) { 186 if (n <= table_nb_bits) {
168 /* no need to add another table */ 187 /* no need to add another table */
169 j = (code << (table_nb_bits - n)) & (table_size - 1); 188 j = code >> (32 - table_nb_bits);
170 nb = 1 << (table_nb_bits - n); 189 nb = 1 << (table_nb_bits - n);
190 inc = 1;
191 if (flags & INIT_VLC_LE) {
192 j = bitswap_32(code);
193 inc = 1 << n;
194 }
171 for(k=0;k<nb;k++) { 195 for(k=0;k<nb;k++) {
172 if(flags & INIT_VLC_LE)
173 j = (code >> n_prefix) + (k<<n);
174 #ifdef DEBUG_VLC 196 #ifdef DEBUG_VLC
175 av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n", 197 av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
176 j, i, n); 198 j, i, n);
177 #endif 199 #endif
178 if (table[j][1] /*bits*/ != 0) { 200 if (table[j][1] /*bits*/ != 0) {
179 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); 201 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
180 return -1; 202 return -1;
181 } 203 }
182 table[j][1] = n; //bits 204 table[j][1] = n; //bits
183 table[j][0] = symbol; 205 table[j][0] = symbol;
184 j++; 206 j += inc;
185 } 207 }
186 } else { 208 } else {
209 /* fill auxiliary table recursively */
187 n -= table_nb_bits; 210 n -= table_nb_bits;
188 j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1); 211 code_prefix = code >> (32 - table_nb_bits);
212 subtable_bits = n;
213 codes[i].bits = n;
214 codes[i].code = code << table_nb_bits;
215 for (k = i+1; k < nb_codes; k++) {
216 n = codes[k].bits - table_nb_bits;
217 if (n <= 0)
218 break;
219 code = codes[k].code;
220 if (code >> (32 - table_nb_bits) != code_prefix)
221 break;
222 codes[k].bits = n;
223 codes[k].code = code << table_nb_bits;
224 subtable_bits = FFMAX(subtable_bits, n);
225 }
226 subtable_bits = FFMIN(subtable_bits, table_nb_bits);
227 j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix;
228 table[j][1] = -subtable_bits;
189 #ifdef DEBUG_VLC 229 #ifdef DEBUG_VLC
190 av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n", 230 av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
191 j, n); 231 j, codes[i].bits + table_nb_bits);
192 #endif 232 #endif
193 /* compute table size */ 233 index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
194 n1 = -table[j][1]; //bits 234 if (index < 0)
195 if (n > n1) 235 return -1;
196 n1 = n; 236 /* note: realloc has been done, so reload tables */
197 table[j][1] = -n1; //bits 237 table = &vlc->table[table_index];
238 table[j][0] = index; //code
239 i = k-1;
198 } 240 }
199 }
200 }
201 }
202
203 /* second pass : fill auxillary tables recursively */
204 for(i=0;i<table_size;i++) {
205 n = table[i][1]; //bits
206 if (n < 0) {
207 n = -n;
208 if (n > table_nb_bits) {
209 n = table_nb_bits;
210 table[i][1] = -n; //bits
211 }
212 index = build_table(vlc, n, nb_codes,
213 bits, bits_wrap, bits_size,
214 codes, codes_wrap, codes_size,
215 symbols, symbols_wrap, symbols_size,
216 (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
217 n_prefix + table_nb_bits, flags);
218 if (index < 0)
219 return -1;
220 /* note: realloc has been done, so reload tables */
221 table = &vlc->table[table_index];
222 table[i][0] = index; //code
223 }
224 } 241 }
225 return table_index; 242 return table_index;
226 } 243 }
227 244
228 245
256 const void *bits, int bits_wrap, int bits_size, 273 const void *bits, int bits_wrap, int bits_size,
257 const void *codes, int codes_wrap, int codes_size, 274 const void *codes, int codes_wrap, int codes_size,
258 const void *symbols, int symbols_wrap, int symbols_size, 275 const void *symbols, int symbols_wrap, int symbols_size,
259 int flags) 276 int flags)
260 { 277 {
278 VLCcode buf[nb_codes];
279 int i, j;
280
261 vlc->bits = nb_bits; 281 vlc->bits = nb_bits;
262 if(flags & INIT_VLC_USE_NEW_STATIC){ 282 if(flags & INIT_VLC_USE_NEW_STATIC){
263 if(vlc->table_size && vlc->table_size == vlc->table_allocated){ 283 if(vlc->table_size && vlc->table_size == vlc->table_allocated){
264 return 0; 284 return 0;
265 }else if(vlc->table_size){ 285 }else if(vlc->table_size){
273 293
274 #ifdef DEBUG_VLC 294 #ifdef DEBUG_VLC
275 av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes); 295 av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
276 #endif 296 #endif
277 297
278 if (build_table(vlc, nb_bits, nb_codes, 298 assert(symbols_size <= 2 || !symbols);
279 bits, bits_wrap, bits_size, 299 j = 0;
280 codes, codes_wrap, codes_size, 300 #define COPY(condition)\
281 symbols, symbols_wrap, symbols_size, 301 for (i = 0; i < nb_codes; i++) {\
282 0, 0, flags) < 0) { 302 GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\
303 if (!(condition))\
304 continue;\
305 GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\
306 if (flags & INIT_VLC_LE)\
307 buf[j].code = bitswap_32(buf[j].code);\
308 else\
309 buf[j].code <<= 32 - buf[j].bits;\
310 if (symbols)\
311 GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\
312 else\
313 buf[j].symbol = i;\
314 j++;\
315 }
316 COPY(buf[j].bits > nb_bits);
317 // qsort is the slowest part of init_vlc, and could probably be improved or avoided
318 qsort(buf, j, sizeof(VLCcode), compare_vlcspec);
319 COPY(buf[j].bits && buf[j].bits <= nb_bits);
320 nb_codes = j;
321
322 if (build_table(vlc, nb_bits, nb_codes, buf, flags) < 0) {
283 av_freep(&vlc->table); 323 av_freep(&vlc->table);
284 return -1; 324 return -1;
285 } 325 }
286 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) 326 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated)
287 av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated); 327 av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);