Mercurial > libavcodec.hg
comparison sonic.c @ 2182:113732773bda libavcodec
new opensource lossy/lossless audio codec based on speech compression techniques (actually based on bonk)
author | alex |
---|---|
date | Sat, 21 Aug 2004 19:19:35 +0000 |
parents | |
children | 6d40885b03ad |
comparison
equal
deleted
inserted
replaced
2181:3b84b5fea968 | 2182:113732773bda |
---|---|
1 /* | |
2 * Simple free lossless/lossy audio codec | |
3 * Copyright (c) 2004 Alex Beregszaszi | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 */ | |
19 #include "avcodec.h" | |
20 #include "golomb.h" | |
21 | |
22 /** | |
23 * @file sonic.c | |
24 * Simple free lossless/lossy audio codec | |
25 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk) | |
26 * Written and designed by Alex Beregszaszi | |
27 * | |
28 * TODO: | |
29 * - CABAC put/get_symbol | |
30 * - independent quantizer for channels | |
31 * - >2 channels support | |
32 * - more decorrelation types | |
33 * - more tap_quant tests | |
34 * - selectable intlist writers/readers (bonk-style, golomb, cabac) | |
35 */ | |
36 | |
/* Largest number of interleaved channels the codec accepts. */
#define MAX_CHANNELS 2

/**
 * Private state shared by the Sonic encoder and decoder.
 */
typedef struct SonicContext {
    int lossless;                       ///< non-zero for the lossless variant
    int mid_side;                       ///< non-zero when the two channels are mid/side coded

    int num_taps;                       ///< predictor order
    int downsampling;                   ///< input samples folded into one coded sample
    double quantization;                ///< rate-control scale used by the lossy encoder

    int channels;
    int samplerate;
    int block_align;                    ///< coded samples per channel in one frame
    int frame_size;                     ///< channels * block_align * downsampling

    int *tap_quant;                     ///< per-tap quantizer step, num_taps entries
    int *int_samples;                   ///< interleaved working samples, frame_size entries
    int *coded_samples[MAX_CHANNELS];   ///< block_align entries per channel

    // for encoding
    int *tail;                          ///< last tail_size samples of the previous frame
    int tail_size;
    int *window;                        ///< analysis window, window_size entries
    int window_size;

    // for decoding
    int *predictor_k;                   ///< lattice coefficients, num_taps entries
    int *predictor_state[MAX_CHANNELS]; ///< lattice filter state, num_taps entries per channel
} SonicContext;
61 | |
/* Fixed-point precision of the lattice coefficients: bit count and scale. */
#define LATTICE_SHIFT   10
#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)

/* Extra fractional bits carried by samples in lossy mode: bit count and scale. */
#define SAMPLE_SHIFT    4
#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

/* Tuning constants of the lossy encoder's rate control. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0
69 | |
/**
 * Divide a by b with rounding that is symmetric around zero:
 * the magnitude of a is biased by b/2, divided, and the sign restored.
 * NOTE(review): callers appear to pass only positive b - confirm.
 */
static inline int divide(int a, int b)
{
    const int negative  = a < 0;
    const int magnitude = negative ? -a : a;
    const int quotient  = (magnitude + b / 2) / b;

    return negative ? -quotient : quotient;
}
77 | |
/**
 * Arithmetic right shift of a by b bits after adding half of the
 * divisor (1 << (b-1)), i.e. a rounding shift. b must be at least 1.
 */
static inline int shift(int a, int b)
{
    const int half = 1 << (b - 1);

    return (a + half) >> b;
}
82 | |
/**
 * Right shift a by b bits and add one to the result when a is negative.
 */
static inline int shift_down(int a, int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
87 | |
88 #if 1 | |
89 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) | |
90 { | |
91 int i; | |
92 | |
93 for (i = 0; i < entries; i++) | |
94 set_se_golomb(pb, buf[i]); | |
95 | |
96 return 1; | |
97 } | |
98 | |
99 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) | |
100 { | |
101 int i; | |
102 | |
103 for (i = 0; i < entries; i++) | |
104 buf[i] = get_se_golomb(gb); | |
105 | |
106 return 1; | |
107 } | |
108 | |
109 #else | |
110 | |
/* Divisor controlling how fast the run-length step adapts in the intlist coder. */
#define ADAPT_LEVEL 8

/**
 * Count the bits needed to represent x: the position of its highest
 * set bit plus one, or 0 when x is 0.
 */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x != 0; x >>= 1)
        width++;

    return width;
}
124 | |
125 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max) | |
126 { | |
127 int i, bits; | |
128 | |
129 if (!max) | |
130 return; | |
131 | |
132 bits = bits_to_store(max); | |
133 | |
134 for (i = 0; i < bits-1; i++) | |
135 put_bits(pb, 1, value & (1 << i)); | |
136 | |
137 if ( (value | (1 << (bits-1))) <= max) | |
138 put_bits(pb, 1, value & (1 << (bits-1))); | |
139 } | |
140 | |
141 static unsigned int read_uint_max(GetBitContext *gb, int max) | |
142 { | |
143 int i, bits, value = 0; | |
144 | |
145 if (!max) | |
146 return 0; | |
147 | |
148 bits = bits_to_store(max); | |
149 | |
150 for (i = 0; i < bits-1; i++) | |
151 if (get_bits1(gb)) | |
152 value += 1 << i; | |
153 | |
154 if ( (value | (1<<(bits-1))) <= max) | |
155 if (get_bits1(gb)) | |
156 value += 1 << (bits-1); | |
157 | |
158 return value; | |
159 } | |
160 | |
161 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) | |
162 { | |
163 int i, j, x = 0, low_bits = 0, max = 0; | |
164 int step = 256, pos = 0, dominant = 0, any = 0; | |
165 int *copy, *bits; | |
166 | |
167 copy = av_mallocz(4* entries); | |
168 if (!copy) | |
169 return -1; | |
170 | |
171 if (base_2_part) | |
172 { | |
173 int energy = 0; | |
174 | |
175 for (i = 0; i < entries; i++) | |
176 energy += abs(buf[i]); | |
177 | |
178 low_bits = bits_to_store(energy / (entries * 2)); | |
179 if (low_bits > 15) | |
180 low_bits = 15; | |
181 | |
182 put_bits(pb, 4, low_bits); | |
183 } | |
184 | |
185 for (i = 0; i < entries; i++) | |
186 { | |
187 put_bits(pb, low_bits, abs(buf[i])); | |
188 copy[i] = abs(buf[i]) >> low_bits; | |
189 if (copy[i] > max) | |
190 max = abs(copy[i]); | |
191 } | |
192 | |
193 bits = av_mallocz(4* entries*max); | |
194 if (!bits) | |
195 { | |
196 // av_free(copy); | |
197 return -1; | |
198 } | |
199 | |
200 for (i = 0; i <= max; i++) | |
201 { | |
202 for (j = 0; j < entries; j++) | |
203 if (copy[j] >= i) | |
204 bits[x++] = copy[j] > i; | |
205 } | |
206 | |
207 // store bitstream | |
208 while (pos < x) | |
209 { | |
210 int steplet = step >> 8; | |
211 | |
212 if (pos + steplet > x) | |
213 steplet = x - pos; | |
214 | |
215 for (i = 0; i < steplet; i++) | |
216 if (bits[i+pos] != dominant) | |
217 any = 1; | |
218 | |
219 put_bits(pb, 1, any); | |
220 | |
221 if (!any) | |
222 { | |
223 pos += steplet; | |
224 step += step / ADAPT_LEVEL; | |
225 } | |
226 else | |
227 { | |
228 int interloper = 0; | |
229 | |
230 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant)) | |
231 interloper++; | |
232 | |
233 // note change | |
234 write_uint_max(pb, interloper, (step >> 8) - 1); | |
235 | |
236 pos += interloper + 1; | |
237 step -= step / ADAPT_LEVEL; | |
238 } | |
239 | |
240 if (step < 256) | |
241 { | |
242 step = 65536 / step; | |
243 dominant = !dominant; | |
244 } | |
245 } | |
246 | |
247 // store signs | |
248 for (i = 0; i < entries; i++) | |
249 if (buf[i]) | |
250 put_bits(pb, 1, buf[i] < 0); | |
251 | |
252 // av_free(bits); | |
253 // av_free(copy); | |
254 | |
255 return 0; | |
256 } | |
257 | |
258 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) | |
259 { | |
260 int i, low_bits = 0, x = 0; | |
261 int n_zeros = 0, step = 256, dominant = 0; | |
262 int pos = 0, level = 0; | |
263 int *bits = av_mallocz(4* entries); | |
264 | |
265 if (!bits) | |
266 return -1; | |
267 | |
268 if (base_2_part) | |
269 { | |
270 low_bits = get_bits(gb, 4); | |
271 | |
272 if (low_bits) | |
273 for (i = 0; i < entries; i++) | |
274 buf[i] = get_bits(gb, low_bits); | |
275 } | |
276 | |
277 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits); | |
278 | |
279 while (n_zeros < entries) | |
280 { | |
281 int steplet = step >> 8; | |
282 | |
283 if (!get_bits1(gb)) | |
284 { | |
285 for (i = 0; i < steplet; i++) | |
286 bits[x++] = dominant; | |
287 | |
288 if (!dominant) | |
289 n_zeros += steplet; | |
290 | |
291 step += step / ADAPT_LEVEL; | |
292 } | |
293 else | |
294 { | |
295 int actual_run = read_uint_max(gb, steplet-1); | |
296 | |
297 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run); | |
298 | |
299 for (i = 0; i < actual_run; i++) | |
300 bits[x++] = dominant; | |
301 | |
302 bits[x++] = !dominant; | |
303 | |
304 if (!dominant) | |
305 n_zeros += actual_run; | |
306 else | |
307 n_zeros++; | |
308 | |
309 step -= step / ADAPT_LEVEL; | |
310 } | |
311 | |
312 if (step < 256) | |
313 { | |
314 step = 65536 / step; | |
315 dominant = !dominant; | |
316 } | |
317 } | |
318 | |
319 // reconstruct unsigned values | |
320 n_zeros = 0; | |
321 for (i = 0; n_zeros < entries; i++) | |
322 { | |
323 while(1) | |
324 { | |
325 if (pos >= entries) | |
326 { | |
327 pos = 0; | |
328 level += 1 << low_bits; | |
329 } | |
330 | |
331 if (buf[pos] >= level) | |
332 break; | |
333 | |
334 pos++; | |
335 } | |
336 | |
337 if (bits[i]) | |
338 buf[pos] += 1 << low_bits; | |
339 else | |
340 n_zeros++; | |
341 | |
342 pos++; | |
343 } | |
344 // av_free(bits); | |
345 | |
346 // read signs | |
347 for (i = 0; i < entries; i++) | |
348 if (buf[i] && get_bits1(gb)) | |
349 buf[i] = -buf[i]; | |
350 | |
351 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos); | |
352 | |
353 return 0; | |
354 } | |
355 #endif | |
356 | |
357 static void predictor_init_state(int *k, int *state, int order) | |
358 { | |
359 int i; | |
360 | |
361 for (i = order-2; i >= 0; i--) | |
362 { | |
363 int j, p, x = state[i]; | |
364 | |
365 for (j = 0, p = i+1; p < order; j++,p++) | |
366 { | |
367 int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT); | |
368 state[p] += shift_down(k[j]*x, LATTICE_SHIFT); | |
369 x = tmp; | |
370 } | |
371 } | |
372 } | |
373 | |
374 static int predictor_calc_error(int *k, int *state, int order, int error) | |
375 { | |
376 int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT); | |
377 | |
378 #if 1 | |
379 int *k_ptr = &(k[order-2]), | |
380 *state_ptr = &(state[order-2]); | |
381 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) | |
382 { | |
383 int k_value = *k_ptr, state_value = *state_ptr; | |
384 x -= shift_down(k_value * state_value, LATTICE_SHIFT); | |
385 state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT); | |
386 } | |
387 #else | |
388 for (i = order-2; i >= 0; i--) | |
389 { | |
390 x -= shift_down(k[i] * state[i], LATTICE_SHIFT); | |
391 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT); | |
392 } | |
393 #endif | |
394 | |
395 // don't drift too far, to avoid overflows | |
396 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16); | |
397 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16); | |
398 | |
399 state[0] = x; | |
400 | |
401 return x; | |
402 } | |
403 | |
404 // Heavily modified Levinson-Durbin algorithm which | |
405 // copes better with quantization, and calculates the | |
406 // actual whitened result as it goes. | |
407 | |
408 static void modified_levinson_durbin(int *window, int window_entries, | |
409 int *out, int out_entries, int channels, int *tap_quant) | |
410 { | |
411 int i; | |
412 int *state = av_mallocz(4* window_entries); | |
413 | |
414 memcpy(state, window, 4* window_entries); | |
415 | |
416 for (i = 0; i < out_entries; i++) | |
417 { | |
418 int step = (i+1)*channels, k, j; | |
419 double xx = 0.0, xy = 0.0; | |
420 #if 1 | |
421 int *x_ptr = &(window[step]), *state_ptr = &(state[0]); | |
422 j = window_entries - step; | |
423 for (;j>=0;j--,x_ptr++,state_ptr++) | |
424 { | |
425 double x_value = *x_ptr, state_value = *state_ptr; | |
426 xx += state_value*state_value; | |
427 xy += x_value*state_value; | |
428 } | |
429 #else | |
430 for (j = 0; j <= (window_entries - step); j++); | |
431 { | |
432 double stepval = window[step+j], stateval = window[j]; | |
433 // xx += (double)window[j]*(double)window[j]; | |
434 // xy += (double)window[step+j]*(double)window[j]; | |
435 xx += stateval*stateval; | |
436 xy += stepval*stateval; | |
437 } | |
438 #endif | |
439 if (xx == 0.0) | |
440 k = 0; | |
441 else | |
442 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5)); | |
443 | |
444 if (k > (LATTICE_FACTOR/tap_quant[i])) | |
445 k = LATTICE_FACTOR/tap_quant[i]; | |
446 if (-k > (LATTICE_FACTOR/tap_quant[i])) | |
447 k = -(LATTICE_FACTOR/tap_quant[i]); | |
448 | |
449 out[i] = k; | |
450 k *= tap_quant[i]; | |
451 | |
452 #if 1 | |
453 x_ptr = &(window[step]); | |
454 state_ptr = &(state[0]); | |
455 j = window_entries - step; | |
456 for (;j>=0;j--,x_ptr++,state_ptr++) | |
457 { | |
458 int x_value = *x_ptr, state_value = *state_ptr; | |
459 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT); | |
460 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT); | |
461 } | |
462 #else | |
463 for (j=0; j <= (window_entries - step); j++) | |
464 { | |
465 int stepval = window[step+j], stateval=state[j]; | |
466 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT); | |
467 state[j] += shift_down(k * stepval, LATTICE_SHIFT); | |
468 } | |
469 #endif | |
470 } | |
471 | |
472 av_free(state); | |
473 } | |
474 | |
/* Sample rates selectable by the 4-bit code of a version 1 header;
 * the array index is the code (see code_samplerate()). Read-only. */
static const int samplerate_table[] =
    { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
477 | |
478 #ifdef CONFIG_ENCODERS | |
479 | |
/**
 * Map a sample rate in Hz to its header code.
 *
 * @param samplerate sample rate to look up
 * @return the code (0..8), or -1 if the rate has no code
 */
static inline int code_samplerate(int samplerate)
{
    static const int rates[] =
        { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
    const int count = (int)(sizeof(rates) / sizeof(rates[0]));
    int code;

    for (code = 0; code < count; code++)
    {
        if (rates[code] == samplerate)
            return code;
    }

    return -1;
}
496 | |
497 static int sonic_encode_init(AVCodecContext *avctx) | |
498 { | |
499 SonicContext *s = avctx->priv_data; | |
500 PutBitContext pb; | |
501 int i, version = 0; | |
502 | |
503 if (avctx->channels > MAX_CHANNELS) | |
504 return -1; /* only stereo or mono for now */ | |
505 | |
506 if (avctx->channels == 2) | |
507 s->mid_side = 1; | |
508 if (avctx->codec->id == CODEC_ID_SONIC_LS) | |
509 { | |
510 s->lossless = 1; | |
511 s->num_taps = 32; | |
512 s->downsampling = 1; | |
513 s->quantization = 0.0; | |
514 } | |
515 else | |
516 { | |
517 s->num_taps = 128; | |
518 s->downsampling = 2; | |
519 s->quantization = 1.0; | |
520 } | |
521 | |
522 // max tap 2048 | |
523 if ((s->num_taps < 32) || (s->num_taps > 1024) || | |
524 ((s->num_taps>>5)<<5 != s->num_taps)) | |
525 { | |
526 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n"); | |
527 return -1; | |
528 } | |
529 | |
530 // generate taps | |
531 s->tap_quant = av_mallocz(4* s->num_taps); | |
532 for (i = 0; i < s->num_taps; i++) | |
533 s->tap_quant[i] = (int)(sqrt(i+1)); | |
534 | |
535 s->channels = avctx->channels; | |
536 s->samplerate = avctx->sample_rate; | |
537 | |
538 s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling; | |
539 s->frame_size = s->channels*s->block_align*s->downsampling; | |
540 | |
541 s->tail = av_mallocz(4* s->num_taps*s->channels); | |
542 if (!s->tail) | |
543 return -1; | |
544 s->tail_size = s->num_taps*s->channels; | |
545 | |
546 s->predictor_k = av_mallocz(4 * s->num_taps); | |
547 if (!s->predictor_k) | |
548 return -1; | |
549 | |
550 for (i = 0; i < s->channels; i++) | |
551 { | |
552 s->coded_samples[i] = av_mallocz(4* s->block_align); | |
553 if (!s->coded_samples[i]) | |
554 return -1; | |
555 } | |
556 | |
557 s->int_samples = av_mallocz(4* s->frame_size); | |
558 | |
559 s->window_size = ((2*s->tail_size)+s->frame_size); | |
560 s->window = av_mallocz(4* s->window_size); | |
561 if (!s->window) | |
562 return -1; | |
563 | |
564 avctx->extradata = av_mallocz(16); | |
565 if (!avctx->extradata) | |
566 return -1; | |
567 init_put_bits(&pb, avctx->extradata, 16*8); | |
568 | |
569 put_bits(&pb, 2, version); // version | |
570 if (version == 1) | |
571 { | |
572 put_bits(&pb, 2, s->channels); | |
573 put_bits(&pb, 4, code_samplerate(s->samplerate)); | |
574 } | |
575 put_bits(&pb, 1, s->lossless); | |
576 if (!s->lossless) | |
577 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision | |
578 put_bits(&pb, 1, s->mid_side); | |
579 put_bits(&pb, 2, s->downsampling); | |
580 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024 | |
581 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table | |
582 | |
583 flush_put_bits(&pb); | |
584 avctx->extradata_size = put_bits_count(&pb)/8; | |
585 | |
586 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d ms: %d taps: %d block: %d frame: %d downsamp: %d\n", | |
587 version, s->lossless, s->mid_side, s->num_taps, s->block_align, s->frame_size, s->downsampling); | |
588 | |
589 avctx->coded_frame = avcodec_alloc_frame(); | |
590 if (!avctx->coded_frame) | |
591 return -ENOMEM; | |
592 avctx->coded_frame->key_frame = 1; | |
593 avctx->frame_size = s->block_align*s->downsampling; | |
594 | |
595 return 0; | |
596 } | |
597 | |
598 static int sonic_encode_close(AVCodecContext *avctx) | |
599 { | |
600 SonicContext *s = avctx->priv_data; | |
601 int i; | |
602 | |
603 av_freep(&avctx->coded_frame); | |
604 | |
605 for (i = 0; i < s->channels; i++) | |
606 av_free(s->coded_samples[i]); | |
607 | |
608 av_free(s->predictor_k); | |
609 av_free(s->tail); | |
610 av_free(s->tap_quant); | |
611 av_free(s->window); | |
612 av_free(s->int_samples); | |
613 | |
614 return 0; | |
615 } | |
616 | |
617 static int sonic_encode_frame(AVCodecContext *avctx, | |
618 uint8_t *buf, int buf_size, void *data) | |
619 { | |
620 SonicContext *s = avctx->priv_data; | |
621 PutBitContext pb; | |
622 int i, j, ch, quant = 0, x = 0; | |
623 short *samples = data; | |
624 | |
625 init_put_bits(&pb, buf, buf_size*8); | |
626 | |
627 // short -> internal | |
628 for (i = 0; i < s->frame_size; i++) | |
629 { | |
630 if (samples[i] < 0) | |
631 s->int_samples[i] = samples[i]+32768; | |
632 else | |
633 s->int_samples[i] = samples[i]-32768; | |
634 } | |
635 | |
636 if (!s->lossless) | |
637 for (i = 0; i < s->frame_size; i++) | |
638 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT; | |
639 | |
640 if (s->mid_side) | |
641 for (i = 0; i < s->frame_size; i += s->channels) | |
642 { | |
643 s->int_samples[i] += s->int_samples[i+1]; | |
644 s->int_samples[i+1] -= shift(s->int_samples[i], 1); | |
645 } | |
646 | |
647 memset(s->window, 0, 4* s->window_size); | |
648 | |
649 for (i = 0; i < s->tail_size; i++) | |
650 s->window[x++] = s->tail[i]; | |
651 | |
652 for (i = 0; i < s->frame_size; i++) | |
653 s->window[x++] = s->int_samples[i]; | |
654 | |
655 for (i = 0; i < s->tail_size; i++) | |
656 s->window[x++] = 0; | |
657 | |
658 for (i = 0; i < s->tail_size; i++) | |
659 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i]; | |
660 | |
661 // generate taps | |
662 modified_levinson_durbin(s->window, s->window_size, | |
663 s->predictor_k, s->num_taps, s->channels, s->tap_quant); | |
664 if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0) | |
665 return -1; | |
666 | |
667 for (ch = 0; ch < s->channels; ch++) | |
668 { | |
669 x = s->tail_size+ch; | |
670 for (i = 0; i < s->block_align; i++) | |
671 { | |
672 int sum = 0; | |
673 for (j = 0; j < s->downsampling; j++, x += s->channels) | |
674 sum += s->window[x]; | |
675 s->coded_samples[ch][i] = sum; | |
676 } | |
677 } | |
678 | |
679 // simple rate control code | |
680 if (!s->lossless) | |
681 { | |
682 double energy1 = 0.0, energy2 = 0.0; | |
683 for (ch = 0; ch < s->channels; ch++) | |
684 { | |
685 for (i = 0; i < s->block_align; i++) | |
686 { | |
687 double sample = s->coded_samples[ch][i]; | |
688 energy2 += sample*sample; | |
689 energy1 += fabs(sample); | |
690 } | |
691 } | |
692 | |
693 energy2 = sqrt(energy2/(s->channels*s->block_align)); | |
694 energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align); | |
695 | |
696 // increase bitrate when samples are like a gaussian distribution | |
697 // reduce bitrate when samples are like a two-tailed exponential distribution | |
698 | |
699 if (energy2 > energy1) | |
700 energy2 += (energy2-energy1)*RATE_VARIATION; | |
701 | |
702 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR); | |
703 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2); | |
704 | |
705 if (quant < 1) | |
706 quant = 1; | |
707 if (quant > 65535) | |
708 quant = 65535; | |
709 | |
710 set_ue_golomb(&pb, quant); | |
711 | |
712 quant *= SAMPLE_FACTOR; | |
713 } | |
714 | |
715 // write out coded samples | |
716 for (ch = 0; ch < s->channels; ch++) | |
717 { | |
718 if (!s->lossless) | |
719 for (i = 0; i < s->block_align; i++) | |
720 s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant); | |
721 | |
722 if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0) | |
723 return -1; | |
724 } | |
725 | |
726 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8); | |
727 | |
728 flush_put_bits(&pb); | |
729 return (put_bits_count(&pb)+7)/8; | |
730 } | |
731 #endif //CONFIG_ENCODERS | |
732 | |
733 static int sonic_decode_init(AVCodecContext *avctx) | |
734 { | |
735 SonicContext *s = avctx->priv_data; | |
736 GetBitContext gb; | |
737 int i, version; | |
738 | |
739 s->channels = avctx->channels; | |
740 s->samplerate = avctx->sample_rate; | |
741 | |
742 if (!avctx->extradata) | |
743 { | |
744 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n"); | |
745 return -1; | |
746 } | |
747 | |
748 init_get_bits(&gb, avctx->extradata, avctx->extradata_size); | |
749 | |
750 version = get_bits(&gb, 2); | |
751 if (version > 1) | |
752 { | |
753 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n"); | |
754 return -1; | |
755 } | |
756 | |
757 if (version == 1) | |
758 { | |
759 s->channels = get_bits(&gb, 2); | |
760 s->samplerate = samplerate_table[get_bits(&gb, 4)]; | |
761 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n", | |
762 s->channels, s->samplerate); | |
763 } | |
764 | |
765 if (s->channels > MAX_CHANNELS) | |
766 { | |
767 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); | |
768 return -1; | |
769 } | |
770 | |
771 s->lossless = get_bits1(&gb); | |
772 if (!s->lossless) | |
773 skip_bits(&gb, 3); // XXX FIXME | |
774 s->mid_side = get_bits1(&gb); | |
775 | |
776 s->downsampling = get_bits(&gb, 2); | |
777 s->num_taps = (get_bits(&gb, 5)+1)<<5; | |
778 if (get_bits1(&gb)) // XXX FIXME | |
779 av_log(avctx, AV_LOG_INFO, "Custom quant table\n"); | |
780 | |
781 s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling; | |
782 s->frame_size = s->channels*s->block_align*s->downsampling; | |
783 // avctx->frame_size = s->block_align; | |
784 | |
785 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d ms: %d taps: %d block: %d frame: %d downsamp: %d\n", | |
786 version, s->lossless, s->mid_side, s->num_taps, s->block_align, s->frame_size, s->downsampling); | |
787 | |
788 // generate taps | |
789 s->tap_quant = av_mallocz(4* s->num_taps); | |
790 for (i = 0; i < s->num_taps; i++) | |
791 s->tap_quant[i] = (int)(sqrt(i+1)); | |
792 | |
793 s->predictor_k = av_mallocz(4* s->num_taps); | |
794 | |
795 for (i = 0; i < s->channels; i++) | |
796 { | |
797 s->predictor_state[i] = av_mallocz(4* s->num_taps); | |
798 if (!s->predictor_state[i]) | |
799 return -1; | |
800 } | |
801 | |
802 for (i = 0; i < s->channels; i++) | |
803 { | |
804 s->coded_samples[i] = av_mallocz(4* s->block_align); | |
805 if (!s->coded_samples[i]) | |
806 return -1; | |
807 } | |
808 s->int_samples = av_mallocz(4* s->frame_size); | |
809 | |
810 return 0; | |
811 } | |
812 | |
813 static int sonic_decode_close(AVCodecContext *avctx) | |
814 { | |
815 SonicContext *s = avctx->priv_data; | |
816 int i; | |
817 | |
818 av_free(s->int_samples); | |
819 av_free(s->tap_quant); | |
820 av_free(s->predictor_k); | |
821 | |
822 for (i = 0; i < s->channels; i++) | |
823 { | |
824 av_free(s->predictor_state[i]); | |
825 av_free(s->coded_samples[i]); | |
826 } | |
827 | |
828 return 0; | |
829 } | |
830 | |
831 static int sonic_decode_frame(AVCodecContext *avctx, | |
832 int16_t *data, int *data_size, | |
833 uint8_t *buf, int buf_size) | |
834 { | |
835 SonicContext *s = avctx->priv_data; | |
836 GetBitContext gb; | |
837 int i, quant, ch, j; | |
838 short *samples = data; | |
839 | |
840 if (buf_size == 0) return 0; | |
841 | |
842 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size); | |
843 | |
844 init_get_bits(&gb, buf, buf_size*8); | |
845 | |
846 intlist_read(&gb, s->predictor_k, s->num_taps, 0); | |
847 | |
848 // dequantize | |
849 for (i = 0; i < s->num_taps; i++) | |
850 s->predictor_k[i] *= s->tap_quant[i]; | |
851 | |
852 if (s->lossless) | |
853 quant = 1; | |
854 else | |
855 quant = get_ue_golomb(&gb) * SAMPLE_FACTOR; | |
856 | |
857 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant); | |
858 | |
859 for (ch = 0; ch < s->channels; ch++) | |
860 { | |
861 int x = ch; | |
862 | |
863 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps); | |
864 | |
865 intlist_read(&gb, s->coded_samples[ch], s->block_align, 1); | |
866 | |
867 for (i = 0; i < s->block_align; i++) | |
868 { | |
869 for (j = 0; j < s->downsampling - 1; j++) | |
870 { | |
871 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0); | |
872 x += s->channels; | |
873 } | |
874 | |
875 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant); | |
876 x += s->channels; | |
877 } | |
878 | |
879 for (i = 0; i < s->num_taps; i++) | |
880 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels]; | |
881 } | |
882 | |
883 if (s->mid_side) | |
884 for (i = 0; i < s->frame_size; i += s->channels) | |
885 { | |
886 s->int_samples[i+1] += shift(s->int_samples[i], 1); | |
887 s->int_samples[i] -= s->int_samples[i+1]; | |
888 } | |
889 | |
890 if (!s->lossless) | |
891 for (i = 0; i < s->frame_size; i++) | |
892 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT); | |
893 | |
894 // internal -> short | |
895 for (i = 0; i < s->frame_size; i++) | |
896 { | |
897 if (s->int_samples[i] > 32767) | |
898 samples[i] = 32767; | |
899 else if (s->int_samples[i] < -32768) | |
900 samples[i] = -32768; | |
901 else | |
902 samples[i] = s->int_samples[i]; | |
903 } | |
904 | |
905 align_get_bits(&gb); | |
906 | |
907 // if (buf_size != (get_bits_count(&gb)+7)/8) | |
908 // av_log(NULL, AV_LOG_INFO, "buf_size (%d) and used bytes (%d) differs\n", buf_size, (get_bits_count(&gb)+7)/8); | |
909 | |
910 *data_size = s->frame_size * 2; | |
911 | |
912 return (get_bits_count(&gb)+7)/8; | |
913 } | |
914 | |
915 #ifdef CONFIG_ENCODERS | |
916 AVCodec sonic_encoder = { | |
917 "sonic", | |
918 CODEC_TYPE_AUDIO, | |
919 CODEC_ID_SONIC, | |
920 sizeof(SonicContext), | |
921 sonic_encode_init, | |
922 sonic_encode_frame, | |
923 sonic_encode_close, | |
924 NULL, | |
925 }; | |
926 | |
927 AVCodec sonic_ls_encoder = { | |
928 "sonicls", | |
929 CODEC_TYPE_AUDIO, | |
930 CODEC_ID_SONIC_LS, | |
931 sizeof(SonicContext), | |
932 sonic_encode_init, | |
933 sonic_encode_frame, | |
934 sonic_encode_close, | |
935 NULL, | |
936 }; | |
937 #endif | |
938 | |
939 #ifdef CONFIG_DECODERS | |
940 AVCodec sonic_decoder = { | |
941 "sonic", | |
942 CODEC_TYPE_AUDIO, | |
943 CODEC_ID_SONIC, | |
944 sizeof(SonicContext), | |
945 sonic_decode_init, | |
946 NULL, | |
947 sonic_decode_close, | |
948 sonic_decode_frame, | |
949 }; | |
950 #endif |