808
|
1 /*
|
|
2 * Simple free lossless/lossy audio codec
|
|
3 * Copyright (c) 2004 Alex Beregszaszi
|
|
4 *
|
|
5 * This file is part of FFmpeg.
|
|
6 *
|
|
7 * FFmpeg is free software; you can redistribute it and/or
|
|
8 * modify it under the terms of the GNU Lesser General Public
|
|
9 * License as published by the Free Software Foundation; either
|
|
10 * version 2.1 of the License, or (at your option) any later version.
|
|
11 *
|
|
12 * FFmpeg is distributed in the hope that it will be useful,
|
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15 * Lesser General Public License for more details.
|
|
16 *
|
|
17 * You should have received a copy of the GNU Lesser General Public
|
|
18 * License along with FFmpeg; if not, write to the Free Software
|
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
20 */
|
|
21 #include "avcodec.h"
|
|
22 #include "bitstream.h"
|
|
23 #include "golomb.h"
|
|
24
|
|
25 /**
|
|
26 * @file sonic.c
|
|
27 * Simple free lossless/lossy audio codec
|
|
28 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
|
|
29 * Written and designed by Alex Beregszaszi
|
|
30 *
|
|
31 * TODO:
|
|
32 * - CABAC put/get_symbol
|
|
33 * - independent quantizer for channels
|
|
34 * - >2 channels support
|
|
35 * - more decorrelation types
|
|
36 * - more tap_quant tests
|
|
37 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
|
|
38 */
|
|
39
|
|
40 #define MAX_CHANNELS 2
|
|
41
|
|
42 #define MID_SIDE 0
|
|
43 #define LEFT_SIDE 1
|
|
44 #define RIGHT_SIDE 2
|
|
45
|
|
46 typedef struct SonicContext {
|
|
47 int lossless, decorrelation;
|
|
48
|
|
49 int num_taps, downsampling;
|
|
50 double quantization;
|
|
51
|
|
52 int channels, samplerate, block_align, frame_size;
|
|
53
|
|
54 int *tap_quant;
|
|
55 int *int_samples;
|
|
56 int *coded_samples[MAX_CHANNELS];
|
|
57
|
|
58 // for encoding
|
|
59 int *tail;
|
|
60 int tail_size;
|
|
61 int *window;
|
|
62 int window_size;
|
|
63
|
|
64 // for decoding
|
|
65 int *predictor_k;
|
|
66 int *predictor_state[MAX_CHANNELS];
|
|
67 } SonicContext;
|
|
68
|
|
69 #define LATTICE_SHIFT 10
|
|
70 #define SAMPLE_SHIFT 4
|
|
71 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
|
|
72 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
|
|
73
|
|
74 #define BASE_QUANT 0.6
|
|
75 #define RATE_VARIATION 3.0
|
|
76
|
|
/**
 * Divide a by b, rounding the magnitude to the nearest integer
 * (b/2 is added to |a| before the truncating division) and then
 * restoring the sign of a.
 */
static inline int divide(int a, int b)
{
    const int negative = a < 0;
    const int magnitude = negative ? -a : a;
    const int quotient = (magnitude + b/2) / b;

    return negative ? -quotient : quotient;
}
|
|
84
|
|
/**
 * Right-shift a by b bits after adding half of the divisor (1 << (b-1)).
 * b must be at least 1.
 */
static inline int shift(int a,int b)
{
    const int half = 1 << (b - 1);
    const int biased = a + half;

    return biased >> b;
}
|
|
89
|
|
/**
 * Right-shift a by b bits; one is added to the shifted result
 * whenever a is negative.
 */
static inline int shift_down(int a,int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
|
|
94
|
|
95 #if 1
|
|
96 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
|
|
97 {
|
|
98 int i;
|
|
99
|
|
100 for (i = 0; i < entries; i++)
|
|
101 set_se_golomb(pb, buf[i]);
|
|
102
|
|
103 return 1;
|
|
104 }
|
|
105
|
|
106 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
|
|
107 {
|
|
108 int i;
|
|
109
|
|
110 for (i = 0; i < entries; i++)
|
|
111 buf[i] = get_se_golomb(gb);
|
|
112
|
|
113 return 1;
|
|
114 }
|
|
115
|
|
116 #else
|
|
117
|
|
118 #define ADAPT_LEVEL 8
|
|
119
|
|
/**
 * Number of bits needed to represent x (0 for x == 0).
 */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x != 0; x >>= 1)
        width++;

    return width;
}
|
|
131
|
|
132 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
|
|
133 {
|
|
134 int i, bits;
|
|
135
|
|
136 if (!max)
|
|
137 return;
|
|
138
|
|
139 bits = bits_to_store(max);
|
|
140
|
|
141 for (i = 0; i < bits-1; i++)
|
|
142 put_bits(pb, 1, value & (1 << i));
|
|
143
|
|
144 if ( (value | (1 << (bits-1))) <= max)
|
|
145 put_bits(pb, 1, value & (1 << (bits-1)));
|
|
146 }
|
|
147
|
|
148 static unsigned int read_uint_max(GetBitContext *gb, int max)
|
|
149 {
|
|
150 int i, bits, value = 0;
|
|
151
|
|
152 if (!max)
|
|
153 return 0;
|
|
154
|
|
155 bits = bits_to_store(max);
|
|
156
|
|
157 for (i = 0; i < bits-1; i++)
|
|
158 if (get_bits1(gb))
|
|
159 value += 1 << i;
|
|
160
|
|
161 if ( (value | (1<<(bits-1))) <= max)
|
|
162 if (get_bits1(gb))
|
|
163 value += 1 << (bits-1);
|
|
164
|
|
165 return value;
|
|
166 }
|
|
167
|
|
168 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
|
|
169 {
|
|
170 int i, j, x = 0, low_bits = 0, max = 0;
|
|
171 int step = 256, pos = 0, dominant = 0, any = 0;
|
|
172 int *copy, *bits;
|
|
173
|
|
174 copy = av_mallocz(4* entries);
|
|
175 if (!copy)
|
|
176 return -1;
|
|
177
|
|
178 if (base_2_part)
|
|
179 {
|
|
180 int energy = 0;
|
|
181
|
|
182 for (i = 0; i < entries; i++)
|
|
183 energy += abs(buf[i]);
|
|
184
|
|
185 low_bits = bits_to_store(energy / (entries * 2));
|
|
186 if (low_bits > 15)
|
|
187 low_bits = 15;
|
|
188
|
|
189 put_bits(pb, 4, low_bits);
|
|
190 }
|
|
191
|
|
192 for (i = 0; i < entries; i++)
|
|
193 {
|
|
194 put_bits(pb, low_bits, abs(buf[i]));
|
|
195 copy[i] = abs(buf[i]) >> low_bits;
|
|
196 if (copy[i] > max)
|
|
197 max = abs(copy[i]);
|
|
198 }
|
|
199
|
|
200 bits = av_mallocz(4* entries*max);
|
|
201 if (!bits)
|
|
202 {
|
|
203 // av_free(copy);
|
|
204 return -1;
|
|
205 }
|
|
206
|
|
207 for (i = 0; i <= max; i++)
|
|
208 {
|
|
209 for (j = 0; j < entries; j++)
|
|
210 if (copy[j] >= i)
|
|
211 bits[x++] = copy[j] > i;
|
|
212 }
|
|
213
|
|
214 // store bitstream
|
|
215 while (pos < x)
|
|
216 {
|
|
217 int steplet = step >> 8;
|
|
218
|
|
219 if (pos + steplet > x)
|
|
220 steplet = x - pos;
|
|
221
|
|
222 for (i = 0; i < steplet; i++)
|
|
223 if (bits[i+pos] != dominant)
|
|
224 any = 1;
|
|
225
|
|
226 put_bits(pb, 1, any);
|
|
227
|
|
228 if (!any)
|
|
229 {
|
|
230 pos += steplet;
|
|
231 step += step / ADAPT_LEVEL;
|
|
232 }
|
|
233 else
|
|
234 {
|
|
235 int interloper = 0;
|
|
236
|
|
237 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
|
|
238 interloper++;
|
|
239
|
|
240 // note change
|
|
241 write_uint_max(pb, interloper, (step >> 8) - 1);
|
|
242
|
|
243 pos += interloper + 1;
|
|
244 step -= step / ADAPT_LEVEL;
|
|
245 }
|
|
246
|
|
247 if (step < 256)
|
|
248 {
|
|
249 step = 65536 / step;
|
|
250 dominant = !dominant;
|
|
251 }
|
|
252 }
|
|
253
|
|
254 // store signs
|
|
255 for (i = 0; i < entries; i++)
|
|
256 if (buf[i])
|
|
257 put_bits(pb, 1, buf[i] < 0);
|
|
258
|
|
259 // av_free(bits);
|
|
260 // av_free(copy);
|
|
261
|
|
262 return 0;
|
|
263 }
|
|
264
|
|
265 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
|
|
266 {
|
|
267 int i, low_bits = 0, x = 0;
|
|
268 int n_zeros = 0, step = 256, dominant = 0;
|
|
269 int pos = 0, level = 0;
|
|
270 int *bits = av_mallocz(4* entries);
|
|
271
|
|
272 if (!bits)
|
|
273 return -1;
|
|
274
|
|
275 if (base_2_part)
|
|
276 {
|
|
277 low_bits = get_bits(gb, 4);
|
|
278
|
|
279 if (low_bits)
|
|
280 for (i = 0; i < entries; i++)
|
|
281 buf[i] = get_bits(gb, low_bits);
|
|
282 }
|
|
283
|
|
284 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
|
|
285
|
|
286 while (n_zeros < entries)
|
|
287 {
|
|
288 int steplet = step >> 8;
|
|
289
|
|
290 if (!get_bits1(gb))
|
|
291 {
|
|
292 for (i = 0; i < steplet; i++)
|
|
293 bits[x++] = dominant;
|
|
294
|
|
295 if (!dominant)
|
|
296 n_zeros += steplet;
|
|
297
|
|
298 step += step / ADAPT_LEVEL;
|
|
299 }
|
|
300 else
|
|
301 {
|
|
302 int actual_run = read_uint_max(gb, steplet-1);
|
|
303
|
|
304 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
|
|
305
|
|
306 for (i = 0; i < actual_run; i++)
|
|
307 bits[x++] = dominant;
|
|
308
|
|
309 bits[x++] = !dominant;
|
|
310
|
|
311 if (!dominant)
|
|
312 n_zeros += actual_run;
|
|
313 else
|
|
314 n_zeros++;
|
|
315
|
|
316 step -= step / ADAPT_LEVEL;
|
|
317 }
|
|
318
|
|
319 if (step < 256)
|
|
320 {
|
|
321 step = 65536 / step;
|
|
322 dominant = !dominant;
|
|
323 }
|
|
324 }
|
|
325
|
|
326 // reconstruct unsigned values
|
|
327 n_zeros = 0;
|
|
328 for (i = 0; n_zeros < entries; i++)
|
|
329 {
|
|
330 while(1)
|
|
331 {
|
|
332 if (pos >= entries)
|
|
333 {
|
|
334 pos = 0;
|
|
335 level += 1 << low_bits;
|
|
336 }
|
|
337
|
|
338 if (buf[pos] >= level)
|
|
339 break;
|
|
340
|
|
341 pos++;
|
|
342 }
|
|
343
|
|
344 if (bits[i])
|
|
345 buf[pos] += 1 << low_bits;
|
|
346 else
|
|
347 n_zeros++;
|
|
348
|
|
349 pos++;
|
|
350 }
|
|
351 // av_free(bits);
|
|
352
|
|
353 // read signs
|
|
354 for (i = 0; i < entries; i++)
|
|
355 if (buf[i] && get_bits1(gb))
|
|
356 buf[i] = -buf[i];
|
|
357
|
|
358 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
|
|
359
|
|
360 return 0;
|
|
361 }
|
|
362 #endif
|
|
363
|
|
364 static void predictor_init_state(int *k, int *state, int order)
|
|
365 {
|
|
366 int i;
|
|
367
|
|
368 for (i = order-2; i >= 0; i--)
|
|
369 {
|
|
370 int j, p, x = state[i];
|
|
371
|
|
372 for (j = 0, p = i+1; p < order; j++,p++)
|
|
373 {
|
|
374 int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
|
|
375 state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
|
|
376 x = tmp;
|
|
377 }
|
|
378 }
|
|
379 }
|
|
380
|
|
381 static int predictor_calc_error(int *k, int *state, int order, int error)
|
|
382 {
|
|
383 int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
|
|
384
|
|
385 #if 1
|
|
386 int *k_ptr = &(k[order-2]),
|
|
387 *state_ptr = &(state[order-2]);
|
|
388 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
|
|
389 {
|
|
390 int k_value = *k_ptr, state_value = *state_ptr;
|
|
391 x -= shift_down(k_value * state_value, LATTICE_SHIFT);
|
|
392 state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
|
|
393 }
|
|
394 #else
|
|
395 for (i = order-2; i >= 0; i--)
|
|
396 {
|
|
397 x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
|
|
398 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
|
|
399 }
|
|
400 #endif
|
|
401
|
|
402 // don't drift too far, to avoid overflows
|
|
403 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
|
|
404 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
|
|
405
|
|
406 state[0] = x;
|
|
407
|
|
408 return x;
|
|
409 }
|
|
410
|
|
411 #ifdef CONFIG_ENCODERS
|
|
412 // Heavily modified Levinson-Durbin algorithm which
|
|
413 // copes better with quantization, and calculates the
|
|
414 // actual whitened result as it goes.
|
|
415
|
|
416 static void modified_levinson_durbin(int *window, int window_entries,
|
|
417 int *out, int out_entries, int channels, int *tap_quant)
|
|
418 {
|
|
419 int i;
|
|
420 int *state = av_mallocz(4* window_entries);
|
|
421
|
|
422 memcpy(state, window, 4* window_entries);
|
|
423
|
|
424 for (i = 0; i < out_entries; i++)
|
|
425 {
|
|
426 int step = (i+1)*channels, k, j;
|
|
427 double xx = 0.0, xy = 0.0;
|
|
428 #if 1
|
|
429 int *x_ptr = &(window[step]), *state_ptr = &(state[0]);
|
|
430 j = window_entries - step;
|
|
431 for (;j>=0;j--,x_ptr++,state_ptr++)
|
|
432 {
|
|
433 double x_value = *x_ptr, state_value = *state_ptr;
|
|
434 xx += state_value*state_value;
|
|
435 xy += x_value*state_value;
|
|
436 }
|
|
437 #else
|
|
438 for (j = 0; j <= (window_entries - step); j++);
|
|
439 {
|
|
440 double stepval = window[step+j], stateval = window[j];
|
|
441 // xx += (double)window[j]*(double)window[j];
|
|
442 // xy += (double)window[step+j]*(double)window[j];
|
|
443 xx += stateval*stateval;
|
|
444 xy += stepval*stateval;
|
|
445 }
|
|
446 #endif
|
|
447 if (xx == 0.0)
|
|
448 k = 0;
|
|
449 else
|
|
450 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
|
|
451
|
|
452 if (k > (LATTICE_FACTOR/tap_quant[i]))
|
|
453 k = LATTICE_FACTOR/tap_quant[i];
|
|
454 if (-k > (LATTICE_FACTOR/tap_quant[i]))
|
|
455 k = -(LATTICE_FACTOR/tap_quant[i]);
|
|
456
|
|
457 out[i] = k;
|
|
458 k *= tap_quant[i];
|
|
459
|
|
460 #if 1
|
|
461 x_ptr = &(window[step]);
|
|
462 state_ptr = &(state[0]);
|
|
463 j = window_entries - step;
|
|
464 for (;j>=0;j--,x_ptr++,state_ptr++)
|
|
465 {
|
|
466 int x_value = *x_ptr, state_value = *state_ptr;
|
|
467 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
|
|
468 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
|
|
469 }
|
|
470 #else
|
|
471 for (j=0; j <= (window_entries - step); j++)
|
|
472 {
|
|
473 int stepval = window[step+j], stateval=state[j];
|
|
474 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
|
|
475 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
|
|
476 }
|
|
477 #endif
|
|
478 }
|
|
479
|
|
480 av_free(state);
|
|
481 }
|
|
482 #endif /* CONFIG_ENCODERS */
|
|
483
|
|
/* Sample rates in Hz, indexed by the rate code read from the stream header. */
static int samplerate_table[] = {
    44100,
    22050,
    11025,
    96000,
    48000,
    32000,
    24000,
    16000,
    8000,
};
|
|
486
|
|
487 #ifdef CONFIG_ENCODERS
|
|
488
|
|
/**
 * Map a sample rate in Hz to its header code.
 *
 * @return the code (0..8), or -1 if the rate has no code
 */
static inline int code_samplerate(int samplerate)
{
    static const int rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    const int count = sizeof(rates) / sizeof(rates[0]);
    int code;

    for (code = 0; code < count; code++)
        if (rates[code] == samplerate)
            return code;

    return -1;
}
|
|
505
|
|
506 static int sonic_encode_init(AVCodecContext *avctx)
|
|
507 {
|
|
508 SonicContext *s = avctx->priv_data;
|
|
509 PutBitContext pb;
|
|
510 int i, version = 0;
|
|
511
|
|
512 if (avctx->channels > MAX_CHANNELS)
|
|
513 {
|
|
514 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
|
|
515 return -1; /* only stereo or mono for now */
|
|
516 }
|
|
517
|
|
518 if (avctx->channels == 2)
|
|
519 s->decorrelation = MID_SIDE;
|
|
520
|
|
521 if (avctx->codec->id == CODEC_ID_SONIC_LS)
|
|
522 {
|
|
523 s->lossless = 1;
|
|
524 s->num_taps = 32;
|
|
525 s->downsampling = 1;
|
|
526 s->quantization = 0.0;
|
|
527 }
|
|
528 else
|
|
529 {
|
|
530 s->num_taps = 128;
|
|
531 s->downsampling = 2;
|
|
532 s->quantization = 1.0;
|
|
533 }
|
|
534
|
|
535 // max tap 2048
|
|
536 if ((s->num_taps < 32) || (s->num_taps > 1024) ||
|
|
537 ((s->num_taps>>5)<<5 != s->num_taps))
|
|
538 {
|
|
539 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
|
|
540 return -1;
|
|
541 }
|
|
542
|
|
543 // generate taps
|
|
544 s->tap_quant = av_mallocz(4* s->num_taps);
|
|
545 for (i = 0; i < s->num_taps; i++)
|
|
546 s->tap_quant[i] = (int)(sqrt(i+1));
|
|
547
|
|
548 s->channels = avctx->channels;
|
|
549 s->samplerate = avctx->sample_rate;
|
|
550
|
|
551 s->block_align = (int)(2048.0*s->samplerate/44100)/s->downsampling;
|
|
552 s->frame_size = s->channels*s->block_align*s->downsampling;
|
|
553
|
|
554 s->tail = av_mallocz(4* s->num_taps*s->channels);
|
|
555 if (!s->tail)
|
|
556 return -1;
|
|
557 s->tail_size = s->num_taps*s->channels;
|
|
558
|
|
559 s->predictor_k = av_mallocz(4 * s->num_taps);
|
|
560 if (!s->predictor_k)
|
|
561 return -1;
|
|
562
|
|
563 for (i = 0; i < s->channels; i++)
|
|
564 {
|
|
565 s->coded_samples[i] = av_mallocz(4* s->block_align);
|
|
566 if (!s->coded_samples[i])
|
|
567 return -1;
|
|
568 }
|
|
569
|
|
570 s->int_samples = av_mallocz(4* s->frame_size);
|
|
571
|
|
572 s->window_size = ((2*s->tail_size)+s->frame_size);
|
|
573 s->window = av_mallocz(4* s->window_size);
|
|
574 if (!s->window)
|
|
575 return -1;
|
|
576
|
|
577 avctx->extradata = av_mallocz(16);
|
|
578 if (!avctx->extradata)
|
|
579 return -1;
|
|
580 init_put_bits(&pb, avctx->extradata, 16*8);
|
|
581
|
|
582 put_bits(&pb, 2, version); // version
|
|
583 if (version == 1)
|
|
584 {
|
|
585 put_bits(&pb, 2, s->channels);
|
|
586 put_bits(&pb, 4, code_samplerate(s->samplerate));
|
|
587 }
|
|
588 put_bits(&pb, 1, s->lossless);
|
|
589 if (!s->lossless)
|
|
590 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
|
|
591 put_bits(&pb, 2, s->decorrelation);
|
|
592 put_bits(&pb, 2, s->downsampling);
|
|
593 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
|
|
594 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
|
|
595
|
|
596 flush_put_bits(&pb);
|
|
597 avctx->extradata_size = put_bits_count(&pb)/8;
|
|
598
|
|
599 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
|
|
600 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
|
|
601
|
|
602 avctx->coded_frame = avcodec_alloc_frame();
|
|
603 if (!avctx->coded_frame)
|
|
604 return -ENOMEM;
|
|
605 avctx->coded_frame->key_frame = 1;
|
|
606 avctx->frame_size = s->block_align*s->downsampling;
|
|
607
|
|
608 return 0;
|
|
609 }
|
|
610
|
|
611 static int sonic_encode_close(AVCodecContext *avctx)
|
|
612 {
|
|
613 SonicContext *s = avctx->priv_data;
|
|
614 int i;
|
|
615
|
|
616 av_freep(&avctx->coded_frame);
|
|
617
|
|
618 for (i = 0; i < s->channels; i++)
|
|
619 av_free(s->coded_samples[i]);
|
|
620
|
|
621 av_free(s->predictor_k);
|
|
622 av_free(s->tail);
|
|
623 av_free(s->tap_quant);
|
|
624 av_free(s->window);
|
|
625 av_free(s->int_samples);
|
|
626
|
|
627 return 0;
|
|
628 }
|
|
629
|
|
630 static int sonic_encode_frame(AVCodecContext *avctx,
|
|
631 uint8_t *buf, int buf_size, void *data)
|
|
632 {
|
|
633 SonicContext *s = avctx->priv_data;
|
|
634 PutBitContext pb;
|
|
635 int i, j, ch, quant = 0, x = 0;
|
|
636 short *samples = data;
|
|
637
|
|
638 init_put_bits(&pb, buf, buf_size*8);
|
|
639
|
|
640 // short -> internal
|
|
641 for (i = 0; i < s->frame_size; i++)
|
|
642 s->int_samples[i] = samples[i];
|
|
643
|
|
644 if (!s->lossless)
|
|
645 for (i = 0; i < s->frame_size; i++)
|
|
646 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
|
|
647
|
|
648 switch(s->decorrelation)
|
|
649 {
|
|
650 case MID_SIDE:
|
|
651 for (i = 0; i < s->frame_size; i += s->channels)
|
|
652 {
|
|
653 s->int_samples[i] += s->int_samples[i+1];
|
|
654 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
|
|
655 }
|
|
656 break;
|
|
657 case LEFT_SIDE:
|
|
658 for (i = 0; i < s->frame_size; i += s->channels)
|
|
659 s->int_samples[i+1] -= s->int_samples[i];
|
|
660 break;
|
|
661 case RIGHT_SIDE:
|
|
662 for (i = 0; i < s->frame_size; i += s->channels)
|
|
663 s->int_samples[i] -= s->int_samples[i+1];
|
|
664 break;
|
|
665 }
|
|
666
|
|
667 memset(s->window, 0, 4* s->window_size);
|
|
668
|
|
669 for (i = 0; i < s->tail_size; i++)
|
|
670 s->window[x++] = s->tail[i];
|
|
671
|
|
672 for (i = 0; i < s->frame_size; i++)
|
|
673 s->window[x++] = s->int_samples[i];
|
|
674
|
|
675 for (i = 0; i < s->tail_size; i++)
|
|
676 s->window[x++] = 0;
|
|
677
|
|
678 for (i = 0; i < s->tail_size; i++)
|
|
679 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
|
|
680
|
|
681 // generate taps
|
|
682 modified_levinson_durbin(s->window, s->window_size,
|
|
683 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
|
|
684 if (intlist_write(&pb, s->predictor_k, s->num_taps, 0) < 0)
|
|
685 return -1;
|
|
686
|
|
687 for (ch = 0; ch < s->channels; ch++)
|
|
688 {
|
|
689 x = s->tail_size+ch;
|
|
690 for (i = 0; i < s->block_align; i++)
|
|
691 {
|
|
692 int sum = 0;
|
|
693 for (j = 0; j < s->downsampling; j++, x += s->channels)
|
|
694 sum += s->window[x];
|
|
695 s->coded_samples[ch][i] = sum;
|
|
696 }
|
|
697 }
|
|
698
|
|
699 // simple rate control code
|
|
700 if (!s->lossless)
|
|
701 {
|
|
702 double energy1 = 0.0, energy2 = 0.0;
|
|
703 for (ch = 0; ch < s->channels; ch++)
|
|
704 {
|
|
705 for (i = 0; i < s->block_align; i++)
|
|
706 {
|
|
707 double sample = s->coded_samples[ch][i];
|
|
708 energy2 += sample*sample;
|
|
709 energy1 += fabs(sample);
|
|
710 }
|
|
711 }
|
|
712
|
|
713 energy2 = sqrt(energy2/(s->channels*s->block_align));
|
|
714 energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
|
|
715
|
|
716 // increase bitrate when samples are like a gaussian distribution
|
|
717 // reduce bitrate when samples are like a two-tailed exponential distribution
|
|
718
|
|
719 if (energy2 > energy1)
|
|
720 energy2 += (energy2-energy1)*RATE_VARIATION;
|
|
721
|
|
722 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
|
|
723 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
|
|
724
|
|
725 if (quant < 1)
|
|
726 quant = 1;
|
|
727 if (quant > 65535)
|
|
728 quant = 65535;
|
|
729
|
|
730 set_ue_golomb(&pb, quant);
|
|
731
|
|
732 quant *= SAMPLE_FACTOR;
|
|
733 }
|
|
734
|
|
735 // write out coded samples
|
|
736 for (ch = 0; ch < s->channels; ch++)
|
|
737 {
|
|
738 if (!s->lossless)
|
|
739 for (i = 0; i < s->block_align; i++)
|
|
740 s->coded_samples[ch][i] = divide(s->coded_samples[ch][i], quant);
|
|
741
|
|
742 if (intlist_write(&pb, s->coded_samples[ch], s->block_align, 1) < 0)
|
|
743 return -1;
|
|
744 }
|
|
745
|
|
746 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
|
|
747
|
|
748 flush_put_bits(&pb);
|
|
749 return (put_bits_count(&pb)+7)/8;
|
|
750 }
|
|
751 #endif //CONFIG_ENCODERS
|
|
752
|
|
753 #ifdef CONFIG_DECODERS
|
|
754 static int sonic_decode_init(AVCodecContext *avctx)
|
|
755 {
|
|
756 SonicContext *s = avctx->priv_data;
|
|
757 GetBitContext gb;
|
|
758 int i, version;
|
|
759
|
|
760 s->channels = avctx->channels;
|
|
761 s->samplerate = avctx->sample_rate;
|
|
762
|
|
763 if (!avctx->extradata)
|
|
764 {
|
|
765 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
|
|
766 return -1;
|
|
767 }
|
|
768
|
|
769 init_get_bits(&gb, avctx->extradata, avctx->extradata_size);
|
|
770
|
|
771 version = get_bits(&gb, 2);
|
|
772 if (version > 1)
|
|
773 {
|
|
774 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
|
|
775 return -1;
|
|
776 }
|
|
777
|
|
778 if (version == 1)
|
|
779 {
|
|
780 s->channels = get_bits(&gb, 2);
|
|
781 s->samplerate = samplerate_table[get_bits(&gb, 4)];
|
|
782 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
|
|
783 s->channels, s->samplerate);
|
|
784 }
|
|
785
|
|
786 if (s->channels > MAX_CHANNELS)
|
|
787 {
|
|
788 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
|
|
789 return -1;
|
|
790 }
|
|
791
|
|
792 s->lossless = get_bits1(&gb);
|
|
793 if (!s->lossless)
|
|
794 skip_bits(&gb, 3); // XXX FIXME
|
|
795 s->decorrelation = get_bits(&gb, 2);
|
|
796
|
|
797 s->downsampling = get_bits(&gb, 2);
|
|
798 s->num_taps = (get_bits(&gb, 5)+1)<<5;
|
|
799 if (get_bits1(&gb)) // XXX FIXME
|
|
800 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
|
|
801
|
|
802 s->block_align = (int)(2048.0*(s->samplerate/44100))/s->downsampling;
|
|
803 s->frame_size = s->channels*s->block_align*s->downsampling;
|
|
804 // avctx->frame_size = s->block_align;
|
|
805
|
|
806 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
|
|
807 version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
|
|
808
|
|
809 // generate taps
|
|
810 s->tap_quant = av_mallocz(4* s->num_taps);
|
|
811 for (i = 0; i < s->num_taps; i++)
|
|
812 s->tap_quant[i] = (int)(sqrt(i+1));
|
|
813
|
|
814 s->predictor_k = av_mallocz(4* s->num_taps);
|
|
815
|
|
816 for (i = 0; i < s->channels; i++)
|
|
817 {
|
|
818 s->predictor_state[i] = av_mallocz(4* s->num_taps);
|
|
819 if (!s->predictor_state[i])
|
|
820 return -1;
|
|
821 }
|
|
822
|
|
823 for (i = 0; i < s->channels; i++)
|
|
824 {
|
|
825 s->coded_samples[i] = av_mallocz(4* s->block_align);
|
|
826 if (!s->coded_samples[i])
|
|
827 return -1;
|
|
828 }
|
|
829 s->int_samples = av_mallocz(4* s->frame_size);
|
|
830
|
|
831 return 0;
|
|
832 }
|
|
833
|
|
834 static int sonic_decode_close(AVCodecContext *avctx)
|
|
835 {
|
|
836 SonicContext *s = avctx->priv_data;
|
|
837 int i;
|
|
838
|
|
839 av_free(s->int_samples);
|
|
840 av_free(s->tap_quant);
|
|
841 av_free(s->predictor_k);
|
|
842
|
|
843 for (i = 0; i < s->channels; i++)
|
|
844 {
|
|
845 av_free(s->predictor_state[i]);
|
|
846 av_free(s->coded_samples[i]);
|
|
847 }
|
|
848
|
|
849 return 0;
|
|
850 }
|
|
851
|
|
852 static int sonic_decode_frame(AVCodecContext *avctx,
|
|
853 void *data, int *data_size,
|
|
854 uint8_t *buf, int buf_size)
|
|
855 {
|
|
856 SonicContext *s = avctx->priv_data;
|
|
857 GetBitContext gb;
|
|
858 int i, quant, ch, j;
|
|
859 short *samples = data;
|
|
860
|
|
861 if (buf_size == 0) return 0;
|
|
862
|
|
863 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
|
|
864
|
|
865 init_get_bits(&gb, buf, buf_size*8);
|
|
866
|
|
867 intlist_read(&gb, s->predictor_k, s->num_taps, 0);
|
|
868
|
|
869 // dequantize
|
|
870 for (i = 0; i < s->num_taps; i++)
|
|
871 s->predictor_k[i] *= s->tap_quant[i];
|
|
872
|
|
873 if (s->lossless)
|
|
874 quant = 1;
|
|
875 else
|
|
876 quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
|
|
877
|
|
878 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
|
|
879
|
|
880 for (ch = 0; ch < s->channels; ch++)
|
|
881 {
|
|
882 int x = ch;
|
|
883
|
|
884 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
|
|
885
|
|
886 intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
|
|
887
|
|
888 for (i = 0; i < s->block_align; i++)
|
|
889 {
|
|
890 for (j = 0; j < s->downsampling - 1; j++)
|
|
891 {
|
|
892 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
|
|
893 x += s->channels;
|
|
894 }
|
|
895
|
|
896 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
|
|
897 x += s->channels;
|
|
898 }
|
|
899
|
|
900 for (i = 0; i < s->num_taps; i++)
|
|
901 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
|
|
902 }
|
|
903
|
|
904 switch(s->decorrelation)
|
|
905 {
|
|
906 case MID_SIDE:
|
|
907 for (i = 0; i < s->frame_size; i += s->channels)
|
|
908 {
|
|
909 s->int_samples[i+1] += shift(s->int_samples[i], 1);
|
|
910 s->int_samples[i] -= s->int_samples[i+1];
|
|
911 }
|
|
912 break;
|
|
913 case LEFT_SIDE:
|
|
914 for (i = 0; i < s->frame_size; i += s->channels)
|
|
915 s->int_samples[i+1] += s->int_samples[i];
|
|
916 break;
|
|
917 case RIGHT_SIDE:
|
|
918 for (i = 0; i < s->frame_size; i += s->channels)
|
|
919 s->int_samples[i] += s->int_samples[i+1];
|
|
920 break;
|
|
921 }
|
|
922
|
|
923 if (!s->lossless)
|
|
924 for (i = 0; i < s->frame_size; i++)
|
|
925 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
|
|
926
|
|
927 // internal -> short
|
|
928 for (i = 0; i < s->frame_size; i++)
|
|
929 {
|
|
930 if (s->int_samples[i] > 32767)
|
|
931 samples[i] = 32767;
|
|
932 else if (s->int_samples[i] < -32768)
|
|
933 samples[i] = -32768;
|
|
934 else
|
|
935 samples[i] = s->int_samples[i];
|
|
936 }
|
|
937
|
|
938 align_get_bits(&gb);
|
|
939
|
|
940 *data_size = s->frame_size * 2;
|
|
941
|
|
942 return (get_bits_count(&gb)+7)/8;
|
|
943 }
|
|
944 #endif
|
|
945
|
|
946 #ifdef CONFIG_ENCODERS
|
|
/*
 * Registration entry for the lossy Sonic encoder.
 * The initializer is positional; the order of the fields is defined by
 * the AVCodec declaration, which is not part of this file.
 */
AVCodec sonic_encoder = {
    "sonic",                /* codec name */
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext),   /* size of the private context */
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,                   /* presumably the decode callback; unused by an encoder - confirm against AVCodec */
};
|
|
957
|
|
/*
 * Registration entry for the lossless Sonic encoder. It shares the
 * callbacks of the lossy encoder; sonic_encode_init() selects the
 * lossless parameters when the codec id is CODEC_ID_SONIC_LS.
 */
AVCodec sonic_ls_encoder = {
    "sonicls",              /* codec name */
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC_LS,
    sizeof(SonicContext),   /* size of the private context */
    sonic_encode_init,
    sonic_encode_frame,
    sonic_encode_close,
    NULL,                   /* presumably the decode callback; unused by an encoder - confirm against AVCodec */
};
|
|
968 #endif
|
|
969
|
|
970 #ifdef CONFIG_DECODERS
|
|
/*
 * Registration entry for the Sonic decoder.
 * The initializer is positional; the order of the fields is defined by
 * the AVCodec declaration, which is not part of this file.
 */
AVCodec sonic_decoder = {
    "sonic",                /* codec name */
    CODEC_TYPE_AUDIO,
    CODEC_ID_SONIC,
    sizeof(SonicContext),   /* size of the private context */
    sonic_decode_init,
    NULL,                   /* presumably the encode callback; unused by a decoder - confirm against AVCodec */
    sonic_decode_close,
    sonic_decode_frame,
};
|
|
981 #endif
|