comparison adpcm.c @ 3336:4d807145f29a libavcodec

ADPCM: trellis quantization
author lorenm
date Sat, 03 Jun 2006 19:04:56 +0000
parents 97d9937d4ce7
children 2d042ed9dd2c
3335:97af1b315f59 3336:4d807145f29a
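The new code path is only taken when the encoder's trellis option is set: frontier = 1 << avctx->trellis candidate states are kept per sample, and avctx->trellis == 0 falls back to the old greedy quantizer (see the avctx->trellis checks below). As a minimal sketch, assuming the ordinary libavcodec encode setup of this era (avcodec_find_encoder, avcodec_alloc_context, avcodec_open, avcodec_encode_audio) and hypothetical buffer/sample arguments, an application might enable it like this:

#include "avcodec.h"

/* Encode one frame of 16-bit PCM with the trellis search enabled.
 * Only the avctx->trellis assignment is specific to this change;
 * the rest is ordinary encoder setup. */
int encode_adpcm_trellis(const short *pcm, uint8_t *out, int out_size)
{
    AVCodec *codec = avcodec_find_encoder(CODEC_ID_ADPCM_IMA_WAV);
    AVCodecContext *avctx = avcodec_alloc_context();
    if (!codec || !avctx)
        return -1;
    avctx->sample_rate = 44100;
    avctx->channels = 2;
    avctx->trellis = 3;   /* keep 1<<3 = 8 candidate states per sample */
    if (avcodec_open(avctx, codec) < 0)
        return -1;
    /* pcm is assumed to hold avctx->frame_size * avctx->channels samples */
    return avcodec_encode_audio(avctx, out, out_size, pcm);
}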
255 255 c->step = clip(c->step, 127, 24567);
256 256
257 257 return nibble;
258 258 }
259 259
260 typedef struct TrellisPath {
261 int nibble;
262 int prev;
263 } TrellisPath;
264
265 typedef struct TrellisNode {
266 uint32_t ssd;
267 int path;
268 int sample1;
269 int sample2;
270 int step;
271 } TrellisNode;
272
273 static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
274 uint8_t *dst, ADPCMChannelStatus *c, int n)
275 {
276 #define FREEZE_INTERVAL 128
277 //FIXME 6% faster if frontier is a compile-time constant
278 const int frontier = 1 << avctx->trellis;
279 const int stride = avctx->channels;
280 const int version = avctx->codec->id;
281 const int max_paths = frontier*FREEZE_INTERVAL;
282 TrellisPath paths[max_paths], *p;
283 TrellisNode node_buf[2][frontier];
284 TrellisNode *nodep_buf[2][frontier];
285 TrellisNode **nodes = nodep_buf[0]; // nodes[] is always sorted by .ssd
286 TrellisNode **nodes_next = nodep_buf[1];
287 int pathn = 0, froze = -1, i, j, k;
288
289 assert(!(max_paths&(max_paths-1)));
290
291 memset(nodep_buf, 0, sizeof(nodep_buf));
292 nodes[0] = &node_buf[1][0];
293 nodes[0]->ssd = 0;
294 nodes[0]->path = 0;
295 nodes[0]->step = c->step_index;
296 nodes[0]->sample1 = c->sample1;
297 nodes[0]->sample2 = c->sample2;
298 if(version == CODEC_ID_ADPCM_IMA_WAV)
299 nodes[0]->sample1 = c->prev_sample;
300 if(version == CODEC_ID_ADPCM_MS)
301 nodes[0]->step = c->idelta;
302 if(version == CODEC_ID_ADPCM_YAMAHA) {
303 if(c->step == 0) {
304 nodes[0]->step = 127;
305 nodes[0]->sample1 = 0;
306 } else {
307 nodes[0]->step = c->step;
308 nodes[0]->sample1 = c->predictor;
309 }
310 }
311
312 for(i=0; i<n; i++) {
313 TrellisNode *t = node_buf[i&1];
314 TrellisNode **u;
315 int sample = samples[i*stride];
316 memset(nodes_next, 0, frontier*sizeof(TrellisNode*));
317 for(j=0; j<frontier && nodes[j]; j++) {
318 // higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too
319 const int range = (j < frontier/2) ? 1 : 0;
320 const int step = nodes[j]->step;
321 int nidx;
322 if(version == CODEC_ID_ADPCM_MS) {
323 const int predictor = ((nodes[j]->sample1 * c->coeff1) + (nodes[j]->sample2 * c->coeff2)) / 256;
324 const int div = (sample - predictor) / step;
325 const int nmin = clip(div-range, -8, 6);
326 const int nmax = clip(div+range, -7, 7);
327 for(nidx=nmin; nidx<=nmax; nidx++) {
328 const int nibble = nidx & 0xf;
329 int dec_sample = predictor + nidx * step;
330 #define STORE_NODE(NAME, STEP_INDEX)\
331 int d;\
332 uint32_t ssd;\
333 CLAMP_TO_SHORT(dec_sample);\
334 d = sample - dec_sample;\
335 ssd = nodes[j]->ssd + d*d;\
336 if(nodes_next[frontier-1] && ssd >= nodes_next[frontier-1]->ssd)\
337 continue;\
338 /* Collapse any two states with the same previous sample value. \
339 * One could also distinguish states by step and by 2nd to last
340 * sample, but the effects of that are negligible. */\
341 for(k=0; k<frontier && nodes_next[k]; k++) {\
342 if(dec_sample == nodes_next[k]->sample1) {\
343 assert(ssd >= nodes_next[k]->ssd);\
344 goto next_##NAME;\
345 }\
346 }\
347 for(k=0; k<frontier; k++) {\
348 if(!nodes_next[k] || ssd < nodes_next[k]->ssd) {\
349 TrellisNode *u = nodes_next[frontier-1];\
350 if(!u) {\
351 assert(pathn < max_paths);\
352 u = t++;\
353 u->path = pathn++;\
354 }\
355 u->ssd = ssd;\
356 u->step = STEP_INDEX;\
357 u->sample2 = nodes[j]->sample1;\
358 u->sample1 = dec_sample;\
359 paths[u->path].nibble = nibble;\
360 paths[u->path].prev = nodes[j]->path;\
361 memmove(&nodes_next[k+1], &nodes_next[k], (frontier-k-1)*sizeof(TrellisNode*));\
362 nodes_next[k] = u;\
363 break;\
364 }\
365 }\
366 next_##NAME:;
367 STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8));
368 }
369 } else if(version == CODEC_ID_ADPCM_IMA_WAV) {
370 #define LOOP_NODES(NAME, STEP_TABLE, STEP_INDEX)\
371 const int predictor = nodes[j]->sample1;\
372 const int div = (sample - predictor) * 4 / STEP_TABLE;\
373 int nmin = clip(div-range, -7, 6);\
374 int nmax = clip(div+range, -6, 7);\
375 if(nmin<=0) nmin--; /* distinguish -0 from +0 */\
376 if(nmax<0) nmax--;\
377 for(nidx=nmin; nidx<=nmax; nidx++) {\
378 const int nibble = nidx<0 ? 7-nidx : nidx;\
379 int dec_sample = predictor + (STEP_TABLE * yamaha_difflookup[nibble]) / 8;\
380 STORE_NODE(NAME, STEP_INDEX);\
381 }
382 LOOP_NODES(ima, step_table[step], clip(step + index_table[nibble], 0, 88));
383 } else { //CODEC_ID_ADPCM_YAMAHA
384 LOOP_NODES(yamaha, step, clip((step * yamaha_indexscale[nibble]) >> 8, 127, 24567));
385 #undef LOOP_NODES
386 #undef STORE_NODE
387 }
388 }
389
390 u = nodes;
391 nodes = nodes_next;
392 nodes_next = u;
393
394 // prevent overflow
395 if(nodes[0]->ssd > (1<<28)) {
396 for(j=1; j<frontier && nodes[j]; j++)
397 nodes[j]->ssd -= nodes[0]->ssd;
398 nodes[0]->ssd = 0;
399 }
400
401 // merge old paths to save memory
402 if(i == froze + FREEZE_INTERVAL) {
403 p = &paths[nodes[0]->path];
404 for(k=i; k>froze; k--) {
405 dst[k] = p->nibble;
406 p = &paths[p->prev];
407 }
408 froze = i;
409 pathn = 0;
410 // other nodes might use paths that don't coincide with the frozen one.
411 // checking which nodes do so is too slow, so just kill them all.
412 // this also slightly improves quality, but I don't know why.
413 memset(nodes+1, 0, (frontier-1)*sizeof(TrellisNode*));
414 }
415 }
416
417 p = &paths[nodes[0]->path];
418 for(i=n-1; i>froze; i--) {
419 dst[i] = p->nibble;
420 p = &paths[p->prev];
421 }
422
423 c->predictor = nodes[0]->sample1;
424 c->sample1 = nodes[0]->sample1;
425 c->sample2 = nodes[0]->sample2;
426 c->step_index = nodes[0]->step;
427 c->step = nodes[0]->step;
428 c->idelta = nodes[0]->step;
429 }
430
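For reference, both the FREEZE_INTERVAL merge and the final loop above recover the output by walking the reverse-linked paths[] chain from the best surviving node, writing one nibble per step from back to front. A self-contained toy sketch of that traceback, using made-up path data rather than real encoder state:

#include <stdio.h>

typedef struct TrellisPath {
    int nibble;   /* code chosen at this step */
    int prev;     /* index of the previous step's entry in paths[] */
} TrellisPath;

int main(void)
{
    /* hypothetical chain of three decisions, linked via .prev */
    TrellisPath paths[4] = { {0,0}, {3,0}, {7,1}, {2,2} };
    int dst[3];
    int n = 3, froze = -1, i;
    int p = 3;                    /* path index of the best final node */
    for (i = n-1; i > froze; i--) {
        dst[i] = paths[p].nibble; /* nibbles come out back-to-front */
        p = paths[p].prev;
    }
    for (i = 0; i < n; i++)
        printf("%d ", dst[i]);    /* prints: 3 7 2 */
    printf("\n");
    return 0;
}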
260 431 static int adpcm_encode_frame(AVCodecContext *avctx,
261 432 unsigned char *frame, int buf_size, void *data)
262 433 {
263 434 int n, i, st;
264 435 short *samples;
291 462 *dst++ = 0;
292 463 samples++;
293 464 }
294 465
295 466 /* stereo: 4 bytes (8 samples) for left, 4 bytes for right, 4 bytes left, ... */
467 if(avctx->trellis > 0) {
468 uint8_t buf[2][n*8];
469 adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n*8);
470 if(avctx->channels == 2)
471 adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n*8);
472 for(i=0; i<n; i++) {
473 *dst++ = buf[0][8*i+0] | (buf[0][8*i+1] << 4);
474 *dst++ = buf[0][8*i+2] | (buf[0][8*i+3] << 4);
475 *dst++ = buf[0][8*i+4] | (buf[0][8*i+5] << 4);
476 *dst++ = buf[0][8*i+6] | (buf[0][8*i+7] << 4);
477 if (avctx->channels == 2) {
478 *dst++ = buf[1][8*i+0] | (buf[1][8*i+1] << 4);
479 *dst++ = buf[1][8*i+2] | (buf[1][8*i+3] << 4);
480 *dst++ = buf[1][8*i+4] | (buf[1][8*i+5] << 4);
481 *dst++ = buf[1][8*i+6] | (buf[1][8*i+7] << 4);
482 }
483 }
484 } else
296 485 for (; n>0; n--) {
297 486 *dst = adpcm_ima_compress_sample(&c->status[0], samples[0]) & 0x0F;
298 487 *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels]) << 4) & 0xF0;
299 488 dst++;
300 489 *dst = adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 2]) & 0x0F;
350 539
351 540 *dst++ = c->status[i].sample2 & 0xFF;
352 541 *dst++ = c->status[i].sample2 >> 8;
353 542 }
354 543
544 if(avctx->trellis > 0) {
545 int n = avctx->block_align - 7*avctx->channels;
546 uint8_t buf[2][n];
547 if(avctx->channels == 1) {
548 n *= 2;
549 adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
550 for(i=0; i<n; i+=2)
551 *dst++ = (buf[0][i] << 4) | buf[0][i+1];
552 } else {
553 adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
554 adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n);
555 for(i=0; i<n; i++)
556 *dst++ = (buf[0][i] << 4) | buf[1][i];
557 }
558 } else
355 559 for(i=7*avctx->channels; i<avctx->block_align; i++) {
356 560 int nibble;
357 561 nibble = adpcm_ms_compress_sample(&c->status[ 0], *samples++)<<4;
358 562 nibble|= adpcm_ms_compress_sample(&c->status[st], *samples++);
359 563 *dst++ = nibble;
360 564 }
361 565 break;
362 566 case CODEC_ID_ADPCM_YAMAHA:
363 567 n = avctx->frame_size / 2;
568 if(avctx->trellis > 0) {
569 uint8_t buf[2][n*2];
570 n *= 2;
571 if(avctx->channels == 1) {
572 adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
573 for(i=0; i<n; i+=2)
574 *dst++ = buf[0][i] | (buf[0][i+1] << 4);
575 } else {
576 adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
577 adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n);
578 for(i=0; i<n; i++)
579 *dst++ = buf[0][i] | (buf[1][i] << 4);
580 }
581 } else
364 582 for (; n>0; n--) {
365 583 for(i = 0; i < avctx->channels; i++) {
366 584 int nibble;
367 585 nibble = adpcm_yamaha_compress_sample(&c->status[i], samples[i]);
368 586 nibble |= adpcm_yamaha_compress_sample(&c->status[i], samples[i+avctx->channels]) << 4;