Mercurial > libavcodec.hg
comparison ac3dec.c @ 5317:6d81881f257f libavcodec
AC-3 decoder, soc revision 57, Aug 19 12:44:38 2006 UTC by cloud9
Optimization as suggested by Benjamin.
author | jbr |
---|---|
date | Sat, 14 Jul 2007 16:02:08 +0000 |
parents | a09b700fcfca |
children | ef4ef249ca72 |
Comparison legend (status of each line in the side-by-side view below):
- equal
- deleted
- inserted
- replaced
5316:a09b700fcfca | 5317:6d81881f257f |
---|---|
169 DECLARE_ALIGNED_16(float, transform_coeffs[MAX_CHANNELS][BLOCK_SIZE]); | 169 DECLARE_ALIGNED_16(float, transform_coeffs[MAX_CHANNELS][BLOCK_SIZE]); |
170 | 170 |
171 /* For IMDCT. */ | 171 /* For IMDCT. */ |
172 MDCTContext imdct_512; //N/8 point IFFT context | 172 MDCTContext imdct_512; //N/8 point IFFT context |
173 MDCTContext imdct_256; //N/4 point IFFT context | 173 MDCTContext imdct_256; //N/4 point IFFT context |
174 DSPContext dsp; //for optimization | |
174 DECLARE_ALIGNED_16(float, output[MAX_CHANNELS][BLOCK_SIZE]); | 175 DECLARE_ALIGNED_16(float, output[MAX_CHANNELS][BLOCK_SIZE]); |
175 DECLARE_ALIGNED_16(float, delay[MAX_CHANNELS][BLOCK_SIZE]); | 176 DECLARE_ALIGNED_16(float, delay[MAX_CHANNELS][BLOCK_SIZE]); |
176 DECLARE_ALIGNED_16(float, tmp_imdct[BLOCK_SIZE]); | 177 DECLARE_ALIGNED_16(float, tmp_imdct[BLOCK_SIZE]); |
177 DECLARE_ALIGNED_16(float, tmp_output[BLOCK_SIZE * 2]); | 178 DECLARE_ALIGNED_16(float, tmp_output[BLOCK_SIZE * 2]); |
178 | 179 |
395 AC3DecodeContext *ctx = avctx->priv_data; | 396 AC3DecodeContext *ctx = avctx->priv_data; |
396 | 397 |
397 ac3_tables_init(); | 398 ac3_tables_init(); |
398 ff_mdct_init(&ctx->imdct_256, 8, 1); | 399 ff_mdct_init(&ctx->imdct_256, 8, 1); |
399 ff_mdct_init(&ctx->imdct_512, 9, 1); | 400 ff_mdct_init(&ctx->imdct_512, 9, 1); |
401 dsputil_init(&ctx->dsp, avctx); | |
400 dither_seed(&ctx->dith_state, 0); | 402 dither_seed(&ctx->dith_state, 0); |
401 | 403 |
402 return 0; | 404 return 0; |
403 } | 405 } |
404 | 406 |
1564 | 1566 |
1565 static void do_imdct_256(AC3DecodeContext *ctx, int chindex) | 1567 static void do_imdct_256(AC3DecodeContext *ctx, int chindex) |
1566 { | 1568 { |
1567 int k; | 1569 int k; |
1568 float x1[128], x2[128]; | 1570 float x1[128], x2[128]; |
1571 float *ptr; | |
1569 | 1572 |
1570 for (k = 0; k < N / 4; k++) { | 1573 for (k = 0; k < N / 4; k++) { |
1571 x1[k] = ctx->transform_coeffs[chindex][2 * k]; | 1574 x1[k] = ctx->transform_coeffs[chindex][2 * k]; |
1572 x2[k] = ctx->transform_coeffs[chindex][2 * k + 1]; | 1575 x2[k] = ctx->transform_coeffs[chindex][2 * k + 1]; |
1573 } | 1576 } |
1574 | 1577 |
1575 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output, x1, ctx->tmp_imdct); | 1578 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output, x1, ctx->tmp_imdct); |
1576 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output + 256, x2, ctx->tmp_imdct); | 1579 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output + 256, x2, ctx->tmp_imdct); |
1577 | 1580 |
1578 for (k = 0; k < N / 2; k++) { | 1581 ptr = ctx->output[chindex]; |
1582 ctx->dsp.vector_fmul_add_add(ptr, ctx->tmp_output, window, ctx->delay[chindex], 0, BLOCK_SIZE, 1); | |
1583 ptr = ctx->delay[chindex]; | |
1584 ctx->dsp.vector_fmul_reverse(ptr, ctx->tmp_output + 256, window, BLOCK_SIZE); | |
1585 /*for (k = 0; k < N / 2; k++) { | |
1579 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k]; | 1586 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k]; |
1580 //dump_floats("samples", 10, ctx->output[chindex], 256); | 1587 //dump_floats("samples", 10, ctx->output[chindex], 256); |
1581 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k]; | 1588 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k]; |
1582 } | 1589 }*/ |
1583 } | 1590 } |
1584 | 1591 |
1585 static void do_imdct_512(AC3DecodeContext *ctx, int chindex) | 1592 static void do_imdct_512(AC3DecodeContext *ctx, int chindex) |
1586 { | 1593 { |
1587 int k; | 1594 //int k; |
1595 float *ptr; | |
1588 | 1596 |
1589 ff_imdct_calc(&ctx->imdct_512, ctx->tmp_output, | 1597 ff_imdct_calc(&ctx->imdct_512, ctx->tmp_output, |
1590 ctx->transform_coeffs[chindex], ctx->tmp_imdct); | 1598 ctx->transform_coeffs[chindex], ctx->tmp_imdct); |
1591 //ff_imdct_calc_ac3_512(&ctx->imdct_512, ctx->tmp_output, ctx->transform_coeffs[chindex], | 1599 //ff_imdct_calc_ac3_512(&ctx->imdct_512, ctx->tmp_output, ctx->transform_coeffs[chindex], |
1592 // ctx->tmp_imdct, window); | 1600 // ctx->tmp_imdct, window); |
1593 | 1601 ptr = ctx->output[chindex]; |
1594 for (k = 0; k < N / 2; k++) { | 1602 ctx->dsp.vector_fmul_add_add(ptr, ctx->tmp_output, window, ctx->delay[chindex], 0, BLOCK_SIZE, 1); |
1603 ptr = ctx->delay[chindex]; | |
1604 ctx->dsp.vector_fmul_reverse(ptr, ctx->tmp_output + 256, window, BLOCK_SIZE); | |
1605 | |
1606 /*for (k = 0; k < N / 2; k++) { | |
1595 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k]; | 1607 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k]; |
1596 //dump_floats("samples", 10, ctx->output[chindex], 256); | 1608 //dump_floats("samples", 10, ctx->output[chindex], 256); |
1597 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k]; | 1609 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k]; |
1598 } | 1610 } */ |
1599 } | 1611 } |
1600 | 1612 |
1601 static inline void do_imdct(AC3DecodeContext *ctx) | 1613 static inline void do_imdct(AC3DecodeContext *ctx) |
1602 { | 1614 { |
1603 int i; | 1615 int i; |