comparison ac3dec.c @ 5317:6d81881f257f libavcodec

AC-3 decoder, soc revision 57, Aug 19 12:44:38 2006 UTC by cloud9. Optimization as suggested by Benjamin.
author jbr
date Sat, 14 Jul 2007 16:02:08 +0000
parents a09b700fcfca
children ef4ef249ca72
comparison
equal deleted inserted replaced
5316:a09b700fcfca 5317:6d81881f257f
169 DECLARE_ALIGNED_16(float, transform_coeffs[MAX_CHANNELS][BLOCK_SIZE]); 169 DECLARE_ALIGNED_16(float, transform_coeffs[MAX_CHANNELS][BLOCK_SIZE]);
170 170
171 /* For IMDCT. */ 171 /* For IMDCT. */
172 MDCTContext imdct_512; //N/8 point IFFT context 172 MDCTContext imdct_512; //N/8 point IFFT context
173 MDCTContext imdct_256; //N/4 point IFFT context 173 MDCTContext imdct_256; //N/4 point IFFT context
174 DSPContext dsp; //for optimization
174 DECLARE_ALIGNED_16(float, output[MAX_CHANNELS][BLOCK_SIZE]); 175 DECLARE_ALIGNED_16(float, output[MAX_CHANNELS][BLOCK_SIZE]);
175 DECLARE_ALIGNED_16(float, delay[MAX_CHANNELS][BLOCK_SIZE]); 176 DECLARE_ALIGNED_16(float, delay[MAX_CHANNELS][BLOCK_SIZE]);
176 DECLARE_ALIGNED_16(float, tmp_imdct[BLOCK_SIZE]); 177 DECLARE_ALIGNED_16(float, tmp_imdct[BLOCK_SIZE]);
177 DECLARE_ALIGNED_16(float, tmp_output[BLOCK_SIZE * 2]); 178 DECLARE_ALIGNED_16(float, tmp_output[BLOCK_SIZE * 2]);
178 179
395 AC3DecodeContext *ctx = avctx->priv_data; 396 AC3DecodeContext *ctx = avctx->priv_data;
396 397
397 ac3_tables_init(); 398 ac3_tables_init();
398 ff_mdct_init(&ctx->imdct_256, 8, 1); 399 ff_mdct_init(&ctx->imdct_256, 8, 1);
399 ff_mdct_init(&ctx->imdct_512, 9, 1); 400 ff_mdct_init(&ctx->imdct_512, 9, 1);
401 dsputil_init(&ctx->dsp, avctx);
400 dither_seed(&ctx->dith_state, 0); 402 dither_seed(&ctx->dith_state, 0);
401 403
402 return 0; 404 return 0;
403 } 405 }
404 406
1564 1566
1565 static void do_imdct_256(AC3DecodeContext *ctx, int chindex) 1567 static void do_imdct_256(AC3DecodeContext *ctx, int chindex)
1566 { 1568 {
1567 int k; 1569 int k;
1568 float x1[128], x2[128]; 1570 float x1[128], x2[128];
1571 float *ptr;
1569 1572
1570 for (k = 0; k < N / 4; k++) { 1573 for (k = 0; k < N / 4; k++) {
1571 x1[k] = ctx->transform_coeffs[chindex][2 * k]; 1574 x1[k] = ctx->transform_coeffs[chindex][2 * k];
1572 x2[k] = ctx->transform_coeffs[chindex][2 * k + 1]; 1575 x2[k] = ctx->transform_coeffs[chindex][2 * k + 1];
1573 } 1576 }
1574 1577
1575 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output, x1, ctx->tmp_imdct); 1578 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output, x1, ctx->tmp_imdct);
1576 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output + 256, x2, ctx->tmp_imdct); 1579 ff_imdct_calc(&ctx->imdct_256, ctx->tmp_output + 256, x2, ctx->tmp_imdct);
1577 1580
1578 for (k = 0; k < N / 2; k++) { 1581 ptr = ctx->output[chindex];
1582 ctx->dsp.vector_fmul_add_add(ptr, ctx->tmp_output, window, ctx->delay[chindex], 0, BLOCK_SIZE, 1);
1583 ptr = ctx->delay[chindex];
1584 ctx->dsp.vector_fmul_reverse(ptr, ctx->tmp_output + 256, window, BLOCK_SIZE);
1585 /*for (k = 0; k < N / 2; k++) {
1579 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k]; 1586 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k];
1580 //dump_floats("samples", 10, ctx->output[chindex], 256); 1587 //dump_floats("samples", 10, ctx->output[chindex], 256);
1581 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k]; 1588 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k];
1582 } 1589 }*/
1583 } 1590 }
1584 1591
1585 static void do_imdct_512(AC3DecodeContext *ctx, int chindex) 1592 static void do_imdct_512(AC3DecodeContext *ctx, int chindex)
1586 { 1593 {
1587 int k; 1594 //int k;
1595 float *ptr;
1588 1596
1589 ff_imdct_calc(&ctx->imdct_512, ctx->tmp_output, 1597 ff_imdct_calc(&ctx->imdct_512, ctx->tmp_output,
1590 ctx->transform_coeffs[chindex], ctx->tmp_imdct); 1598 ctx->transform_coeffs[chindex], ctx->tmp_imdct);
1591 //ff_imdct_calc_ac3_512(&ctx->imdct_512, ctx->tmp_output, ctx->transform_coeffs[chindex], 1599 //ff_imdct_calc_ac3_512(&ctx->imdct_512, ctx->tmp_output, ctx->transform_coeffs[chindex],
1592 // ctx->tmp_imdct, window); 1600 // ctx->tmp_imdct, window);
1593 1601 ptr = ctx->output[chindex];
1594 for (k = 0; k < N / 2; k++) { 1602 ctx->dsp.vector_fmul_add_add(ptr, ctx->tmp_output, window, ctx->delay[chindex], 0, BLOCK_SIZE, 1);
1603 ptr = ctx->delay[chindex];
1604 ctx->dsp.vector_fmul_reverse(ptr, ctx->tmp_output + 256, window, BLOCK_SIZE);
1605
1606 /*for (k = 0; k < N / 2; k++) {
1595 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k]; 1607 ctx->output[chindex][k] = ctx->tmp_output[k] * window[k] + ctx->delay[chindex][k];
1596 //dump_floats("samples", 10, ctx->output[chindex], 256); 1608 //dump_floats("samples", 10, ctx->output[chindex], 256);
1597 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k]; 1609 ctx->delay[chindex][k] = ctx->tmp_output[N / 2 + k] * window[255 - k];
1598 } 1610 } */
1599 } 1611 }
1600 1612
1601 static inline void do_imdct(AC3DecodeContext *ctx) 1613 static inline void do_imdct(AC3DecodeContext *ctx)
1602 { 1614 {
1603 int i; 1615 int i;