Mercurial > mplayer.hg
view libfaad2/sbr_qmf.c @ 13394:455a5056801f
New generic 'portable anymap' video output driver. It supports portable
pixmaps and graymaps in both raw and ASCII mode. Besides PPM and PGM, it
can also output PGMYUV files which are PGM files with the U and V plane
appended to the bottom of the Y image (bottom left and bottom right). All
files can be written to the current directory, to a specified output directory
or to multiple subdirectories if the filesystem can't handle the amount of
files in one directory anymore.
Note: This driver is not yet activated and will not be compiled and linked
to libvo. A separate patch will take care of that. This is just for adding
the file to the repository.
author | ivo |
---|---|
date | Mon, 20 Sep 2004 00:54:57 +0000 |
parents | d81145997036 |
children | 6d50ef45a058 |
line wrap: on
line source
/* ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 2 of the License, or ** (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ** ** Any non-GPL usage of this software or parts of this software is strictly ** forbidden. ** ** Commercial non-GPL licensing of this software is possible. ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. ** ** Initially modified for use with MPlayer by Arpad Gereöffy on 2003/08/30 ** $Id: sbr_qmf.c,v 1.3 2004/06/02 22:59:03 diego Exp $ ** detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/ **/ #include "common.h" #include "structs.h" #ifdef SBR_DEC #include <stdlib.h> #include <string.h> #include "sbr_dct.h" #include "sbr_qmf.h" #include "sbr_qmf_c.h" #include "sbr_syntax.h" qmfa_info *qmfa_init(uint8_t channels) { qmfa_info *qmfa = (qmfa_info*)faad_malloc(sizeof(qmfa_info)); qmfa->x = (real_t*)faad_malloc(channels * 10 * sizeof(real_t)); memset(qmfa->x, 0, channels * 10 * sizeof(real_t)); qmfa->channels = channels; return qmfa; } void qmfa_end(qmfa_info *qmfa) { if (qmfa) { if (qmfa->x) faad_free(qmfa->x); faad_free(qmfa); } } void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input, qmf_t X[MAX_NTSRHFG][32], uint8_t offset, uint8_t kx) { ALIGN real_t u[64]; #ifndef SBR_LOW_POWER ALIGN real_t x[64], y[64]; #else ALIGN real_t y[32]; #endif uint16_t in = 0; uint8_t l; /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { int16_t n; /* shift input buffer x */ memmove(qmfa->x + 32, qmfa->x, (320-32)*sizeof(real_t)); /* add new samples to input buffer x */ for (n = 32 - 1; n >= 0; n--) { #ifdef FIXED_POINT qmfa->x[n] = (input[in++]) >> 5; #else qmfa->x[n] = input[in++]; #endif } /* window and summation to create array u */ for (n = 0; n < 64; n++) { u[n] = MUL_F(qmfa->x[n], qmf_c[2*n]) + MUL_F(qmfa->x[n + 64], qmf_c[2*(n + 64)]) + MUL_F(qmfa->x[n + 128], qmf_c[2*(n + 128)]) + MUL_F(qmfa->x[n + 192], qmf_c[2*(n + 192)]) + MUL_F(qmfa->x[n + 256], qmf_c[2*(n + 256)]); } /* calculate 32 subband samples by introducing X */ #ifdef SBR_LOW_POWER y[0] = u[48]; for (n = 1; n < 16; n++) y[n] = u[n+48] + u[48-n]; for (n = 16; n < 32; n++) y[n] = -u[n-16] + u[48-n]; DCT3_32_unscaled(u, y); for (n = 0; n < 32; n++) { if (n < kx) { #ifdef FIXED_POINT QMF_RE(X[l + offset][n]) = u[n] << 1; #else QMF_RE(X[l + offset][n]) = 2. * u[n]; #endif } else { QMF_RE(X[l + offset][n]) = 0; } } #else x[0] = u[0]; for (n = 0; n < 31; n++) { x[2*n+1] = u[n+1] + u[63-n]; x[2*n+2] = u[n+1] - u[63-n]; } x[63] = u[32]; DCT4_64_kernel(y, x); for (n = 0; n < 32; n++) { if (n < kx) { #ifdef FIXED_POINT QMF_RE(X[l + offset][n]) = y[n] << 1; QMF_IM(X[l + offset][n]) = -y[63-n] << 1; #else QMF_RE(X[l + offset][n]) = 2. * y[n]; QMF_IM(X[l + offset][n]) = -2. * y[63-n]; #endif } else { QMF_RE(X[l + offset][n]) = 0; QMF_IM(X[l + offset][n]) = 0; } } #endif } } qmfs_info *qmfs_init(uint8_t channels) { qmfs_info *qmfs = (qmfs_info*)faad_malloc(sizeof(qmfs_info)); #ifndef SBR_LOW_POWER qmfs->v[0] = (real_t*)faad_malloc(channels * 10 * sizeof(real_t)); memset(qmfs->v[0], 0, channels * 10 * sizeof(real_t)); qmfs->v[1] = (real_t*)faad_malloc(channels * 10 * sizeof(real_t)); memset(qmfs->v[1], 0, channels * 10 * sizeof(real_t)); #else qmfs->v[0] = (real_t*)faad_malloc(channels * 20 * sizeof(real_t)); memset(qmfs->v[0], 0, channels * 20 * sizeof(real_t)); qmfs->v[1] = NULL; #endif qmfs->v_index = 0; qmfs->channels = channels; #ifdef USE_SSE if (cpu_has_sse()) { qmfs->qmf_func = sbr_qmf_synthesis_64_sse; } else { qmfs->qmf_func = sbr_qmf_synthesis_64; } #endif return qmfs; } void qmfs_end(qmfs_info *qmfs) { if (qmfs) { if (qmfs->v[0]) faad_free(qmfs->v[0]); #ifndef SBR_LOW_POWER if (qmfs->v[1]) faad_free(qmfs->v[1]); #endif faad_free(qmfs); } } #ifdef SBR_LOW_POWER void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { ALIGN real_t x[64]; ALIGN real_t y[64]; int16_t n, k, out = 0; uint8_t l; /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { //real_t *v0, *v1; /* shift buffers */ //memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t)); //memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t)); memmove(qmfs->v[0] + 128, qmfs->v[0], (1280-128)*sizeof(real_t)); //v0 = qmfs->v[qmfs->v_index]; //v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; //qmfs->v_index = (qmfs->v_index + 1) & 0x1; /* calculate 128 samples */ for (k = 0; k < 64; k++) { #ifdef FIXED_POINT x[k] = QMF_RE(X[l][k]); #else x[k] = QMF_RE(X[l][k]) / 32.; #endif } for (n = 0; n < 32; n++) { y[2*n] = -x[2*n]; y[2*n+1] = x[2*n+1]; } DCT2_64_unscaled(x, x); for (n = 0; n < 64; n++) { qmfs->v[0][n+32] = x[n]; } for (n = 0; n < 32; n++) { qmfs->v[0][31 - n] = x[n + 1]; } DST2_64_unscaled(x, y); qmfs->v[0][96] = 0; for (n = 1; n < 32; n++) { qmfs->v[0][n + 96] = x[n-1]; } /* calculate 64 output samples and window */ for (k = 0; k < 64; k++) { #if 1 output[out++] = MUL_F(qmfs->v[0][k], qmf_c[k]) + MUL_F(qmfs->v[0][192 + k], qmf_c[64 + k]) + MUL_F(qmfs->v[0][256 + k], qmf_c[128 + k]) + MUL_F(qmfs->v[0][256 + 192 + k], qmf_c[128 + 64 + k]) + MUL_F(qmfs->v[0][512 + k], qmf_c[256 + k]) + MUL_F(qmfs->v[0][512 + 192 + k], qmf_c[256 + 64 + k]) + MUL_F(qmfs->v[0][768 + k], qmf_c[384 + k]) + MUL_F(qmfs->v[0][768 + 192 + k], qmf_c[384 + 64 + k]) + MUL_F(qmfs->v[0][1024 + k], qmf_c[512 + k]) + MUL_F(qmfs->v[0][1024 + 192 + k], qmf_c[512 + 64 + k]); #else output[out++] = MUL_F(v0[k], qmf_c[k]) + MUL_F(v0[64 + k], qmf_c[64 + k]) + MUL_F(v0[128 + k], qmf_c[128 + k]) + MUL_F(v0[192 + k], qmf_c[192 + k]) + MUL_F(v0[256 + k], qmf_c[256 + k]) + MUL_F(v0[320 + k], qmf_c[320 + k]) + MUL_F(v0[384 + k], qmf_c[384 + k]) + MUL_F(v0[448 + k], qmf_c[448 + k]) + MUL_F(v0[512 + k], qmf_c[512 + k]) + MUL_F(v0[576 + k], qmf_c[576 + k]); #endif } } } void sbr_qmf_synthesis_64_sse(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { ALIGN real_t x[64]; ALIGN real_t y[64]; ALIGN real_t y2[64]; int16_t n, k, out = 0; uint8_t l; /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { //real_t *v0, *v1; /* shift buffers */ //memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t)); //memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t)); memmove(qmfs->v[0] + 128, qmfs->v[0], (1280-128)*sizeof(real_t)); //v0 = qmfs->v[qmfs->v_index]; //v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; //qmfs->v_index = (qmfs->v_index + 1) & 0x1; /* calculate 128 samples */ for (k = 0; k < 64; k++) { #ifdef FIXED_POINT x[k] = QMF_RE(X[l][k]); #else x[k] = QMF_RE(X[l][k]) / 32.; #endif } for (n = 0; n < 32; n++) { y[2*n] = -x[2*n]; y[2*n+1] = x[2*n+1]; } DCT2_64_unscaled(x, x); for (n = 0; n < 64; n++) { qmfs->v[0][n+32] = x[n]; } for (n = 0; n < 32; n++) { qmfs->v[0][31 - n] = x[n + 1]; } DST2_64_unscaled(x, y); qmfs->v[0][96] = 0; for (n = 1; n < 32; n++) { qmfs->v[0][n + 96] = x[n-1]; } /* calculate 64 output samples and window */ for (k = 0; k < 64; k++) { #if 1 output[out++] = MUL_F(qmfs->v[0][k], qmf_c[k]) + MUL_F(qmfs->v[0][192 + k], qmf_c[64 + k]) + MUL_F(qmfs->v[0][256 + k], qmf_c[128 + k]) + MUL_F(qmfs->v[0][256 + 192 + k], qmf_c[128 + 64 + k]) + MUL_F(qmfs->v[0][512 + k], qmf_c[256 + k]) + MUL_F(qmfs->v[0][512 + 192 + k], qmf_c[256 + 64 + k]) + MUL_F(qmfs->v[0][768 + k], qmf_c[384 + k]) + MUL_F(qmfs->v[0][768 + 192 + k], qmf_c[384 + 64 + k]) + MUL_F(qmfs->v[0][1024 + k], qmf_c[512 + k]) + MUL_F(qmfs->v[0][1024 + 192 + k], qmf_c[512 + 64 + k]); #else output[out++] = MUL_F(v0[k], qmf_c[k]) + MUL_F(v0[64 + k], qmf_c[64 + k]) + MUL_F(v0[128 + k], qmf_c[128 + k]) + MUL_F(v0[192 + k], qmf_c[192 + k]) + MUL_F(v0[256 + k], qmf_c[256 + k]) + MUL_F(v0[320 + k], qmf_c[320 + k]) + MUL_F(v0[384 + k], qmf_c[384 + k]) + MUL_F(v0[448 + k], qmf_c[448 + k]) + MUL_F(v0[512 + k], qmf_c[512 + k]) + MUL_F(v0[576 + k], qmf_c[576 + k]); #endif } } } #else void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { ALIGN real_t x1[64], x2[64]; real_t scale = 1.f/64.f; int16_t n, k, out = 0; uint8_t l; /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { real_t *v0, *v1; /* shift buffers */ memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t)); memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t)); v0 = qmfs->v[qmfs->v_index]; v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; qmfs->v_index = (qmfs->v_index + 1) & 0x1; /* calculate 128 samples */ x1[0] = scale*QMF_RE(X[l][0]); x2[63] = scale*QMF_IM(X[l][0]); for (k = 0; k < 31; k++) { x1[2*k+1] = scale*(QMF_RE(X[l][2*k+1]) - QMF_RE(X[l][2*k+2])); x1[2*k+2] = scale*(QMF_RE(X[l][2*k+1]) + QMF_RE(X[l][2*k+2])); x2[61 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) - QMF_IM(X[l][2*k+1])); x2[62 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) + QMF_IM(X[l][2*k+1])); } x1[63] = scale*QMF_RE(X[l][63]); x2[0] = scale*QMF_IM(X[l][63]); DCT4_64_kernel(x1, x1); DCT4_64_kernel(x2, x2); for (n = 0; n < 32; n++) { v0[ 2*n] = x2[2*n] - x1[2*n]; v1[63-2*n] = x2[2*n] + x1[2*n]; v0[ 2*n+1] = -x2[2*n+1] - x1[2*n+1]; v1[62-2*n] = -x2[2*n+1] + x1[2*n+1]; } /* calculate 64 output samples and window */ for (k = 0; k < 64; k++) { output[out++] = MUL_F(v0[k], qmf_c[k]) + MUL_F(v0[64 + k], qmf_c[64 + k]) + MUL_F(v0[128 + k], qmf_c[128 + k]) + MUL_F(v0[192 + k], qmf_c[192 + k]) + MUL_F(v0[256 + k], qmf_c[256 + k]) + MUL_F(v0[320 + k], qmf_c[320 + k]) + MUL_F(v0[384 + k], qmf_c[384 + k]) + MUL_F(v0[448 + k], qmf_c[448 + k]) + MUL_F(v0[512 + k], qmf_c[512 + k]) + MUL_F(v0[576 + k], qmf_c[576 + k]); } } } #ifdef USE_SSE void memmove_sse_576(real_t *out, const real_t *in) { __m128 m[144]; uint16_t i; for (i = 0; i < 144; i++) { m[i] = _mm_load_ps(&in[i*4]); } for (i = 0; i < 144; i++) { _mm_store_ps(&out[i*4], m[i]); } } void sbr_qmf_synthesis_64_sse(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { ALIGN real_t x1[64], x2[64]; real_t scale = 1.f/64.f; int16_t n, k, out = 0; uint8_t l; /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { real_t *v0, *v1; /* shift buffers */ memmove_sse_576(qmfs->v[0] + 64, qmfs->v[0]); memmove_sse_576(qmfs->v[1] + 64, qmfs->v[1]); v0 = qmfs->v[qmfs->v_index]; v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; qmfs->v_index = (qmfs->v_index + 1) & 0x1; /* calculate 128 samples */ x1[0] = scale*QMF_RE(X[l][0]); x2[63] = scale*QMF_IM(X[l][0]); for (k = 0; k < 31; k++) { x1[2*k+1] = scale*(QMF_RE(X[l][2*k+1]) - QMF_RE(X[l][2*k+2])); x1[2*k+2] = scale*(QMF_RE(X[l][2*k+1]) + QMF_RE(X[l][2*k+2])); x2[61 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) - QMF_IM(X[l][2*k+1])); x2[62 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) + QMF_IM(X[l][2*k+1])); } x1[63] = scale*QMF_RE(X[l][63]); x2[0] = scale*QMF_IM(X[l][63]); DCT4_64_kernel(x1, x1); DCT4_64_kernel(x2, x2); for (n = 0; n < 32; n++) { v0[ 2*n ] = x2[2*n] - x1[2*n]; v1[63- 2*n ] = x2[2*n] + x1[2*n]; v0[ 2*n+1 ] = -x2[2*n+1] - x1[2*n+1]; v1[63-(2*n+1)] = -x2[2*n+1] + x1[2*n+1]; } /* calculate 64 output samples and window */ for (k = 0; k < 64; k+=4) { __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9; __m128 c0, c1, c2, c3, c4, c5, c6, c7, c8, c9; __m128 s1, s2, s3, s4, s5, s6, s7, s8, s9; m0 = _mm_load_ps(&v0[k]); m1 = _mm_load_ps(&v0[k + 64]); m2 = _mm_load_ps(&v0[k + 128]); m3 = _mm_load_ps(&v0[k + 192]); m4 = _mm_load_ps(&v0[k + 256]); c0 = _mm_load_ps(&qmf_c[k]); c1 = _mm_load_ps(&qmf_c[k + 64]); c2 = _mm_load_ps(&qmf_c[k + 128]); c3 = _mm_load_ps(&qmf_c[k + 192]); c4 = _mm_load_ps(&qmf_c[k + 256]); m0 = _mm_mul_ps(m0, c0); m1 = _mm_mul_ps(m1, c1); m2 = _mm_mul_ps(m2, c2); m3 = _mm_mul_ps(m3, c3); m4 = _mm_mul_ps(m4, c4); s1 = _mm_add_ps(m0, m1); s2 = _mm_add_ps(m2, m3); s6 = _mm_add_ps(s1, s2); m5 = _mm_load_ps(&v0[k + 320]); m6 = _mm_load_ps(&v0[k + 384]); m7 = _mm_load_ps(&v0[k + 448]); m8 = _mm_load_ps(&v0[k + 512]); m9 = _mm_load_ps(&v0[k + 576]); c5 = _mm_load_ps(&qmf_c[k + 320]); c6 = _mm_load_ps(&qmf_c[k + 384]); c7 = _mm_load_ps(&qmf_c[k + 448]); c8 = _mm_load_ps(&qmf_c[k + 512]); c9 = _mm_load_ps(&qmf_c[k + 576]); m5 = _mm_mul_ps(m5, c5); m6 = _mm_mul_ps(m6, c6); m7 = _mm_mul_ps(m7, c7); m8 = _mm_mul_ps(m8, c8); m9 = _mm_mul_ps(m9, c9); s3 = _mm_add_ps(m4, m5); s4 = _mm_add_ps(m6, m7); s5 = _mm_add_ps(m8, m9); s7 = _mm_add_ps(s3, s4); s8 = _mm_add_ps(s5, s6); s9 = _mm_add_ps(s7, s8); _mm_store_ps(&output[out], s9); out += 4; } } } #endif #endif #endif