comparison mpegaudiodec.c @ 1733:b47d56b1a049 libavcodec

optimize compute_antialias() and add a floating-point-based alternative (2x faster)
author michael
date Thu, 08 Jan 2004 21:08:57 +0000
parents 0d2b59cf9f45
children 8aace334bcf0
comparison
equal deleted inserted replaced
1732:f716b8f47d98 1733:b47d56b1a049
63 63
64 /****************/ 64 /****************/
65 65
66 #define HEADER_SIZE 4 66 #define HEADER_SIZE 4
67 #define BACKSTEP_SIZE 512 67 #define BACKSTEP_SIZE 512
68
69 struct GranuleDef;
68 70
69 typedef struct MPADecodeContext { 71 typedef struct MPADecodeContext {
70 uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */ 72 uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
71 int inbuf_index; 73 int inbuf_index;
72 uint8_t *inbuf_ptr, *inbuf; 74 uint8_t *inbuf_ptr, *inbuf;
91 int32_t sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT] __attribute__((aligned(16))); 93 int32_t sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT] __attribute__((aligned(16)));
92 int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ 94 int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
93 #ifdef DEBUG 95 #ifdef DEBUG
94 int frame_count; 96 int frame_count;
95 #endif 97 #endif
98 void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g);
96 } MPADecodeContext; 99 } MPADecodeContext;
97 100
98 /* layer 3 "granule" */ 101 /* layer 3 "granule" */
99 typedef struct GranuleDef { 102 typedef struct GranuleDef {
100 uint8_t scfsi; 103 uint8_t scfsi;
125 const uint16_t *codes; 128 const uint16_t *codes;
126 } HuffTable; 129 } HuffTable;
127 130
128 #include "mpegaudiodectab.h" 131 #include "mpegaudiodectab.h"
129 132
133 static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
134 static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);
135
130 /* vlc structure for decoding layer 3 huffman tables */ 136 /* vlc structure for decoding layer 3 huffman tables */
131 static VLC huff_vlc[16]; 137 static VLC huff_vlc[16];
132 static uint8_t *huff_code_table[16]; 138 static uint8_t *huff_code_table[16];
133 static VLC huff_quad_vlc[2]; 139 static VLC huff_quad_vlc[2];
134 /* computed from band_size_long */ 140 /* computed from band_size_long */
142 static uint32_t *table_4_3_value; 148 static uint32_t *table_4_3_value;
143 #endif 149 #endif
144 /* intensity stereo coef table */ 150 /* intensity stereo coef table */
145 static int32_t is_table[2][16]; 151 static int32_t is_table[2][16];
146 static int32_t is_table_lsf[2][2][16]; 152 static int32_t is_table_lsf[2][2][16];
147 static int32_t csa_table[8][2]; 153 static int32_t csa_table[8][4];
154 static float csa_table_float[8][4];
148 static int32_t mdct_win[8][36]; 155 static int32_t mdct_win[8][36];
149 156
150 /* lower 2 bits: modulo 3, higher bits: shift */ 157 /* lower 2 bits: modulo 3, higher bits: shift */
151 static uint16_t scale_factor_modshift[64]; 158 static uint16_t scale_factor_modshift[64];
152 /* [i][j]: 2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */ 159 /* [i][j]: 2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
453 dprintf("is_table_lsf %d %d: %x %x\n", 460 dprintf("is_table_lsf %d %d: %x %x\n",
454 i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]); 461 i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
455 } 462 }
456 } 463 }
457 464
465 if(avctx->antialias_algo == FF_AA_INT)
466 s->compute_antialias= compute_antialias_integer;
467 else
468 s->compute_antialias= compute_antialias_float;
458 for(i=0;i<8;i++) { 469 for(i=0;i<8;i++) {
459 float ci, cs, ca; 470 float ci, cs, ca;
460 ci = ci_table[i]; 471 ci = ci_table[i];
461 cs = 1.0 / sqrt(1.0 + ci * ci); 472 cs = 1.0 / sqrt(1.0 + ci * ci);
462 ca = cs * ci; 473 ca = cs * ci;
463 csa_table[i][0] = FIX(cs); 474 csa_table[i][0] = FIX(cs);
464 csa_table[i][1] = FIX(ca); 475 csa_table[i][1] = FIX(ca);
476 csa_table[i][2] = FIX(ca) + FIX(cs);
477 csa_table[i][3] = FIX(ca) - FIX(cs);
478 csa_table_float[i][0] = cs;
479 csa_table_float[i][1] = ca;
480 csa_table_float[i][2] = ca + cs;
481 csa_table_float[i][3] = ca - cs;
482 // printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca));
465 } 483 }
466 484
467 /* compute mdct windows */ 485 /* compute mdct windows */
468 for(i=0;i<36;i++) { 486 for(i=0;i<36;i++) {
469 int v; 487 int v;
1890 tab1[i] = tmp0 - tmp1; 1908 tab1[i] = tmp0 - tmp1;
1891 } 1909 }
1892 } 1910 }
1893 } 1911 }
1894 1912
1895 static void compute_antialias(MPADecodeContext *s, 1913 static void compute_antialias_integer(MPADecodeContext *s,
1896 GranuleDef *g) 1914 GranuleDef *g)
1897 { 1915 {
1898 int32_t *ptr, *p0, *p1, *csa; 1916 int32_t *ptr, *p0, *p1, *csa;
1899 int n, tmp0, tmp1, i, j; 1917 int n, i, j;
1900 1918
1901 /* we antialias only "long" bands */ 1919 /* we antialias only "long" bands */
1902 if (g->block_type == 2) { 1920 if (g->block_type == 2) {
1903 if (!g->switch_point) 1921 if (!g->switch_point)
1904 return; 1922 return;
1910 1928
1911 ptr = g->sb_hybrid + 18; 1929 ptr = g->sb_hybrid + 18;
1912 for(i = n;i > 0;i--) { 1930 for(i = n;i > 0;i--) {
1913 p0 = ptr - 1; 1931 p0 = ptr - 1;
1914 p1 = ptr; 1932 p1 = ptr;
1915 csa = &csa_table[0][0]; 1933 csa = &csa_table[0][0];
1916 for(j=0;j<8;j++) { 1934 for(j=0;j<4;j++) {
1935 int tmp0 = *p0;
1936 int tmp1 = *p1;
1937 #if 0
1938 *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1]));
1939 *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0]));
1940 #else
1941 int64_t tmp2= MUL64(tmp0 + tmp1, csa[0]);
1942 *p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2]));
1943 *p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3]));
1944 #endif
1945 p0--; p1++;
1946 csa += 4;
1917 tmp0 = *p0; 1947 tmp0 = *p0;
1918 tmp1 = *p1; 1948 tmp1 = *p1;
1949 #if 0
1919 *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1])); 1950 *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1]));
1920 *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0])); 1951 *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0]));
1921 p0--; 1952 #else
1922 p1++; 1953 tmp2= MUL64(tmp0 + tmp1, csa[0]);
1923 csa += 2; 1954 *p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2]));
1924 } 1955 *p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3]));
1925 ptr += 18; 1956 #endif
1957 p0--; p1++;
1958 csa += 4;
1959 }
1960 ptr += 18;
1961 }
1962 }
1963
1964 static void compute_antialias_float(MPADecodeContext *s,
1965 GranuleDef *g)
1966 {
1967 int32_t *ptr, *p0, *p1;
1968 int n, i, j;
1969
1970 /* we antialias only "long" bands */
1971 if (g->block_type == 2) {
1972 if (!g->switch_point)
1973 return;
1974 /* XXX: check this for 8000Hz case */
1975 n = 1;
1976 } else {
1977 n = SBLIMIT - 1;
1978 }
1979
1980 ptr = g->sb_hybrid + 18;
1981 for(i = n;i > 0;i--) {
1982 float *csa = &csa_table_float[0][0];
1983 p0 = ptr - 1;
1984 p1 = ptr;
1985 for(j=0;j<4;j++) {
1986 float tmp0 = *p0;
1987 float tmp1 = *p1;
1988 #if 1
1989 *p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]);
1990 *p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]);
1991 #else
1992 float tmp2= (tmp0 + tmp1) * csa[0];
1993 *p0 = lrintf(tmp2 - tmp1 * csa[2]);
1994 *p1 = lrintf(tmp2 + tmp0 * csa[3]);
1995 #endif
1996 p0--; p1++;
1997 csa += 4;
1998 tmp0 = *p0;
1999 tmp1 = *p1;
2000 #if 1
2001 *p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]);
2002 *p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]);
2003 #else
2004 tmp2= (tmp0 + tmp1) * csa[0];
2005 *p0 = lrintf(tmp2 - tmp1 * csa[2]);
2006 *p1 = lrintf(tmp2 + tmp0 * csa[3]);
2007 #endif
2008 p0--; p1++;
2009 csa += 4;
2010 }
2011 ptr += 18;
1926 } 2012 }
1927 } 2013 }
1928 2014
1929 static void compute_imdct(MPADecodeContext *s, 2015 static void compute_imdct(MPADecodeContext *s,
1930 GranuleDef *g, 2016 GranuleDef *g,
2350 2436
2351 reorder_block(s, g); 2437 reorder_block(s, g);
2352 #if defined(DEBUG) 2438 #if defined(DEBUG)
2353 sample_dump(0, g->sb_hybrid, 576); 2439 sample_dump(0, g->sb_hybrid, 576);
2354 #endif 2440 #endif
2355 compute_antialias(s, g); 2441 s->compute_antialias(s, g);
2356 #if defined(DEBUG) 2442 #if defined(DEBUG)
2357 sample_dump(1, g->sb_hybrid, 576); 2443 sample_dump(1, g->sb_hybrid, 576);
2358 #endif 2444 #endif
2359 compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]); 2445 compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
2360 #if defined(DEBUG) 2446 #if defined(DEBUG)