Mercurial > libavcodec.hg
comparison mpegaudiodec.c @ 1733:b47d56b1a049 libavcodec
Optimize compute_antialias() and add a floating-point-based alternative (2x faster)
author | michael |
---|---|
date | Thu, 08 Jan 2004 21:08:57 +0000 |
parents | 0d2b59cf9f45 |
children | 8aace334bcf0 |
comparison
equal
deleted
inserted
replaced
1732:f716b8f47d98 | 1733:b47d56b1a049 |
---|---|
63 | 63 |
64 /****************/ | 64 /****************/ |
65 | 65 |
66 #define HEADER_SIZE 4 | 66 #define HEADER_SIZE 4 |
67 #define BACKSTEP_SIZE 512 | 67 #define BACKSTEP_SIZE 512 |
68 | |
69 struct GranuleDef; | |
68 | 70 |
69 typedef struct MPADecodeContext { | 71 typedef struct MPADecodeContext { |
70 uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */ | 72 uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */ |
71 int inbuf_index; | 73 int inbuf_index; |
72 uint8_t *inbuf_ptr, *inbuf; | 74 uint8_t *inbuf_ptr, *inbuf; |
91 int32_t sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT] __attribute__((aligned(16))); | 93 int32_t sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT] __attribute__((aligned(16))); |
92 int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ | 94 int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */ |
93 #ifdef DEBUG | 95 #ifdef DEBUG |
94 int frame_count; | 96 int frame_count; |
95 #endif | 97 #endif |
98 void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g); | |
96 } MPADecodeContext; | 99 } MPADecodeContext; |
97 | 100 |
98 /* layer 3 "granule" */ | 101 /* layer 3 "granule" */ |
99 typedef struct GranuleDef { | 102 typedef struct GranuleDef { |
100 uint8_t scfsi; | 103 uint8_t scfsi; |
125 const uint16_t *codes; | 128 const uint16_t *codes; |
126 } HuffTable; | 129 } HuffTable; |
127 | 130 |
128 #include "mpegaudiodectab.h" | 131 #include "mpegaudiodectab.h" |
129 | 132 |
133 static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g); | |
134 static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g); | |
135 | |
130 /* vlc structure for decoding layer 3 huffman tables */ | 136 /* vlc structure for decoding layer 3 huffman tables */ |
131 static VLC huff_vlc[16]; | 137 static VLC huff_vlc[16]; |
132 static uint8_t *huff_code_table[16]; | 138 static uint8_t *huff_code_table[16]; |
133 static VLC huff_quad_vlc[2]; | 139 static VLC huff_quad_vlc[2]; |
134 /* computed from band_size_long */ | 140 /* computed from band_size_long */ |
142 static uint32_t *table_4_3_value; | 148 static uint32_t *table_4_3_value; |
143 #endif | 149 #endif |
144 /* intensity stereo coef table */ | 150 /* intensity stereo coef table */ |
145 static int32_t is_table[2][16]; | 151 static int32_t is_table[2][16]; |
146 static int32_t is_table_lsf[2][2][16]; | 152 static int32_t is_table_lsf[2][2][16]; |
147 static int32_t csa_table[8][2]; | 153 static int32_t csa_table[8][4]; |
154 static float csa_table_float[8][4]; | |
148 static int32_t mdct_win[8][36]; | 155 static int32_t mdct_win[8][36]; |
149 | 156 |
150 /* lower 2 bits: modulo 3, higher bits: shift */ | 157 /* lower 2 bits: modulo 3, higher bits: shift */ |
151 static uint16_t scale_factor_modshift[64]; | 158 static uint16_t scale_factor_modshift[64]; |
152 /* [i][j]: 2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */ | 159 /* [i][j]: 2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */ |
453 dprintf("is_table_lsf %d %d: %x %x\n", | 460 dprintf("is_table_lsf %d %d: %x %x\n", |
454 i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]); | 461 i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]); |
455 } | 462 } |
456 } | 463 } |
457 | 464 |
465 if(avctx->antialias_algo == FF_AA_INT) | |
466 s->compute_antialias= compute_antialias_integer; | |
467 else | |
468 s->compute_antialias= compute_antialias_float; | |
458 for(i=0;i<8;i++) { | 469 for(i=0;i<8;i++) { |
459 float ci, cs, ca; | 470 float ci, cs, ca; |
460 ci = ci_table[i]; | 471 ci = ci_table[i]; |
461 cs = 1.0 / sqrt(1.0 + ci * ci); | 472 cs = 1.0 / sqrt(1.0 + ci * ci); |
462 ca = cs * ci; | 473 ca = cs * ci; |
463 csa_table[i][0] = FIX(cs); | 474 csa_table[i][0] = FIX(cs); |
464 csa_table[i][1] = FIX(ca); | 475 csa_table[i][1] = FIX(ca); |
476 csa_table[i][2] = FIX(ca) + FIX(cs); | |
477 csa_table[i][3] = FIX(ca) - FIX(cs); | |
478 csa_table_float[i][0] = cs; | |
479 csa_table_float[i][1] = ca; | |
480 csa_table_float[i][2] = ca + cs; | |
481 csa_table_float[i][3] = ca - cs; | |
482 // printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca)); | |
465 } | 483 } |
466 | 484 |
467 /* compute mdct windows */ | 485 /* compute mdct windows */ |
468 for(i=0;i<36;i++) { | 486 for(i=0;i<36;i++) { |
469 int v; | 487 int v; |
1890 tab1[i] = tmp0 - tmp1; | 1908 tab1[i] = tmp0 - tmp1; |
1891 } | 1909 } |
1892 } | 1910 } |
1893 } | 1911 } |
1894 | 1912 |
1895 static void compute_antialias(MPADecodeContext *s, | 1913 static void compute_antialias_integer(MPADecodeContext *s, |
1896 GranuleDef *g) | 1914 GranuleDef *g) |
1897 { | 1915 { |
1898 int32_t *ptr, *p0, *p1, *csa; | 1916 int32_t *ptr, *p0, *p1, *csa; |
1899 int n, tmp0, tmp1, i, j; | 1917 int n, i, j; |
1900 | 1918 |
1901 /* we antialias only "long" bands */ | 1919 /* we antialias only "long" bands */ |
1902 if (g->block_type == 2) { | 1920 if (g->block_type == 2) { |
1903 if (!g->switch_point) | 1921 if (!g->switch_point) |
1904 return; | 1922 return; |
1910 | 1928 |
1911 ptr = g->sb_hybrid + 18; | 1929 ptr = g->sb_hybrid + 18; |
1912 for(i = n;i > 0;i--) { | 1930 for(i = n;i > 0;i--) { |
1913 p0 = ptr - 1; | 1931 p0 = ptr - 1; |
1914 p1 = ptr; | 1932 p1 = ptr; |
1915 csa = &csa_table[0][0]; | 1933 csa = &csa_table[0][0]; |
1916 for(j=0;j<8;j++) { | 1934 for(j=0;j<4;j++) { |
1935 int tmp0 = *p0; | |
1936 int tmp1 = *p1; | |
1937 #if 0 | |
1938 *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1])); | |
1939 *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0])); | |
1940 #else | |
1941 int64_t tmp2= MUL64(tmp0 + tmp1, csa[0]); | |
1942 *p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2])); | |
1943 *p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3])); | |
1944 #endif | |
1945 p0--; p1++; | |
1946 csa += 4; | |
1917 tmp0 = *p0; | 1947 tmp0 = *p0; |
1918 tmp1 = *p1; | 1948 tmp1 = *p1; |
1949 #if 0 | |
1919 *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1])); | 1950 *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1])); |
1920 *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0])); | 1951 *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0])); |
1921 p0--; | 1952 #else |
1922 p1++; | 1953 tmp2= MUL64(tmp0 + tmp1, csa[0]); |
1923 csa += 2; | 1954 *p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2])); |
1924 } | 1955 *p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3])); |
1925 ptr += 18; | 1956 #endif |
1957 p0--; p1++; | |
1958 csa += 4; | |
1959 } | |
1960 ptr += 18; | |
1961 } | |
1962 } | |
1963 | |
1964 static void compute_antialias_float(MPADecodeContext *s, | |
1965 GranuleDef *g) | |
1966 { | |
1967 int32_t *ptr, *p0, *p1; | |
1968 int n, i, j; | |
1969 | |
1970 /* we antialias only "long" bands */ | |
1971 if (g->block_type == 2) { | |
1972 if (!g->switch_point) | |
1973 return; | |
1974 /* XXX: check this for 8000Hz case */ | |
1975 n = 1; | |
1976 } else { | |
1977 n = SBLIMIT - 1; | |
1978 } | |
1979 | |
1980 ptr = g->sb_hybrid + 18; | |
1981 for(i = n;i > 0;i--) { | |
1982 float *csa = &csa_table_float[0][0]; | |
1983 p0 = ptr - 1; | |
1984 p1 = ptr; | |
1985 for(j=0;j<4;j++) { | |
1986 float tmp0 = *p0; | |
1987 float tmp1 = *p1; | |
1988 #if 1 | |
1989 *p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]); | |
1990 *p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]); | |
1991 #else | |
1992 float tmp2= (tmp0 + tmp1) * csa[0]; | |
1993 *p0 = lrintf(tmp2 - tmp1 * csa[2]); | |
1994 *p1 = lrintf(tmp2 + tmp0 * csa[3]); | |
1995 #endif | |
1996 p0--; p1++; | |
1997 csa += 4; | |
1998 tmp0 = *p0; | |
1999 tmp1 = *p1; | |
2000 #if 1 | |
2001 *p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]); | |
2002 *p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]); | |
2003 #else | |
2004 tmp2= (tmp0 + tmp1) * csa[0]; | |
2005 *p0 = lrintf(tmp2 - tmp1 * csa[2]); | |
2006 *p1 = lrintf(tmp2 + tmp0 * csa[3]); | |
2007 #endif | |
2008 p0--; p1++; | |
2009 csa += 4; | |
2010 } | |
2011 ptr += 18; | |
1926 } | 2012 } |
1927 } | 2013 } |
1928 | 2014 |
1929 static void compute_imdct(MPADecodeContext *s, | 2015 static void compute_imdct(MPADecodeContext *s, |
1930 GranuleDef *g, | 2016 GranuleDef *g, |
2350 | 2436 |
2351 reorder_block(s, g); | 2437 reorder_block(s, g); |
2352 #if defined(DEBUG) | 2438 #if defined(DEBUG) |
2353 sample_dump(0, g->sb_hybrid, 576); | 2439 sample_dump(0, g->sb_hybrid, 576); |
2354 #endif | 2440 #endif |
2355 compute_antialias(s, g); | 2441 s->compute_antialias(s, g); |
2356 #if defined(DEBUG) | 2442 #if defined(DEBUG) |
2357 sample_dump(1, g->sb_hybrid, 576); | 2443 sample_dump(1, g->sb_hybrid, 576); |
2358 #endif | 2444 #endif |
2359 compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]); | 2445 compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]); |
2360 #if defined(DEBUG) | 2446 #if defined(DEBUG) |