diff aactab.c @ 10874:bcfe2acbf190 libavcodec

AAC: Compress codebook tables and optimise sign bit handling. The codebooks each consist of a small number of values repeated in groups of 2 or 4. Storing the codebooks as a packed list of 2- or 4-bit indexes into a table reduces their size substantially (from 7.5k to 1.5k), resulting in less cache pressure. For the band types with sign bits in the bitstream, storing the number and position of non-zero codebook values using a few bits avoids multiple get_bits() calls and floating-point comparisons which gcc handles miserably. Some float/int type punning also avoids gcc brain damage. Overall speedup 20-35% on Cortex-A8, 20% on Core i7.
author mru
date Wed, 13 Jan 2010 16:46:28 +0000
parents 76132409af55
children 34a65026fa06
line wrap: on
line diff
--- a/aactab.c	Wed Jan 13 04:35:19 2010 +0000
+++ b/aactab.c	Wed Jan 13 16:46:28 2010 +0000
@@ -899,6 +899,192 @@
     codebook_vector8, codebook_vector10,
 };
 
+static const float codebook_vector0_vals[] = { /* 3 entries: -1, 0, +1 (index 1 is the zero) */
+   -1.0000000,  0.0000000,  1.0000000
+};
+
+static const float codebook_vector2_vals[] = { /* 3 entries: entry i holds i^(4/3) for i = 0..2 */
+    0.0000000,  1.0000000,  2.5198421,
+};
+
+/* One packed 16-bit descriptor per group of four values (81 entries); field k = bits 2k:2k+1.
+ * bits  0:1, 2:3, 4:5, 6:7  index into _vals array (four 2-bit fields, each 0..2)
+ *       8:11                number of non-zero values (count of fields whose index is non-zero)
+ *      12:15                bit mask of non-zero values (bit 12+k set when field k has a non-zero index)
+ * "Non-zero" matches codebook_vector2_vals (entry 0 is 0.0); in codebook_vector0_vals index 0 is -1.0. */
+static const uint16_t codebook_vector02_idx[] = {
+    0x0000, 0x8140, 0x8180, 0x4110, 0xc250, 0xc290, 0x4120, 0xc260, 0xc2a0,
+    0x2104, 0xa244, 0xa284, 0x6214, 0xe354, 0xe394, 0x6224, 0xe364, 0xe3a4,
+    0x2108, 0xa248, 0xa288, 0x6218, 0xe358, 0xe398, 0x6228, 0xe368, 0xe3a8,
+    0x1101, 0x9241, 0x9281, 0x5211, 0xd351, 0xd391, 0x5221, 0xd361, 0xd3a1,
+    0x3205, 0xb345, 0xb385, 0x7315, 0xf455, 0xf495, 0x7325, 0xf465, 0xf4a5,
+    0x3209, 0xb349, 0xb389, 0x7319, 0xf459, 0xf499, 0x7329, 0xf469, 0xf4a9,
+    0x1102, 0x9242, 0x9282, 0x5212, 0xd352, 0xd392, 0x5222, 0xd362, 0xd3a2,
+    0x3206, 0xb346, 0xb386, 0x7316, 0xf456, 0xf496, 0x7326, 0xf466, 0xf4a6,
+    0x320a, 0xb34a, 0xb38a, 0x731a, 0xf45a, 0xf49a, 0x732a, 0xf46a, 0xf4aa,
+};
+
+static const float codebook_vector4_vals[] = { /* 9 entries: sign(n) * |n|^(4/3) for n = -4..4 */
+   -6.3496042, -4.3267487,
+   -2.5198421, -1.0000000,
+    0.0000000,  1.0000000, /* index 4 is the zero entry */
+    2.5198421,  4.3267487,
+    6.3496042,
+};
+
+/* One packed descriptor per pair of values (81 entries, laid out 9 x 9 below).
+ * bits  0:3, 4:7  index into _vals array (each field 0..8; bits 8:15 are zero in every entry)
+ */
+static const uint16_t codebook_vector4_idx[] = {
+    0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050, 0x0060, 0x0070, 0x0080,
+    0x0001, 0x0011, 0x0021, 0x0031, 0x0041, 0x0051, 0x0061, 0x0071, 0x0081,
+    0x0002, 0x0012, 0x0022, 0x0032, 0x0042, 0x0052, 0x0062, 0x0072, 0x0082,
+    0x0003, 0x0013, 0x0023, 0x0033, 0x0043, 0x0053, 0x0063, 0x0073, 0x0083,
+    0x0004, 0x0014, 0x0024, 0x0034, 0x0044, 0x0054, 0x0064, 0x0074, 0x0084,
+    0x0005, 0x0015, 0x0025, 0x0035, 0x0045, 0x0055, 0x0065, 0x0075, 0x0085,
+    0x0006, 0x0016, 0x0026, 0x0036, 0x0046, 0x0056, 0x0066, 0x0076, 0x0086,
+    0x0007, 0x0017, 0x0027, 0x0037, 0x0047, 0x0057, 0x0067, 0x0077, 0x0087,
+    0x0008, 0x0018, 0x0028, 0x0038, 0x0048, 0x0058, 0x0068, 0x0078, 0x0088,
+};
+
+static const float codebook_vector6_vals[] = { /* 8 entries: entry i holds i^(4/3) for i = 0..7 */
+    0.0000000,  1.0000000,  2.5198421,  4.3267487,
+    6.3496042,  8.5498797, 10.9027236, 13.3905183,
+};
+
+/* One packed descriptor per pair of values (64 entries, laid out 8 x 8 below).
+ * bits  0:3, 4:7  index into _vals array (each field 0..7)
+ *       8:11      number of non-zero values (count of fields whose index is non-zero)
+ *      12:15      1: only second value non-zero (in the data: bits 4:7 == 0 and bits 0:3 != 0;
+ *                 0: other cases                 NOTE(review): confirm which field "second" names)
+ */
+static const uint16_t codebook_vector6_idx[] = {
+    0x0000, 0x0110, 0x0120, 0x0130, 0x0140, 0x0150, 0x0160, 0x0170,
+    0x1101, 0x0211, 0x0221, 0x0231, 0x0241, 0x0251, 0x0261, 0x0271,
+    0x1102, 0x0212, 0x0222, 0x0232, 0x0242, 0x0252, 0x0262, 0x0272,
+    0x1103, 0x0213, 0x0223, 0x0233, 0x0243, 0x0253, 0x0263, 0x0273,
+    0x1104, 0x0214, 0x0224, 0x0234, 0x0244, 0x0254, 0x0264, 0x0274,
+    0x1105, 0x0215, 0x0225, 0x0235, 0x0245, 0x0255, 0x0265, 0x0275,
+    0x1106, 0x0216, 0x0226, 0x0236, 0x0246, 0x0256, 0x0266, 0x0276,
+    0x1107, 0x0217, 0x0227, 0x0237, 0x0247, 0x0257, 0x0267, 0x0277,
+};
+
+static const float codebook_vector8_vals[] = { /* 13 entries: entry i holds i^(4/3) for i = 0..12 */
+     0.0000000,  1.0000000,
+     2.5198421,  4.3267487,
+     6.3496042,  8.5498797,
+    10.9027236, 13.3905183,
+    16.0000000, 18.7207544,
+    21.5443469, 24.4637810,
+    27.4731418,
+};
+
+/* One packed descriptor per pair of values (169 entries; each 13-entry row spans two lines below).
+ * bits  0:3, 4:7  index into _vals array (each field 0..12)
+ *       8:11      number of non-zero values (count of fields whose index is non-zero)
+ *      12:15      1: only second value non-zero (in the data: bits 4:7 == 0 and bits 0:3 != 0;
+ *                 0: other cases                 NOTE(review): confirm which field "second" names)
+ */
+static const uint16_t codebook_vector8_idx[] = {
+  0x0000, 0x0110, 0x0120, 0x0130, 0x0140, 0x0150, 0x0160,
+  0x0170, 0x0180, 0x0190, 0x01a0, 0x01b0, 0x01c0,
+  0x1101, 0x0211, 0x0221, 0x0231, 0x0241, 0x0251, 0x0261,
+  0x0271, 0x0281, 0x0291, 0x02a1, 0x02b1, 0x02c1,
+  0x1102, 0x0212, 0x0222, 0x0232, 0x0242, 0x0252, 0x0262,
+  0x0272, 0x0282, 0x0292, 0x02a2, 0x02b2, 0x02c2,
+  0x1103, 0x0213, 0x0223, 0x0233, 0x0243, 0x0253, 0x0263,
+  0x0273, 0x0283, 0x0293, 0x02a3, 0x02b3, 0x02c3,
+  0x1104, 0x0214, 0x0224, 0x0234, 0x0244, 0x0254, 0x0264,
+  0x0274, 0x0284, 0x0294, 0x02a4, 0x02b4, 0x02c4,
+  0x1105, 0x0215, 0x0225, 0x0235, 0x0245, 0x0255, 0x0265,
+  0x0275, 0x0285, 0x0295, 0x02a5, 0x02b5, 0x02c5,
+  0x1106, 0x0216, 0x0226, 0x0236, 0x0246, 0x0256, 0x0266,
+  0x0276, 0x0286, 0x0296, 0x02a6, 0x02b6, 0x02c6,
+  0x1107, 0x0217, 0x0227, 0x0237, 0x0247, 0x0257, 0x0267,
+  0x0277, 0x0287, 0x0297, 0x02a7, 0x02b7, 0x02c7,
+  0x1108, 0x0218, 0x0228, 0x0238, 0x0248, 0x0258, 0x0268,
+  0x0278, 0x0288, 0x0298, 0x02a8, 0x02b8, 0x02c8,
+  0x1109, 0x0219, 0x0229, 0x0239, 0x0249, 0x0259, 0x0269,
+  0x0279, 0x0289, 0x0299, 0x02a9, 0x02b9, 0x02c9,
+  0x110a, 0x021a, 0x022a, 0x023a, 0x024a, 0x025a, 0x026a,
+  0x027a, 0x028a, 0x029a, 0x02aa, 0x02ba, 0x02ca,
+  0x110b, 0x021b, 0x022b, 0x023b, 0x024b, 0x025b, 0x026b,
+  0x027b, 0x028b, 0x029b, 0x02ab, 0x02bb, 0x02cb,
+  0x110c, 0x021c, 0x022c, 0x023c, 0x024c, 0x025c, 0x026c,
+  0x027c, 0x028c, 0x029c, 0x02ac, 0x02bc, 0x02cc,
+};
+
+static const float codebook_vector10_vals[] = { /* 16 entries: entry i holds i^(4/3) for i = 0..15 */
+     0.0000000,  1.0000000,
+     2.5198421,  4.3267487,
+     6.3496042,  8.5498797,
+    10.9027236, 13.3905183,
+    16.0000000, 18.7207544,
+    21.5443469, 24.4637810,
+    27.4731418, 30.5673509,
+    33.7419917, 36.9931811,
+};
+
+/* One packed descriptor per pair of values (289 entries; each 17-entry row spans two lines below).
+ * bits  0:3, 4:7  index into _vals array (an escape-coded field carries index 0 here)
+ *       8:9       bit mask of escape-coded entries (bit 8: the 0:3 field, bit 9: the 4:7 field)
+ *      12:15      number of non-zero values (escape-coded fields are counted as non-zero)
+ */
+static const uint16_t codebook_vector10_idx[] = {
+    0x0000, 0x1010, 0x1020, 0x1030, 0x1040, 0x1050, 0x1060, 0x1070,
+    0x1080, 0x1090, 0x10a0, 0x10b0, 0x10c0, 0x10d0, 0x10e0, 0x10f0, 0x1200,
+    0x1001, 0x2011, 0x2021, 0x2031, 0x2041, 0x2051, 0x2061, 0x2071,
+    0x2081, 0x2091, 0x20a1, 0x20b1, 0x20c1, 0x20d1, 0x20e1, 0x20f1, 0x2201,
+    0x1002, 0x2012, 0x2022, 0x2032, 0x2042, 0x2052, 0x2062, 0x2072,
+    0x2082, 0x2092, 0x20a2, 0x20b2, 0x20c2, 0x20d2, 0x20e2, 0x20f2, 0x2202,
+    0x1003, 0x2013, 0x2023, 0x2033, 0x2043, 0x2053, 0x2063, 0x2073,
+    0x2083, 0x2093, 0x20a3, 0x20b3, 0x20c3, 0x20d3, 0x20e3, 0x20f3, 0x2203,
+    0x1004, 0x2014, 0x2024, 0x2034, 0x2044, 0x2054, 0x2064, 0x2074,
+    0x2084, 0x2094, 0x20a4, 0x20b4, 0x20c4, 0x20d4, 0x20e4, 0x20f4, 0x2204,
+    0x1005, 0x2015, 0x2025, 0x2035, 0x2045, 0x2055, 0x2065, 0x2075,
+    0x2085, 0x2095, 0x20a5, 0x20b5, 0x20c5, 0x20d5, 0x20e5, 0x20f5, 0x2205,
+    0x1006, 0x2016, 0x2026, 0x2036, 0x2046, 0x2056, 0x2066, 0x2076,
+    0x2086, 0x2096, 0x20a6, 0x20b6, 0x20c6, 0x20d6, 0x20e6, 0x20f6, 0x2206,
+    0x1007, 0x2017, 0x2027, 0x2037, 0x2047, 0x2057, 0x2067, 0x2077,
+    0x2087, 0x2097, 0x20a7, 0x20b7, 0x20c7, 0x20d7, 0x20e7, 0x20f7, 0x2207,
+    0x1008, 0x2018, 0x2028, 0x2038, 0x2048, 0x2058, 0x2068, 0x2078,
+    0x2088, 0x2098, 0x20a8, 0x20b8, 0x20c8, 0x20d8, 0x20e8, 0x20f8, 0x2208,
+    0x1009, 0x2019, 0x2029, 0x2039, 0x2049, 0x2059, 0x2069, 0x2079,
+    0x2089, 0x2099, 0x20a9, 0x20b9, 0x20c9, 0x20d9, 0x20e9, 0x20f9, 0x2209,
+    0x100a, 0x201a, 0x202a, 0x203a, 0x204a, 0x205a, 0x206a, 0x207a,
+    0x208a, 0x209a, 0x20aa, 0x20ba, 0x20ca, 0x20da, 0x20ea, 0x20fa, 0x220a,
+    0x100b, 0x201b, 0x202b, 0x203b, 0x204b, 0x205b, 0x206b, 0x207b,
+    0x208b, 0x209b, 0x20ab, 0x20bb, 0x20cb, 0x20db, 0x20eb, 0x20fb, 0x220b,
+    0x100c, 0x201c, 0x202c, 0x203c, 0x204c, 0x205c, 0x206c, 0x207c,
+    0x208c, 0x209c, 0x20ac, 0x20bc, 0x20cc, 0x20dc, 0x20ec, 0x20fc, 0x220c,
+    0x100d, 0x201d, 0x202d, 0x203d, 0x204d, 0x205d, 0x206d, 0x207d,
+    0x208d, 0x209d, 0x20ad, 0x20bd, 0x20cd, 0x20dd, 0x20ed, 0x20fd, 0x220d,
+    0x100e, 0x201e, 0x202e, 0x203e, 0x204e, 0x205e, 0x206e, 0x207e,
+    0x208e, 0x209e, 0x20ae, 0x20be, 0x20ce, 0x20de, 0x20ee, 0x20fe, 0x220e,
+    0x100f, 0x201f, 0x202f, 0x203f, 0x204f, 0x205f, 0x206f, 0x207f,
+    0x208f, 0x209f, 0x20af, 0x20bf, 0x20cf, 0x20df, 0x20ef, 0x20ff, 0x220f,
+    0x1100, 0x2110, 0x2120, 0x2130, 0x2140, 0x2150, 0x2160, 0x2170,
+    0x2180, 0x2190, 0x21a0, 0x21b0, 0x21c0, 0x21d0, 0x21e0, 0x21f0, 0x2300,
+};
+
+const float *const ff_aac_codebook_vector_vals[] = { /* 11 slots: value table per slot; slots 0-9 share a table in pairs */
+    codebook_vector0_vals, codebook_vector0_vals,
+    codebook_vector2_vals, codebook_vector2_vals,
+    codebook_vector4_vals, codebook_vector4_vals,
+    codebook_vector6_vals, codebook_vector6_vals,
+    codebook_vector8_vals, codebook_vector8_vals,
+    codebook_vector10_vals, /* the last slot has a table of its own */
+};
+
+const uint16_t *const ff_aac_codebook_vector_idx[] = { /* 11 slots: packed-index table per slot (presumably same slot numbering as the _vals table; confirm) */
+    codebook_vector02_idx, codebook_vector02_idx, /* slots 0-3 all share the one 4-value index table */
+    codebook_vector02_idx, codebook_vector02_idx,
+    codebook_vector4_idx,  codebook_vector4_idx,
+    codebook_vector6_idx,  codebook_vector6_idx,
+    codebook_vector8_idx,  codebook_vector8_idx,
+    codebook_vector10_idx,
+};
+
 /* @name swb_offsets
  * Sample offset into the window indicating the beginning of a scalefactor
  * window band