comparison aactab.c @ 10874:bcfe2acbf190 libavcodec

AAC: Compress codebook tables and optimise sign bit handling. The codebooks each consist of a small number of values repeated in groups of 2 or 4. Storing the codebooks as a packed list of 2- or 4-bit indexes into a table reduces their size substantially (from 7.5k to 1.5k), resulting in less cache pressure. For the band types with sign bits in the bitstream, storing the number and position of non-zero codebook values using a few bits avoids multiple get_bits() calls and floating-point comparisons which gcc handles miserably. Some float/int type punning also avoids gcc brain damage. Overall speedup 20-35% on Cortex-A8, 20% on Core i7.
author mru
date Wed, 13 Jan 2010 16:46:28 +0000
parents 76132409af55
children 34a65026fa06
comparison
equal deleted inserted replaced
10873:fb42dfc877cc 10874:bcfe2acbf190
897 codebook_vector2, codebook_vector4, codebook_vector4, 897 codebook_vector2, codebook_vector4, codebook_vector4,
898 codebook_vector6, codebook_vector6, codebook_vector8, 898 codebook_vector6, codebook_vector6, codebook_vector8,
899 codebook_vector8, codebook_vector10, 899 codebook_vector8, codebook_vector10,
900 }; 900 };
901 901
/*
 * Three-entry value table {-1, 0, +1}. Index 0 selects -1.0, so the zero
 * value sits at index 1. Referenced by slots 0 and 1 of
 * ff_aac_codebook_vector_vals.
 */
902 static const float codebook_vector0_vals[] = {
903 -1.0000000, 0.0000000, 1.0000000
904 };
905
/*
 * Three-entry table of non-negative values; entry n equals n^(4/3) for
 * n = 0..2 (2.5198421 is 2^(4/3)). Index 0 is the zero value.
 * Referenced by slots 2 and 3 of ff_aac_codebook_vector_vals.
 */
906 static const float codebook_vector2_vals[] = {
907 0.0000000, 1.0000000, 2.5198421,
908 };
909
910 /*
 * Packed entries describing four values each: 81 entries, one for every
 * combination of four indices in the range 0..2. The bits 6:7 field
 * varies fastest from one entry to the next, the bits 0:1 field slowest.
 *
911 * bits 0:1, 2:3, 4:5, 6:7 index into _vals array
912 * 8:11 number of non-zero values
913 * 12:15 bit mask of non-zero values
 *
 * Mask bit 12+k is set when the k-th 2-bit field (bits 2k:2k+1) holds a
 * non-zero index. Count and mask therefore treat index 0 as the zero
 * value, which matches codebook_vector2_vals (index 0 is 0.0) but not
 * codebook_vector0_vals (index 0 is -1.0).
 * NOTE(review): presumably readers ignore bits 8:15 for the slots that
 * pair this table with codebook_vector0_vals - confirm against the decoder.
914 */
915 static const uint16_t codebook_vector02_idx[] = {
916 0x0000, 0x8140, 0x8180, 0x4110, 0xc250, 0xc290, 0x4120, 0xc260, 0xc2a0,
917 0x2104, 0xa244, 0xa284, 0x6214, 0xe354, 0xe394, 0x6224, 0xe364, 0xe3a4,
918 0x2108, 0xa248, 0xa288, 0x6218, 0xe358, 0xe398, 0x6228, 0xe368, 0xe3a8,
919 0x1101, 0x9241, 0x9281, 0x5211, 0xd351, 0xd391, 0x5221, 0xd361, 0xd3a1,
920 0x3205, 0xb345, 0xb385, 0x7315, 0xf455, 0xf495, 0x7325, 0xf465, 0xf4a5,
921 0x3209, 0xb349, 0xb389, 0x7319, 0xf459, 0xf499, 0x7329, 0xf469, 0xf4a9,
922 0x1102, 0x9242, 0x9282, 0x5212, 0xd352, 0xd392, 0x5222, 0xd362, 0xd3a2,
923 0x3206, 0xb346, 0xb386, 0x7316, 0xf456, 0xf496, 0x7326, 0xf466, 0xf4a6,
924 0x320a, 0xb34a, 0xb38a, 0x731a, 0xf45a, 0xf49a, 0x732a, 0xf46a, 0xf4aa,
925 };
926
/*
 * Nine-entry signed value table: entry i equals sign(n) * |n|^(4/3) for
 * n = i - 4, i.e. n = -4..4. The zero value sits at index 4.
 * Referenced by slots 4 and 5 of ff_aac_codebook_vector_vals.
 */
927 static const float codebook_vector4_vals[] = {
928 -6.3496042, -4.3267487,
929 -2.5198421, -1.0000000,
930 0.0000000, 1.0000000,
931 2.5198421, 4.3267487,
932 6.3496042,
933 };
934
935 /*
 * Packed entries describing two values each: 81 entries, one for every
 * pair of indices in the range 0..8. The bits 4:7 field varies fastest,
 * the bits 0:3 field slowest. Bits 8:15 are zero in every entry; this
 * table carries no non-zero count or mask.
 *
936 * bits 0:3, 4:7 index into _vals array
937 */
938 static const uint16_t codebook_vector4_idx[] = {
939 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050, 0x0060, 0x0070, 0x0080,
940 0x0001, 0x0011, 0x0021, 0x0031, 0x0041, 0x0051, 0x0061, 0x0071, 0x0081,
941 0x0002, 0x0012, 0x0022, 0x0032, 0x0042, 0x0052, 0x0062, 0x0072, 0x0082,
942 0x0003, 0x0013, 0x0023, 0x0033, 0x0043, 0x0053, 0x0063, 0x0073, 0x0083,
943 0x0004, 0x0014, 0x0024, 0x0034, 0x0044, 0x0054, 0x0064, 0x0074, 0x0084,
944 0x0005, 0x0015, 0x0025, 0x0035, 0x0045, 0x0055, 0x0065, 0x0075, 0x0085,
945 0x0006, 0x0016, 0x0026, 0x0036, 0x0046, 0x0056, 0x0066, 0x0076, 0x0086,
946 0x0007, 0x0017, 0x0027, 0x0037, 0x0047, 0x0057, 0x0067, 0x0077, 0x0087,
947 0x0008, 0x0018, 0x0028, 0x0038, 0x0048, 0x0058, 0x0068, 0x0078, 0x0088,
948 };
949
/*
 * Eight-entry table of non-negative values; entry n equals n^(4/3) for
 * n = 0..7. Index 0 is the zero value.
 * Referenced by slots 6 and 7 of ff_aac_codebook_vector_vals.
 */
950 static const float codebook_vector6_vals[] = {
951 0.0000000, 1.0000000, 2.5198421, 4.3267487,
952 6.3496042, 8.5498797, 10.9027236, 13.3905183,
953 };
954
955 /*
 * Packed entries describing two values each: 64 entries, one for every
 * pair of indices in the range 0..7. The bits 4:7 field varies fastest,
 * the bits 0:3 field slowest.
 *
956 * bits 0:3, 4:7 index into _vals array
957 * 8:11 number of non-zero values
958 * 12:15 1: only second value non-zero
959 * 0: other cases
 *
 * In this table the bits 12:15 flag is 1 exactly for the entries whose
 * bits 0:3 index is non-zero while the bits 4:7 index is zero
 * (0x1101..0x1107).
 * NOTE(review): which of the two fields the comment above calls the
 * "second value" cannot be told from the table alone - confirm against
 * the code that consumes the flag.
960 */
961 static const uint16_t codebook_vector6_idx[] = {
962 0x0000, 0x0110, 0x0120, 0x0130, 0x0140, 0x0150, 0x0160, 0x0170,
963 0x1101, 0x0211, 0x0221, 0x0231, 0x0241, 0x0251, 0x0261, 0x0271,
964 0x1102, 0x0212, 0x0222, 0x0232, 0x0242, 0x0252, 0x0262, 0x0272,
965 0x1103, 0x0213, 0x0223, 0x0233, 0x0243, 0x0253, 0x0263, 0x0273,
966 0x1104, 0x0214, 0x0224, 0x0234, 0x0244, 0x0254, 0x0264, 0x0274,
967 0x1105, 0x0215, 0x0225, 0x0235, 0x0245, 0x0255, 0x0265, 0x0275,
968 0x1106, 0x0216, 0x0226, 0x0236, 0x0246, 0x0256, 0x0266, 0x0276,
969 0x1107, 0x0217, 0x0227, 0x0237, 0x0247, 0x0257, 0x0267, 0x0277,
970 };
971
/*
 * Thirteen-entry table of non-negative values; entry n equals n^(4/3) for
 * n = 0..12. Index 0 is the zero value.
 * Referenced by slots 8 and 9 of ff_aac_codebook_vector_vals.
 */
972 static const float codebook_vector8_vals[] = {
973 0.0000000, 1.0000000,
974 2.5198421, 4.3267487,
975 6.3496042, 8.5498797,
976 10.9027236, 13.3905183,
977 16.0000000, 18.7207544,
978 21.5443469, 24.4637810,
979 27.4731418,
980 };
981
982 /*
 * Packed entries describing two values each: 169 entries, one for every
 * pair of indices in the range 0..12. The bits 4:7 field varies fastest,
 * the bits 0:3 field slowest; each run of 13 entries sharing one bits 0:3
 * index is laid out over two source lines (7 + 6 entries).
 *
983 * bits 0:3, 4:7 index into _vals array
984 * 8:11 number of non-zero values
985 * 12:15 1: only second value non-zero
986 * 0: other cases
 *
 * In this table the bits 12:15 flag is 1 exactly for the entries whose
 * bits 0:3 index is non-zero while the bits 4:7 index is zero
 * (0x1101..0x110c).
 * NOTE(review): which of the two fields the comment above calls the
 * "second value" cannot be told from the table alone - confirm against
 * the code that consumes the flag.
987 */
988 static const uint16_t codebook_vector8_idx[] = {
989 0x0000, 0x0110, 0x0120, 0x0130, 0x0140, 0x0150, 0x0160,
990 0x0170, 0x0180, 0x0190, 0x01a0, 0x01b0, 0x01c0,
991 0x1101, 0x0211, 0x0221, 0x0231, 0x0241, 0x0251, 0x0261,
992 0x0271, 0x0281, 0x0291, 0x02a1, 0x02b1, 0x02c1,
993 0x1102, 0x0212, 0x0222, 0x0232, 0x0242, 0x0252, 0x0262,
994 0x0272, 0x0282, 0x0292, 0x02a2, 0x02b2, 0x02c2,
995 0x1103, 0x0213, 0x0223, 0x0233, 0x0243, 0x0253, 0x0263,
996 0x0273, 0x0283, 0x0293, 0x02a3, 0x02b3, 0x02c3,
997 0x1104, 0x0214, 0x0224, 0x0234, 0x0244, 0x0254, 0x0264,
998 0x0274, 0x0284, 0x0294, 0x02a4, 0x02b4, 0x02c4,
999 0x1105, 0x0215, 0x0225, 0x0235, 0x0245, 0x0255, 0x0265,
1000 0x0275, 0x0285, 0x0295, 0x02a5, 0x02b5, 0x02c5,
1001 0x1106, 0x0216, 0x0226, 0x0236, 0x0246, 0x0256, 0x0266,
1002 0x0276, 0x0286, 0x0296, 0x02a6, 0x02b6, 0x02c6,
1003 0x1107, 0x0217, 0x0227, 0x0237, 0x0247, 0x0257, 0x0267,
1004 0x0277, 0x0287, 0x0297, 0x02a7, 0x02b7, 0x02c7,
1005 0x1108, 0x0218, 0x0228, 0x0238, 0x0248, 0x0258, 0x0268,
1006 0x0278, 0x0288, 0x0298, 0x02a8, 0x02b8, 0x02c8,
1007 0x1109, 0x0219, 0x0229, 0x0239, 0x0249, 0x0259, 0x0269,
1008 0x0279, 0x0289, 0x0299, 0x02a9, 0x02b9, 0x02c9,
1009 0x110a, 0x021a, 0x022a, 0x023a, 0x024a, 0x025a, 0x026a,
1010 0x027a, 0x028a, 0x029a, 0x02aa, 0x02ba, 0x02ca,
1011 0x110b, 0x021b, 0x022b, 0x023b, 0x024b, 0x025b, 0x026b,
1012 0x027b, 0x028b, 0x029b, 0x02ab, 0x02bb, 0x02cb,
1013 0x110c, 0x021c, 0x022c, 0x023c, 0x024c, 0x025c, 0x026c,
1014 0x027c, 0x028c, 0x029c, 0x02ac, 0x02bc, 0x02cc,
1015 };
1016
/*
 * Sixteen-entry table of non-negative values; entry n equals n^(4/3) for
 * n = 0..15. Index 0 is the zero value.
 * Referenced by slot 10 (the last slot) of ff_aac_codebook_vector_vals.
 */
1017 static const float codebook_vector10_vals[] = {
1018 0.0000000, 1.0000000,
1019 2.5198421, 4.3267487,
1020 6.3496042, 8.5498797,
1021 10.9027236, 13.3905183,
1022 16.0000000, 18.7207544,
1023 21.5443469, 24.4637810,
1024 27.4731418, 30.5673509,
1025 33.7419917, 36.9931811,
1026 };
1027
1028 /*
 * Packed entries describing two values each: 289 entries in 17 runs of
 * 17, each run laid out over two source lines (8 + 9 entries). Within a
 * run the bits 4:7 field counts 0..15 and the 17th entry is the escape
 * case for that field; the 17th run is the escape case for the bits 0:3
 * field.
 *
1029 * bits 0:3, 4:7 index into _vals array
1030 * 8:9 bit mask of escape-coded entries
1031 * 12:15 number of non-zero values
 *
 * Bit 8 marks the bits 0:3 field as escape-coded and bit 9 marks the
 * bits 4:7 field; an escape-coded field stores index 0 but is still
 * counted in bits 12:15 (e.g. 0x1200, 0x2300). Bits 10:11 are zero in
 * every entry. Note that the non-zero count lives in bits 12:15 here,
 * not in bits 8:11 as in the other _idx tables.
1032 */
1033 static const uint16_t codebook_vector10_idx[] = {
1034 0x0000, 0x1010, 0x1020, 0x1030, 0x1040, 0x1050, 0x1060, 0x1070,
1035 0x1080, 0x1090, 0x10a0, 0x10b0, 0x10c0, 0x10d0, 0x10e0, 0x10f0, 0x1200,
1036 0x1001, 0x2011, 0x2021, 0x2031, 0x2041, 0x2051, 0x2061, 0x2071,
1037 0x2081, 0x2091, 0x20a1, 0x20b1, 0x20c1, 0x20d1, 0x20e1, 0x20f1, 0x2201,
1038 0x1002, 0x2012, 0x2022, 0x2032, 0x2042, 0x2052, 0x2062, 0x2072,
1039 0x2082, 0x2092, 0x20a2, 0x20b2, 0x20c2, 0x20d2, 0x20e2, 0x20f2, 0x2202,
1040 0x1003, 0x2013, 0x2023, 0x2033, 0x2043, 0x2053, 0x2063, 0x2073,
1041 0x2083, 0x2093, 0x20a3, 0x20b3, 0x20c3, 0x20d3, 0x20e3, 0x20f3, 0x2203,
1042 0x1004, 0x2014, 0x2024, 0x2034, 0x2044, 0x2054, 0x2064, 0x2074,
1043 0x2084, 0x2094, 0x20a4, 0x20b4, 0x20c4, 0x20d4, 0x20e4, 0x20f4, 0x2204,
1044 0x1005, 0x2015, 0x2025, 0x2035, 0x2045, 0x2055, 0x2065, 0x2075,
1045 0x2085, 0x2095, 0x20a5, 0x20b5, 0x20c5, 0x20d5, 0x20e5, 0x20f5, 0x2205,
1046 0x1006, 0x2016, 0x2026, 0x2036, 0x2046, 0x2056, 0x2066, 0x2076,
1047 0x2086, 0x2096, 0x20a6, 0x20b6, 0x20c6, 0x20d6, 0x20e6, 0x20f6, 0x2206,
1048 0x1007, 0x2017, 0x2027, 0x2037, 0x2047, 0x2057, 0x2067, 0x2077,
1049 0x2087, 0x2097, 0x20a7, 0x20b7, 0x20c7, 0x20d7, 0x20e7, 0x20f7, 0x2207,
1050 0x1008, 0x2018, 0x2028, 0x2038, 0x2048, 0x2058, 0x2068, 0x2078,
1051 0x2088, 0x2098, 0x20a8, 0x20b8, 0x20c8, 0x20d8, 0x20e8, 0x20f8, 0x2208,
1052 0x1009, 0x2019, 0x2029, 0x2039, 0x2049, 0x2059, 0x2069, 0x2079,
1053 0x2089, 0x2099, 0x20a9, 0x20b9, 0x20c9, 0x20d9, 0x20e9, 0x20f9, 0x2209,
1054 0x100a, 0x201a, 0x202a, 0x203a, 0x204a, 0x205a, 0x206a, 0x207a,
1055 0x208a, 0x209a, 0x20aa, 0x20ba, 0x20ca, 0x20da, 0x20ea, 0x20fa, 0x220a,
1056 0x100b, 0x201b, 0x202b, 0x203b, 0x204b, 0x205b, 0x206b, 0x207b,
1057 0x208b, 0x209b, 0x20ab, 0x20bb, 0x20cb, 0x20db, 0x20eb, 0x20fb, 0x220b,
1058 0x100c, 0x201c, 0x202c, 0x203c, 0x204c, 0x205c, 0x206c, 0x207c,
1059 0x208c, 0x209c, 0x20ac, 0x20bc, 0x20cc, 0x20dc, 0x20ec, 0x20fc, 0x220c,
1060 0x100d, 0x201d, 0x202d, 0x203d, 0x204d, 0x205d, 0x206d, 0x207d,
1061 0x208d, 0x209d, 0x20ad, 0x20bd, 0x20cd, 0x20dd, 0x20ed, 0x20fd, 0x220d,
1062 0x100e, 0x201e, 0x202e, 0x203e, 0x204e, 0x205e, 0x206e, 0x207e,
1063 0x208e, 0x209e, 0x20ae, 0x20be, 0x20ce, 0x20de, 0x20ee, 0x20fe, 0x220e,
1064 0x100f, 0x201f, 0x202f, 0x203f, 0x204f, 0x205f, 0x206f, 0x207f,
1065 0x208f, 0x209f, 0x20af, 0x20bf, 0x20cf, 0x20df, 0x20ef, 0x20ff, 0x220f,
1066 0x1100, 0x2110, 0x2120, 0x2130, 0x2140, 0x2150, 0x2160, 0x2170,
1067 0x2180, 0x2190, 0x21a0, 0x21b0, 0x21c0, 0x21d0, 0x21e0, 0x21f0, 0x2300,
1068 };
1069
/*
 * Externally visible table of 11 pointers to the value tables above.
 * Each of the 0/2/4/6/8 tables fills two consecutive slots and the
 * codebook_vector10_vals table fills the last one.
 * NOTE(review): presumably indexed by codebook number minus one, in step
 * with ff_aac_codebook_vector_idx - confirm against the callers.
 */
1070 const float *const ff_aac_codebook_vector_vals[] = {
1071 codebook_vector0_vals, codebook_vector0_vals,
1072 codebook_vector2_vals, codebook_vector2_vals,
1073 codebook_vector4_vals, codebook_vector4_vals,
1074 codebook_vector6_vals, codebook_vector6_vals,
1075 codebook_vector8_vals, codebook_vector8_vals,
1076 codebook_vector10_vals,
1077 };
1078
/*
 * Externally visible table of 11 pointers to the packed index tables
 * above. The first four slots all use codebook_vector02_idx; the 4/6/8
 * tables fill two slots each and codebook_vector10_idx fills the last one.
 * NOTE(review): presumably indexed the same way as
 * ff_aac_codebook_vector_vals, so that slot i of each table describes the
 * same codebook - confirm against the callers.
 */
1079 const uint16_t *const ff_aac_codebook_vector_idx[] = {
1080 codebook_vector02_idx, codebook_vector02_idx,
1081 codebook_vector02_idx, codebook_vector02_idx,
1082 codebook_vector4_idx, codebook_vector4_idx,
1083 codebook_vector6_idx, codebook_vector6_idx,
1084 codebook_vector8_idx, codebook_vector8_idx,
1085 codebook_vector10_idx,
1086 };
1087
902 /* @name swb_offsets 1088 /* @name swb_offsets
903 * Sample offset into the window indicating the beginning of a scalefactor 1089 * Sample offset into the window indicating the beginning of a scalefactor
904 * window band 1090 * window band
905 * 1091 *
906 * scalefactor window band - term for scalefactor bands within a window, 1092 * scalefactor window band - term for scalefactor bands within a window,