88123
|
# Build tohex[]: the numeric value (10..15) of each hexadecimal letter digit,
# keyed by the letter in both upper and lower case.  Decimal digits are not
# stored here; decode_hex() converts those arithmetically.
BEGIN {
    tohex["A"] = tohex["a"] = 10;
    tohex["B"] = tohex["b"] = 11;
    tohex["C"] = tohex["c"] = 12;
    tohex["D"] = tohex["d"] = 13;
    tohex["E"] = tohex["e"] = 14;
    tohex["F"] = tohex["f"] = 15;
}
|
|
15
|
|
# decode_hex(str) -- convert a string of hexadecimal digits to a number.
#
#   str   the digits only (no "0x" prefix), most significant digit first;
#         letters may be upper or lower case.
#
# Returns the numeric value; the empty string decodes to 0.  Letter digits
# are looked up in the global tohex[] table.  A character that is neither
# 0-9 nor a key of tohex[] contributes 0 at its position.
#
# The names after the gap in the parameter list are awk locals: callers pass
# only str.  Declaring them keeps the call from overwriting same-named
# globals -- notably i, which the END rule uses as its loop counter.
function decode_hex(str,    n, len, i, c) {
    n = 0;
    len = length(str);
    for (i = 1; i <= len; i++)
    {
        c = substr(str, i, 1);
        if (c >= "0" && c <= "9")
            n = n * 16 + (c - "0");    # string digit coerced to its number
        else
            n = n * 16 + tohex[c];
    }
    return n;
}
|
|
29
|
|
# gb_to_index(gb) -- map a two-byte code to a linear index.
#
#   gb   numeric code; the lead byte is int(gb / 256), the trail byte gb % 256.
#
# Returns (lead - 129) * 191 + (trail - 64): 191 consecutive slots per lead
# byte, counted from lead byte 129 and trail byte 64.
#
# NOTE(review): the original carried a disabled adjustment that decremented
# the index for trail bytes >= 127.  It stays disabled here, so the slot for
# trail byte 127 is counted like any other -- confirm this is intended.
#
# b0 and b1 are awk locals (callers pass only gb), so the call does not
# disturb the globals of the same name.
function gb_to_index(gb,    b0, b1) {
    b0 = int(gb / 256);
    b1 = gb % 256;
    return (b0 - 129) * 191 + b1 - 64;
}
|
|
38
|
|
# index_to_gb(idx) -- format a linear index as an eight-digit hex string
# describing four bytes.
#
#   idx   non-negative integer index.
#
# The index is split as a mixed-radix number, least significant part first:
#   byte 4 = idx % 10 + 48          (10 values starting at 48)
#   byte 3 = next % 126 + 129       (126 values starting at 129)
#   byte 2 = next % 10 + 48
#   byte 1 = remainder + 129
# Presumably this is the GB18030 four-byte layout -- confirm against the
# table this script is run on.
#
# Returns the bytes as "%02X%02X%02X%02X", byte 1 first, without a "0x" prefix.
#
# b0..b3 are awk locals (callers pass only idx), so the call does not disturb
# the globals of the same name.
function index_to_gb(idx,    b0, b1, b2, b3) {
    b3 = (idx % 10) + 48;
    idx = int(idx / 10);
    b2 = (idx % 126) + 129;
    idx = int(idx / 126);
    b1 = (idx % 10) + 48;
    b0 = int(idx / 10) + 129;
    return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
}
|
|
48
|
|
# Copy lines that start with '#' to the output unchanged and skip the
# remaining rules for them.
# '#' is an ordinary character inside a regular expression, so it is written
# without a backslash: the meaning of a backslash before an ordinary character
# is not defined by POSIX, and gawk prints a warning for it.
/^#/ {
    print;
    next;
}
|
|
53
|
|
# Range line: field 1 is "0xAAAA-0xBBBB", field 2 is "0xUUUU".
# The two four-digit codes in field 1 are converted to linear indices; for
# every index from the first through the second, one consecutive value
# starting at the number decoded from field 2 is marked in table[].
/0x....-0x..../ {
    gb_from = gb_to_index(decode_hex(substr($1, 3, 4)));
    gb_to = gb_to_index(decode_hex(substr($1, 10, 4)));
    unicode = decode_hex(substr($2, 3, 4));
    for (; gb_from <= gb_to; gb_from++)
        table[unicode++] = 1;
    next;
}
|
|
65
|
|
# Any other line: decode the four hex digits after "0x" in fields 1 and 2
# and mark the field-2 value in table[].
# The field-1 value is stored in gb, which no visible rule reads afterwards.
{
    gb = decode_hex(substr($1, 3, 4));
    table[unicode = decode_hex(substr($2, 3, 4))] = 1;
}
|
|
71
|
|
# After all input: walk the values 128..65535 and print one line per maximal
# run of values that were never marked in table[].
#
# to_gb counts the unmarked values seen so far and is the linear index handed
# to index_to_gb(); from_gb / from_i remember the index and the value at which
# the current run began (from_gb < 0 means no run is open).
# Values 55296..57343 are neither counted nor do they close an open run.
# table[65536] is set as a sentinel so a run reaching 65535 is still printed.
END {
    from_gb = -1;
    to_gb = 0;
    from_i = 0;
    table[65536] = 1;
    for (i = 128; i <= 65536; i++)
    {
        if (table[i] != 0)
        {
            # Marked value: close and print the open run, if any.
            if (from_gb < 0)
                continue;
            if (to_gb - from_gb == 1)
                printf "0x%s\t\t0x%04X\n",
                    index_to_gb(from_gb), from_i;
            else
                printf "0x%s-0x%s\t0x%04X\n",
                    index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i;
            from_gb = -1;
            continue;
        }

        # Unmarked value inside 55296..57343: skipped entirely.
        if (i >= 55296 && i < 57344)
            continue;

        # Unmarked value: open a run if none is open, then count it.
        if (from_gb < 0)
        {
            from_gb = to_gb;
            from_i = i;
        }
        to_gb++;
    }
}
|