88674
|
# Build the lookup table used by decode_hex() for the hexadecimal letter
# digits.  Upper- and lower-case letters map to the same value; the decimal
# digits "0"-"9" are deliberately absent because decode_hex() handles them
# arithmetically.
BEGIN {
  tohex["A"] = tohex["a"] = 10;
  tohex["B"] = tohex["b"] = 11;
  tohex["C"] = tohex["c"] = 12;
  tohex["D"] = tohex["d"] = 13;
  tohex["E"] = tohex["e"] = 14;
  tohex["F"] = tohex["f"] = 15;
}
|
|
15
|
|
# decode_hex(str): convert a string of hexadecimal digits to a number.
#
# Characters "0"-"9" are converted arithmetically; every other character is
# looked up in the global tohex[] table, so a character that is not in the
# table contributes 0 for its position.  An empty string yields 0.
#
# The names after the wide gap in the parameter list are awk's idiom for
# local variables; callers pass STR only.  Keeping them local matters
# because `i' is also used as a loop index elsewhere in this script, and an
# implicit global would be overwritten by any call made from such a loop.
function decode_hex(str,    n, len, i, c) {
  n = 0;
  len = length(str);
  for (i = 1; i <= len; i++) {
    c = substr(str, i, 1);
    if (c >= "0" && c <= "9")
      n = n * 16 + (c - "0");   # string digit is coerced to its numeric value
    else
      n = n * 16 + tohex[c];
  }
  return n;
}
|
|
29
|
|
# gb_to_index(b0, b1, b2, b3): map the four byte values of a four-byte code
# to a single linear index.
#
# Bytes b0 and b2 are counted from 129 with a radix of 126; bytes b1 and b3
# are counted from 48 with a radix of 10.  The positional weights are
# therefore 10*126*10, 126*10, 10 and 1, and (129, 48, 129, 48) maps to 0.
function gb_to_index(b0,b1,b2,b3) {
  return (b0 - 129) * 12600 \
       + (b1 - 48) * 1260 \
       + (b2 - 129) * 10 \
       + (b3 - 48);
}
|
|
33
|
|
# index_to_gb(idx): inverse of gb_to_index().  Returns the four byte values
# for linear index IDX as eight upper-case hexadecimal digits (no prefix).
#
# awk's `/' is floating-point division, so each quotient is truncated with
# int() to keep every intermediate value an exact integer rather than
# relying on the %X conversion to discard a fractional part.
#
# b0..b3 are declared as extra parameters, awk's idiom for local variables,
# so that they do not overwrite globals of the same name; callers pass IDX
# only.
function index_to_gb(idx,    b0, b1, b2, b3) {
  b3 = (idx % 10) + 48;
  idx = int(idx / 10);
  b2 = (idx % 126) + 129;
  idx = int(idx / 126);
  b1 = (idx % 10) + 48;
  b0 = int(idx / 10) + 129;
  return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
}
|
|
43
|
|
# decode_gb(str): convert a textual four-byte sequence to its linear index.
#
# STR is expected to hold four bytes, each written as a two-character
# prefix followed by two hexadecimal digits (the input rule only passes
# strings of the form \xHH\xHH\xHH\xHH), so the digit pairs start at
# character offsets 3, 7, 11 and 15.
#
# b0..b3 are declared as extra parameters, awk's idiom for local variables,
# so that they do not leak into the global namespace; callers pass STR only.
function decode_gb(str,    b0, b1, b2, b3) {
  b0 = decode_hex(substr(str, 3, 2));
  b1 = decode_hex(substr(str, 7, 2));
  b2 = decode_hex(substr(str, 11, 2));
  b3 = decode_hex(substr(str, 15, 2));
  return gb_to_index(b0, b1, b2, b3);
}
|
|
51
|
|
# printline(from, to): print one output line for the run of linear indices
# FROM..TO (inclusive).
#
# The line holds the byte sequence of FROM in hex, followed by "-" and the
# byte sequence of TO when the run has more than one element, and finally
# the value stored in gbtable[FROM] formatted as at least four hex digits.
#
# The temporaries are declared as extra parameters, awk's idiom for local
# variables.  Without that they would be globals, and assigning them here
# would silently overwrite a caller's variable of the same name.  Callers
# pass FROM and TO only.
function printline(from, to,    from_hex, from_uni) {
  from_hex = index_to_gb(from);
  from_uni = gbtable[from];
  if (from == to)
    printf ("0x%s 0x%04X\n", from_hex, from_uni);
  else
    printf ("0x%s-0x%s 0x%04X\n", from_hex, index_to_gb(to), from_uni);
}
|
|
60
|
|
# Input rule: runs for every line whose first field starts with <Uxxxx>,
# i.e. exactly four upper-case hex digits between "<U" and ">".
#
# The four digits become the key into unitable[].  The value is the linear
# index of the byte sequence in field 2 when that field contains a
# four-byte sequence of the form \x81-\x84, \x30-\x39, \x80-\xFF, \x30-\x39;
# any other field 2 is recorded as -1.
/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ {
  unicode = decode_hex(substr($1, 3, 4));
  unitable[unicode] = ($2 ~ /\\x8[1-4]\\x3[0-9]\\x[8-9A-F][0-9A-F]\\x3[0-9]/) \
    ? decode_gb($2) \
    : -1;
}
|
|
68
|
|
# END rule: complete the code point <-> linear index mapping and print it as
# a list of contiguous ranges.
#
# unitable[] values at this point:
#   > 0   index taken from the input,
#   -1    the input listed the code point without a four-byte sequence,
#   0     never set by the input rule (an unset awk element compares equal
#         to 0), or an index that really is 0.
END {
  surrogate_min = decode_hex("D800");
  surrogate_max = decode_hex("DFFF");

  # Pass 1: walk code points 129..65535 in order.  A code point with no
  # entry that lies outside the surrogate range receives the index that
  # follows the most recently seen one; an explicit positive index resets
  # the running counter.  gbtable[] records the reverse mapping.
  lastgb = unitable[128];
  gbtable[lastgb] = 128;
  for (i = 129; i < 65536; i++) {
    if (unitable[i] == 0 && (i < surrogate_min || i > surrogate_max)) {
      lastgb++;
      gbtable[lastgb] = i;
      unitable[i] = lastgb;
    } else if (unitable[i] > 0) {
      lastgb = unitable[i];
      gbtable[lastgb] = i;
    }
  }

  # Pass 2: emit every maximal run of consecutive indices.  lastgb is the
  # last index of the open run, or -1 when no run is open.  The start of
  # the run is kept in range_start, a name that no helper function assigns
  # to, so a call to printline() cannot disturb it.
  range_start = lastgb = unitable[128];
  for (i = 129; i < 65536; i++) {
    if (unitable[i] > 0) {
      if (lastgb + 1 == unitable[i]) {
        # Next index continues the open run.
        lastgb++;
      } else {
        # Gap in the indices: flush the open run, then start a new one.
        if (lastgb >= 0)
          printline(range_start, lastgb);
        range_start = lastgb = unitable[i];
      }
    } else {
      # No four-byte index for this code point (value -1, or 0 for a
      # surrogate): close the open run, if any.
      if (lastgb >= 0)
        printline(range_start, lastgb);
      lastgb = -1;
    }
  }

  # Flush the run that is still open after the last code point.  When the
  # last code point has no index there is nothing left to print.
  if (lastgb >= 0)
    printline(range_start, lastgb);
}
|