annotate etc/charsets/gb18030.awk @ 89184:88a9e962e183

(decode_coding_utf_8): Treat surrogates as invalid.
author Dave Love <fx@gnu.org>
date Wed, 09 Oct 2002 22:00:36 +0000
parents ddb17e4c813c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
88448
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Populate tohex[], the table decode_hex() consults for hexadecimal
# letters: "A".."F" and "a".."f" map to 10..15.  Decimal digits are
# not stored here.
BEGIN {
  hexletters = "ABCDEF";
  for (pos = 1; pos <= 6; pos++)
    {
      letter = substr(hexletters, pos, 1);
      # Position 1 ("A") is 10, position 6 ("F") is 15.
      tohex[letter] = pos + 9;
      tohex[tolower(letter)] = pos + 9;
    }
}
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
15
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Convert the hexadecimal digit string STR to a number and return it.
#
# Characters "0".."9" are converted arithmetically; every other
# character is looked up in the global tohex[] table, so a character
# with no entry there contributes 0.  An empty STR yields 0.
#
# The parameters after STR are local variables (extra parameters are
# awk's only way to declare locals); callers pass STR alone.  Keeping
# them local prevents this function from overwriting same-named
# globals, in particular a caller's loop index `i'.
function decode_hex(str,    n, len, i, c) {
  n = 0;
  len = length(str);
  for (i = 1; i <= len; i++)
    {
      c = substr(str, i, 1);
      if (c >= "0" && c <= "9")
        n = n * 16 + (c - "0");
      else
        n = n * 16 + tohex[c];
    }
  return n;
}
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
29
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Return the linear index of the four-byte code B0 B1 B2 B3.
#
# B0 and B2 are counted from 129, B1 and B3 from 48.  The bytes form a
# mixed-radix number whose place values are 12600 (= 10 * 126 * 10),
# 1260 (= 126 * 10), 10 and 1, so the code 129,48,129,48 has index 0.
function gb_to_index(b0,b1,b2,b3) {
  return (b0 - 129) * 12600 + (b1 - 48) * 1260 + (b2 - 129) * 10 + (b3 - 48);
}
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
33
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Return the four-byte code for linear index IDX as a string of eight
# upper-case hexadecimal digits (two per byte, first byte first).
# This is the inverse of the byte-to-index computation: the last and
# second bytes are base-10 digits offset by 48, the third byte is a
# base-126 digit offset by 129, and the first byte is the remaining
# quotient offset by 129.
#
# IDX is expected to be a non-negative integer.
#
# Each division is truncated with int() so every byte value is an
# exact integer, instead of carrying a fraction and relying on
# sprintf's %X conversion to discard it.  The parameters after IDX are
# local variables; callers pass IDX alone, and no globals named
# b0..b3 are modified.
function index_to_gb(idx,    b0, b1, b2, b3) {
  b3 = (idx % 10) + 48;
  idx = int(idx / 10);
  b2 = (idx % 126) + 129;
  idx = int(idx / 126);
  b1 = (idx % 10) + 48;
  b0 = int(idx / 10) + 129;
  return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
}
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
43
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Return the linear index of the four-byte code written in STR.
#
# STR is read as four groups of four characters, each group being a
# two-character prefix followed by two hexadecimal digits (the rule
# that calls this function passes text matching \xHH\xHH\xHH\xHH), so
# the digit pairs sit at character positions 3, 7, 11 and 15.
#
# The parameters after STR are local variables; callers pass STR
# alone, and no globals named b0..b3 are modified.
function decode_gb(str,    b0, b1, b2, b3) {
  b0 = decode_hex(substr(str, 3, 2));
  b1 = decode_hex(substr(str, 7, 2));
  b2 = decode_hex(substr(str, 11, 2));
  b3 = decode_hex(substr(str, 15, 2));
  return gb_to_index(b0, b1, b2, b3);
}
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
51
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Print one mapping line for the run of linear indices FROM..TO.
#
# The Unicode value of the first index is read from the global
# gbtable[]; the run is taken to be contiguous, so the last Unicode
# value is that start value plus (TO - FROM).  A single-element run
# (FROM == TO) prints "0xGGGGGGGG U+XXXX"; a longer run prints both
# end points of each range.
#
# The parameters after TO are local variables; callers pass FROM and
# TO only.  They must stay local: the END rule keeps its own run-start
# index in a global that is also called `fromgb', and assigning the
# hex string to that global here would overwrite the caller's state.
function printline(from, to,    fromgb, fromuni) {
  fromgb = index_to_gb(from);
  fromuni = gbtable[from];
  if (from == to)
    printf ("0x%s U+%04X\n", fromgb, fromuni);
  else
    printf ("0x%s-0x%s U+%04X-U+%04X\n", fromgb, index_to_gb(to),
            fromuni, fromuni + (to - from));
}
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
61
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# For every input line starting with <UXXXX> (four upper-case hex
# digits), record in unitable[] how that code point is mapped: the
# linear index of the four-byte code when field 2 has the four-byte
# \x8[1-4] \x3[0-9] \x[8-F]? \x3[0-9] shape, otherwise -1.
/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ {
  unicode = decode_hex(substr($1, 3, 4));
  unitable[unicode] = ($2 ~ /\\x8[1-4]\\x3[0-9]\\x[8-9A-F][0-9A-F]\\x3[0-9]/) \
    ? decode_gb($2) : -1;
}
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
69
d83b3c05ffab New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# After all input has been read, complete the code point <-> linear
# index tables and print the mapping as runs of consecutive indices.
END {
  surrogate_min = decode_hex("D800");
  surrogate_max = decode_hex("DFFF");

  # Pass 1: walk the code points 129..65535 in order.  A code point
  # with no entry in unitable[] (and outside the surrogate range) is
  # given the index following the most recent four-byte index; a code
  # point that already has a positive index resets that counter.
  # Entries equal to -1 and unset surrogates are left untouched.
  # gbtable[] is filled as the reverse map (index -> code point).
  lastgb = unitable[128];
  gbtable[lastgb] = 128;
  for (cp = 129; cp < 65536; cp++)
    {
      if (unitable[cp] == 0 && (cp < surrogate_min || cp > surrogate_max))
        {
          lastgb++;
          gbtable[lastgb] = cp;
          unitable[cp] = lastgb;
        }
      else if (unitable[cp] > 0)
        {
          lastgb = unitable[cp];
          gbtable[lastgb] = cp;
        }
    }

  # Pass 2: emit one line per maximal run of consecutive indices.
  # runstart is the first index of the open run and lastgb its last
  # index; lastgb == -1 means no run is currently open.
  runstart = lastgb = unitable[128];
  for (cp = 129; cp < 65536; cp++)
    {
      if (unitable[cp] > 0)
        {
          if (lastgb + 1 == unitable[cp])
            lastgb++;
          else
            {
              # The index jumped: flush the open run, start a new one.
              if (lastgb >= 0)
                printline(runstart, lastgb);
              runstart = lastgb = unitable[cp];
            }
        }
      else
        {
          # unitable[cp] is not positive: either -1 or unset (the
          # surrogates).  Flush and close the open run.
          if (lastgb >= 0)
            printline(runstart, lastgb);
          lastgb = -1;
        }
    }

  # Flush the final run, if one is still open.  When it is, lastgb
  # equals unitable[65535]; when it is not, there is nothing to print.
  if (lastgb >= 0)
    printline(runstart, lastgb);
}