annotate admin/charsets/compact.awk @ 100006:527cfe29292e

(Text Representations, Converting Representations, Character Sets, Scanning Charsets, Translation of Characters): Make text more accurate.
author Eli Zaretskii <eliz@gnu.org>
date Fri, 28 Nov 2008 13:26:17 +0000
parents eb2d9dfc8486
children ce88a631c161
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1 # compact.awk -- Make charset map compact.
91412
daa208ab5dfb Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents: 89916
diff changeset
2 # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
3 # National Institute of Advanced Industrial Science and Technology (AIST)
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
4 # Registration Number H13PRO009
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
5
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
6 # This file is part of GNU Emacs.
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
7
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
8 # GNU Emacs is free software: you can redistribute it and/or modify
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
9 # it under the terms of the GNU General Public License as published by
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
10 # the Free Software Foundation, either version 3 of the License, or
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
11 # (at your option) any later version.
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
12
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
13 # GNU Emacs is distributed in the hope that it will be useful,
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
16 # GNU General Public License for more details.
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
17
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
18 # You should have received a copy of the GNU General Public License
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
19 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
20
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91412
diff changeset
21 # Commentary:
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
22 # Make a charset map compact by changing this kind of line sequence:
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
23 # 0x00 0x0000
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
24 # 0x01 0x0001
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
25 # ...
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
26 # 0x7F 0x007F
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
27 # to one line of this format:
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
28 # 0x00-0x7F 0x0000
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
29
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
BEGIN {
  # tohex maps each hexadecimal digit character (upper and lower case)
  # to its numeric value plus one, i.e. "0" -> 1 ... "F"/"f" -> 16.
  # The offset keeps every valid entry non-zero, so an unset entry
  # (which compares equal to 0) identifies a non-hex character.
  digit_count = split("0 1 2 3 4 5 6 7 8 9 A B C D E F", hex_digit, " ");
  for (k = 1; k <= digit_count; k++)
    {
      tohex[hex_digit[k]] = k;
      tohex[tolower(hex_digit[k])] = k;
    }

  # State of the run of consecutive mappings currently being collected:
  # codes from_code..to_code map to from_unicode..to_unicode.
  # to_code starts below from_code, i.e. no run has been started yet.
  from_code = 0;
  from_unicode = 0;
  to_code = -1;
  to_unicode = 0;
}
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
58
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Decode the run of hexadecimal digits in STR that starts at the
# 1-based character position IDX, and return its numeric value.
# Decoding stops at the first character that is not a key of the
# global table tohex, or at the end of STR; if no digit is found at
# IDX the result is 0.
#
# The parameters after the wide gap (n, len, i, ch) are locals: awk
# has no other way to declare them, and without this the function
# would overwrite global variables of the same names on every call.
# Callers pass only STR and IDX.
function decode_hex(str, idx,    n, len, i, ch) {
  n = 0;
  len = length(str);
  for (i = idx; i <= len; i++)
    {
      ch = substr(str, i, 1);
      # Test membership instead of reading tohex[ch], which would
      # create an empty entry for every non-hex character seen.
      if (!(ch in tohex))
        break;
      # tohex stores digit value + 1, hence the "- 1".
      n = n * 16 + tohex[ch] - 1;
    }
  return n;
}
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
71
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Copy lines that begin with "#" to the output unchanged and skip the
# remaining rules for them.
$0 ~ /^\#/ {
  print $0;
  next;
}
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
76
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Print the run of consecutive mappings FIRST..LAST whose first code
# maps to UCS.  A single-code run is printed as "CODE UNICODE", a longer
# one as "FIRST-LAST UNICODE".  Codes are printed with two hex digits
# when LAST is below 256 and with four otherwise; UCS always uses four.
# Nothing is printed when FIRST > LAST, which is how the "no run
# started yet" state (from_code = 0, to_code = -1) is represented.
# code_fmt and line_fmt are locals.
function emit_run(first, last, ucs,    code_fmt, line_fmt) {
  if (first > last)
    return;
  code_fmt = (last < 256) ? "0x%02X" : "0x%04X";
  if (first == last)
    {
      line_fmt = code_fmt " 0x%04X\n";
      printf line_fmt, first, ucs;
    }
  else
    {
      line_fmt = code_fmt "-" code_fmt " 0x%04X\n";
      printf line_fmt, first, last, ucs;
    }
}

# Handle one mapping line of the form "0xCODE 0xUNICODE": either extend
# the run being collected, or print that run and start a new one.
{
  # A line without both fields would decode as code 0 / unicode 0 and
  # produce a spurious mapping, so ignore it.
  if (NF < 2)
    next;

  # Start decoding at position 3 to skip the leading "0x".
  code = decode_hex($1, 3);
  unicode = decode_hex($2, 3);

  # Only an existing run (from_code <= to_code) can be extended;
  # otherwise a first record such as "0x00 0x0001" would be glued onto
  # the empty initial state and printed with the wrong Unicode value.
  if (from_code <= to_code
      && code == to_code + 1 && unicode == to_unicode + 1)
    {
      to_code++;
      to_unicode++;
    }
  else
    {
      emit_run(from_code, to_code, from_unicode);
      from_code = to_code = code;
      from_unicode = to_unicode = unicode;
    }
}

# Print the last pending run.  emit_run prints nothing if the input
# contained no mapping lines at all.
END {
  emit_run(from_code, to_code, from_unicode);
}
89916
e0e4e6a0599f Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents: 88123
diff changeset
122
e0e4e6a0599f Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents: 88123
diff changeset
123 # arch-tag: 7e6f57c3-8e62-4af3-8916-ca67bca3a0ce