annotate admin/charsets/compact.awk @ 94065:22c67e6590d0

Fix utf-8 breakage in previous commit.
author Juanma Barranquero <lekktu@gmail.com>
date Mon, 14 Apr 2008 02:00:02 +0000
parents daa208ab5dfb
children eb2d9dfc8486
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1 # compact.awk -- Make charset map compact.
91412
daa208ab5dfb Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents: 89916
diff changeset
2 # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
3 # National Institute of Advanced Industrial Science and Technology (AIST)
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
4 # Registration Number H13PRO009
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
5 #
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
6 # This file is part of GNU Emacs.
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
7 #
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
8 # GNU Emacs is free software; you can redistribute it and/or modify
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
9 # it under the terms of the GNU General Public License as published by
91412
daa208ab5dfb Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents: 89916
diff changeset
10 # the Free Software Foundation; either version 3, or (at your option)
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
11 # any later version.
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
12 #
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
13 # GNU Emacs is distributed in the hope that it will be useful,
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
16 # GNU General Public License for more details.
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
17 #
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
18 # You should have received a copy of the GNU General Public License
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
19 # along with GNU Emacs; see the file COPYING. If not, write to the
91412
daa208ab5dfb Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents: 89916
diff changeset
20 # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
daa208ab5dfb Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents: 89916
diff changeset
21 # Boston, MA 02110-1301, USA.
88123
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
22
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
23 # Comment:
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
24 # Make a charset map compact by changing this kind of line sequence:
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
25 # 0x00 0x0000
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
26 # 0x01 0x0001
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
27 # ...
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
28 # 0x7F 0x007F
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
29 # to one line of this format:
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
30 # 0x00-0x7F 0x0000
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
31
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Set up the hex-digit lookup table and the state of the run of
# consecutive mappings that is currently being accumulated.
BEGIN {
    # tohex maps a hex digit character (either case) to its value plus
    # one, so that a character with no entry -- which compares equal to
    # 0 -- can be told apart from the digit "0".
    upper_digits = "0123456789ABCDEF";
    lower_digits = "0123456789abcdef";
    for (pos = 1; pos <= 16; pos++) {
        tohex[substr(upper_digits, pos, 1)] = pos;
        tohex[substr(lower_digits, pos, 1)] = pos;
    }

    # Current run: codes from_code..to_code, whose first code maps to
    # from_unicode and last to to_unicode.  to_code starts below
    # from_code, i.e. no run has been started yet.
    to_unicode = 0;
    from_unicode = 0;
    to_code = -1;
    from_code = 0;
}
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
60
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Return the numeric value of the hexadecimal digits in STR, starting
# at the 1-based character position IDX.  Decoding stops at the end of
# STR or at the first character that has no entry in the tohex table;
# if no digit is found the result is 0.
#
# The parameters after IDX are local work variables (awk's way of
# declaring locals); callers pass only STR and IDX.
function decode_hex(str, idx,    n, len, i, c) {
    n = 0;
    len = length(str);
    for (i = idx; i <= len; i++) {
        c = tohex[substr(str, i, 1)];
        # tohex holds digit value + 1, so 0 means "not a hex digit".
        if (c == 0)
            break;
        n = n * 16 + c - 1;
    }
    return n;
}
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
73
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Copy comment lines (those starting with "#") to the output unchanged
# and skip the remaining rules for them.
# "#" is an ordinary character inside a regexp and needs no backslash;
# the meaning of a backslash before an ordinary character is not
# defined by POSIX, and some awk implementations warn about it.
/^#/ {
    print;
    next;
}
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
78
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Data line: field 1 is the charset code and field 2 the Unicode value.
# Both are decoded as hex digits starting at character 3, i.e. after a
# two-character prefix.
{
    code = decode_hex($1, 3);
    unicode = decode_hex($2, 3);

    if ((code != to_code + 1) || (unicode != to_unicode + 1)) {
        # This line does not continue the current run: print the run
        # (nothing is printed while from_code > to_code, which is the
        # initial state) and start a new run at this line.
        # Codes are printed with 2 hex digits when the run ends below
        # 256, otherwise with 4.
        if (from_code == to_code) {
            if (to_code < 256)
                printf "0x%02X 0x%04X\n", from_code, from_unicode;
            else
                printf "0x%04X 0x%04X\n", from_code, from_unicode;
        } else if (from_code < to_code) {
            if (to_code < 256)
                printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
            else
                printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
        }
        from_unicode = to_unicode = unicode;
        from_code = to_code = code;
    } else {
        # Code and Unicode value both advance by exactly one:
        # extend the current run.
        to_unicode++;
        to_code++;
    }
}
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
107
375f2633d815 New directory
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
# Print the last pending run once all input has been read.
# If the input contained no data lines, no run was ever started
# (from_code is still greater than to_code), so nothing is printed;
# this is the same guard the main rule applies when it flushes a run.
# Codes are printed with 2 hex digits when the run ends below 256,
# otherwise with 4.
END {
    if (to_code < 256) {
        if (from_code == to_code)
            printf "0x%02X 0x%04X\n", from_code, from_unicode;
        else if (from_code < to_code)
            printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
    } else {
        if (from_code == to_code)
            printf "0x%04X 0x%04X\n", from_code, from_unicode;
        else if (from_code < to_code)
            printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
    }
}
89916
e0e4e6a0599f Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents: 88123
diff changeset
124
e0e4e6a0599f Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents: 88123
diff changeset
125 # arch-tag: 7e6f57c3-8e62-4af3-8916-ca67bca3a0ce