annotate admin/charsets/eucjp-ms.awk @ 105270:3c67a7fe36f8

*** empty log message ***
author Juanma Barranquero <lekktu@gmail.com>
date Tue, 29 Sep 2009 01:01:43 +0000
parents 46d7fd5d4fe4
children 1d1d5d9bd884
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1 # eucjp-ms.awk -- Generate a translation table for eucJP-ms.
100971
ce88a631c161 Add 2009 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 94832
diff changeset
2 # Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
3 # National Institute of Advanced Industrial Science and Technology (AIST)
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
4 # Registration Number H13PRO009
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
5
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
6 # This file is part of GNU Emacs.
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
7
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
8 # GNU Emacs is free software: you can redistribute it and/or modify
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
9 # it under the terms of the GNU General Public License as published by
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
10 # the Free Software Foundation, either version 3 of the License, or
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
11 # (at your option) any later version.
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
12
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
13 # GNU Emacs is distributed in the hope that it will be useful,
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
16 # GNU General Public License for more details.
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
17
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
18 # You should have received a copy of the GNU General Public License
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
19 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
20
94832
eb2d9dfc8486 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 91415
diff changeset
21 # Commentary:
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
22
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
23 # eucJP-ms is one of the eucJP-open encodings defined at this page:
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
24 # http://home.m05.itscom.net/numa/cde/ucs-conv/appendix.html
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
25 # This program reads the mapping file EUC-JP-MS (of glibc) and
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
26 # generates the Elisp file eucjp-ms.el that defines two translation
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
27 # tables `eucjp-ms-decode' and `eucjp-ms-encode'.
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
28
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
29 BEGIN {  # Set up field splitting, the state variable and the range markers, then print the Elisp preamble.
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
30 FS = "[ \t][ \t]*"  # fields are separated by runs of spaces and/or tabs
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
31
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
32 # STATE: 0/ignore, 1/JISX0208, 2/JISX0208 target range
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
33 # 3/JISX0212, 4/JISX0212 target range
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
34 state = 0;  # mapping lines are skipped until a "% JIS X ..." header changes the state
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
35
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
36 JISX0208_FROM1 = "/xad/xa1";  # $2 value that starts the first emitted JISX0208 range
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
37 JISX0208_TO1 = "/xad/xfc";  # $2 value that ends the first range (this entry is still emitted)
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
38 JISX0208_FROM2 = "/xf5/xa1";  # $2 value that starts the second JISX0208 range; no end marker is defined for it
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
39 JISX0212_FROM = "/x8f/xf3/xf3";  # $2 value that starts the emitted JISX0212 range
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
40
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
41 print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-";
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
42 print ";;; Automatically generated from /usr/share/i18n/charmaps/EUC-JP-MS.gz";
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
43 print "(let ((map";  # opens the `let' form; the END block prints the closing parentheses
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
44 print " '(;JISEXT<->UNICODE";  # opens the quoted list that write_entry fills with entries
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
45 }
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
46
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
47 function write_entry (unicode) {  # Print one entry for the current record if its code ($2) lies in a target range; UNICODE is printed after "#x" as the second element.
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
48 if (state == 1) {  # in the JISX0208 section, outside a target range
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
49 if ($2 == JISX0208_FROM1 || $2 == JISX0208_FROM2)  # a range-start marker switches to emitting
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
50 state = 2;
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
51 } else if (state == 3) {  # in the JISX0212 section, before the target range
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
52 if ($2 == JISX0212_FROM)
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
53 state = 4;
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
54 }
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
55 if (state == 2) {  # JISX0208 target range: the entry is written as a dotted pair
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
56 jis = $2
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
57 gsub("/x", "", jis);  # e.g. "/xad/xa1" becomes "ada1"
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
58 printf "\n (#x%s . #x%s)", jis, unicode;
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
59 if ($2 == JISX0208_TO1)  # last code of the first range was just printed: stop emitting
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
60 state = 1;
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
61 } else if (state == 4) {  # JISX0212 target range: the entry is written as a two-element list
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
62 jis = substr($2, 5, 8);  # drop the first 4 characters of $2 (presumably the "/x8f" prefix, as in JISX0212_FROM)
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
63 gsub("/x", "", jis);  # e.g. "/xf3/xf3" becomes "f3f3"
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
64 printf "\n (#x%s #x%s)", jis, unicode;
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
65 }
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
66 }
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
67
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
68
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
69 /^% JIS X 0208/ {  # section header line: switch to the JISX0208 state
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
70 state = 1;
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
71 next;  # the header line itself carries no mapping
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
72 }
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
73
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
74 /^% JIS X 0212/ {  # section header line: switch to the JISX0212 state
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
75 state = 3;
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
76 next;  # the header line itself carries no mapping
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
77 }
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
78
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
79 /^END CHARMAP/ {  # end-of-charmap line: return to the ignore state
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
80 state = 0;  # with state 0 the mapping rules no longer call write_entry
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
81 next;
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
82 }
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
83
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
84 /^<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ {  # mapping line whose first field starts with <Uxxxx>
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
85 if (state > 0)  # only inside a JISX0208 or JISX0212 section
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
86 write_entry(substr($1, 3, 4));  # characters 3-6 of $1 are the four characters after "<U"
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
87 }
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
88
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
89 /^%IRREVERSIBLE%<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ {  # mapping line prefixed with %IRREVERSIBLE%
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
90 if (state > 0)  # only inside a JISX0208 or JISX0212 section
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
91 write_entry(substr($1, 17, 4));  # "%IRREVERSIBLE%<U" is 16 characters, so the code starts at character 17
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
92 }
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
93
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
94 END {  # Close the entry list and print the Elisp that builds both translation tables from it.
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
95 print ")))";  # closes the quoted list, the `map' binding and the `let' binding list opened in BEGIN
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
96 print " (mapc #'(lambda (x)";
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
97 print " (let ((code (logand (car x) #x7F7F)))";
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
98 print " (if (integerp (cdr x))";  # integer cdr: the entry was written as a dotted pair (JISX0208)
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
99 print " (setcar x (decode-char 'japanese-jisx0208 code))";
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
100 print " (setcar x (decode-char 'japanese-jisx0212 code))";  # otherwise the entry was written as a two-element list (JISX0212)
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
101 print " (setcdr x (cadr x)))))";
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
102 print " map)";
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
103 print " (define-translation-table 'eucjp-ms-decode map)";
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
104 print " (mapc #'(lambda (x)";
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
105 print " (let ((tmp (car x)))";
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
106 print " (setcar x (cdr x)) (setcdr x tmp)))";  # swaps car and cdr of each entry in place before defining the encode table
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
107 print " map)";
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
108 print " (define-translation-table 'eucjp-ms-encode map))";  # the final parenthesis closes the `let' form
103387
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
109 print "";
46d7fd5d4fe4 Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents: 100971
diff changeset
110 print ";; arch-tag: c4191096-288a-4f13-9b2a-ee7a1f11eb4a";
89750
d6d9f1efd684 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
111 }
89916
e0e4e6a0599f Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents: 89750
diff changeset
112
e0e4e6a0599f Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents: 89750
diff changeset
113 # arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c