Mercurial > emacs
annotate admin/charsets/eucjp-ms.awk @ 106495:cf43d025c1b9
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 09 Dec 2009 01:02:11 +0000 |
parents | 46d7fd5d4fe4 |
children | 1d1d5d9bd884 |
rev | line source |
---|---|
89750 | 1 # eucjp-ms.awk -- Generate a translation table for eucJP-ms. |
100971 | 2 # Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 |
89750 | 3 # National Institute of Advanced Industrial Science and Technology (AIST) |
4 # Registration Number H13PRO009 | |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
5 |
89750 | 6 # This file is part of GNU Emacs. |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
7 |
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
8 # GNU Emacs is free software: you can redistribute it and/or modify |
89750 | 9 # it under the terms of the GNU General Public License as published by |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
10 # the Free Software Foundation, either version 3 of the License, or |
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
11 # (at your option) any later version. |
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
12 |
89750 | 13 # GNU Emacs is distributed in the hope that it will be useful, |
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 # GNU General Public License for more details. | |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
17 |
89750 | 18 # You should have received a copy of the GNU General Public License |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
19 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
89750 | 20 |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91415
diff
changeset
|
21 # Commentary: |
89750 | 22 |
23 # eucJP-ms is one of the eucJP-open encodings defined at this page: | |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
24 # http://home.m05.itscom.net/numa/cde/ucs-conv/appendix.html |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
25 # This program reads the mapping file EUC-JP-MS (of glibc) and |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
26 # generates the Elisp file eucjp-ms.el that defines two translation |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
27 # tables `eucjp-ms-decode' and `eucjp-ms-encode'. |
89750 | 28 |
29 BEGIN { | |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
30 FS = "[ \t][ \t]*" |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
31 |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
32 # STATE: 0/ignore, 1/JISX0208, 2/JISX0208 target range |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
33 # 3/JISX0212, 4/JISX0212 target range |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
34 state = 0; |
89750 | 35 |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
36 JISX0208_FROM1 = "/xad/xa1"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
37 JISX0208_TO1 = "/xad/xfc"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
38 JISX0208_FROM2 = "/xf5/xa1"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
39 JISX0212_FROM = "/x8f/xf3/xf3"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
40 |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
41 print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
42 print ";;; Automatically generated from /usr/share/i18n/charmaps/EUC-JP-MS.gz"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
43 print "(let ((map"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
44 print " '(;JISEXT<->UNICODE"; |
89750 | 45 } |
46 | |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
47 function write_entry (unicode) { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
48 if (state == 1) { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
49 if ($2 == JISX0208_FROM1 || $2 == JISX0208_FROM2) |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
50 state = 2; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
51 } else if (state == 3) { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
52 if ($2 == JISX0212_FROM) |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
53 state = 4; |
89750 | 54 } |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
55 if (state == 2) { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
56 jis = $2 |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
57 gsub("/x", "", jis); |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
58 printf "\n (#x%s . #x%s)", jis, unicode; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
59 if ($2 == JISX0208_TO1) |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
60 state = 1; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
61 } else if (state == 4) { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
62 jis = substr($2, 5, 8); |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
63 gsub("/x", "", jis); |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
64 printf "\n (#x%s #x%s)", jis, unicode; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
65 } |
89750 | 66 } |
67 | |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
68 |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
69 /^% JIS X 0208/ { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
70 state = 1; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
71 next; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
72 } |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
73 |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
74 /^% JIS X 0212/ { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
75 state = 3; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
76 next; |
89750 | 77 } |
78 | |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
79 /^END CHARMAP/ { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
80 state = 0; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
81 next; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
82 } |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
83 |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
84 /^<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
85 if (state > 0) |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
86 write_entry(substr($1, 3, 4)); |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
87 } |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
88 |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
89 /^%IRREVERSIBLE%<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ { |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
90 if (state > 0) |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
91 write_entry(substr($1, 17, 4)); |
89750 | 92 } |
93 | |
94 END { | |
95 print ")))"; | |
96 print " (mapc #'(lambda (x)"; | |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
97 print " (let ((code (logand (car x) #x7F7F)))"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
98 print " (if (integerp (cdr x))"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
99 print " (setcar x (decode-char 'japanese-jisx0208 code))"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
100 print " (setcar x (decode-char 'japanese-jisx0212 code))"; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
101 print " (setcdr x (cadr x)))))"; |
89750 | 102 print " map)"; |
103 print " (define-translation-table 'eucjp-ms-decode map)"; | |
104 print " (mapc #'(lambda (x)"; | |
105 print " (let ((tmp (car x)))"; | |
106 print " (setcar x (cdr x)) (setcdr x tmp)))"; | |
107 print " map)"; | |
108 print " (define-translation-table 'eucjp-ms-encode map))"; | |
103387
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
109 print ""; |
46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
Kenichi Handa <handa@m17n.org>
parents:
100971
diff
changeset
|
110 print ";; arch-tag: c4191096-288a-4f13-9b2a-ee7a1f11eb4a"; |
89750 | 111 } |
89916
e0e4e6a0599f
Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents:
89750
diff
changeset
|
112 |
e0e4e6a0599f
Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents:
89750
diff
changeset
|
113 # arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c |