Mercurial > emacs
comparison admin/charsets/eucjp-ms.awk @ 103387:46d7fd5d4fe4
Mostly re-written to handle glibc's EUC-JP-MS.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 12 Jun 2009 07:22:13 +0000 |
parents | ce88a631c161 |
children | 1d1d5d9bd884 |
comparison
equal
deleted
inserted
replaced
103386:afce89bcee41 | 103387:46d7fd5d4fe4 |
---|---|
19 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | 19 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
20 | 20 |
21 # Commentary: | 21 # Commentary: |
22 | 22 |
23 # eucJP-ms is one of the eucJP-open encodings defined at this page: | 23 # eucJP-ms is one of the eucJP-open encodings defined at this page: |
24 # http://www.opengroup.or.jp/jvc/cde/appendix.html | 24 # http://home.m05.itscom.net/numa/cde/ucs-conv/appendix.html |
25 # This program reads the mapping file EUC-JP-MS (of glibc) and | |
26 # generates the Elisp file eucjp-ms.el that defines two translation | |
27 # tables `eucjp-ms-decode' and `eucjp-ms-encode'. | |
25 | 28 |
26 BEGIN { | 29 BEGIN { |
30 FS = "[ \t][ \t]*" | |
31 | |
32 # STATE: 0/ignore, 1/JISX0208, 2/JISX0208 target range | |
33 # 3/JISX0212, 4/JISX0212 target range | |
34 state = 0; | |
35 | |
36 JISX0208_FROM1 = "/xad/xa1"; | |
37 JISX0208_TO1 = "/xad/xfc"; | |
38 JISX0208_FROM2 = "/xf5/xa1"; | |
39 JISX0212_FROM = "/x8f/xf3/xf3"; | |
40 | |
27 print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-"; | 41 print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-"; |
28 print ";;; Automatically genrated from eucJP-13th.txt, eucJP-udc.txt, eucJP-ibmext.txt"; | 42 print ";;; Automatically generated from /usr/share/i18n/charmaps/EUC-JP-MS.gz"; |
29 print "(let ((map"; | 43 print "(let ((map"; |
30 printf " '(;JISEXT<->UNICODE"; | 44 print " '(;JISEXT<->UNICODE"; |
31 | |
32 tohex["A"] = 10; | |
33 tohex["B"] = 11; | |
34 tohex["C"] = 12; | |
35 tohex["D"] = 13; | |
36 tohex["E"] = 14; | |
37 tohex["F"] = 15; | |
38 } | 45 } |
39 | 46 |
40 function decode_hex(str) { | 47 function write_entry (unicode) { |
41 n = 0; | 48 if (state == 1) { |
42 len = length(str); | 49 if ($2 == JISX0208_FROM1 || $2 == JISX0208_FROM2) |
43 for (i = 1; i <= len; i++) | 50 state = 2; |
44 { | 51 } else if (state == 3) { |
45 c = substr(str, i, 1); | 52 if ($2 == JISX0212_FROM) |
46 if (c >= "0" && c <= "9") | 53 state = 4; |
47 n = n * 16 + (c - "0"); | |
48 else | |
49 n = n * 16 + tohex[c]; | |
50 } | 54 } |
51 return n; | 55 if (state == 2) { |
56 jis = $2 | |
57 gsub("/x", "", jis); | |
58 printf "\n (#x%s . #x%s)", jis, unicode; | |
59 if ($2 == JISX0208_TO1) | |
60 state = 1; | |
61 } else if (state == 4) { | |
62 jis = substr($2, 5, 8); | |
63 gsub("/x", "", jis); | |
64 printf "\n (#x%s #x%s)", jis, unicode; | |
65 } | |
52 } | 66 } |
53 | 67 |
54 /0x8F/ { | 68 |
55 code = decode_hex(substr($1, 5, 4)); | 69 /^% JIS X 0208/ { |
56 code -= 32896; # code -= 0x8080 | 70 state = 1; |
57 printf "\n (#x%04x #x%s)", code, substr($2, 3, 4); | 71 next; |
58 next; | |
59 } | 72 } |
60 | 73 |
61 /0x[A-F]/ { | 74 /^% JIS X 0212/ { |
62 code = decode_hex(substr($1, 3, 4)); | 75 state = 3; |
63 code -= 32896; # code -= 0x8080 | 76 next; |
64 printf "\n (#x%04x . #x%s)", code, substr($2, 3, 4); | 77 } |
78 | |
79 /^END CHARMAP/ { | |
80 state = 0; | |
81 next; | |
82 } | |
83 | |
84 /^<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ { | |
85 if (state > 0) | |
86 write_entry(substr($1, 3, 4)); | |
87 } | |
88 | |
89 /^%IRREVERSIBLE%<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ { | |
90 if (state > 0) | |
91 write_entry(substr($1, 17, 4)); | |
65 } | 92 } |
66 | 93 |
67 END { | 94 END { |
68 print ")))"; | 95 print ")))"; |
69 print " (mapc #'(lambda (x)"; | 96 print " (mapc #'(lambda (x)"; |
70 print " (if (integerp (cdr x))"; | 97 print " (let ((code (logand (car x) #x7F7F)))"; |
71 print " (setcar x (decode-char 'japanese-jisx0208 (car x)))"; | 98 print " (if (integerp (cdr x))"; |
72 print " (setcar x (decode-char 'japanese-jisx0212 (car x)))"; | 99 print " (setcar x (decode-char 'japanese-jisx0208 code))"; |
73 print " (setcdr x (cadr x))))"; | 100 print " (setcar x (decode-char 'japanese-jisx0212 code))"; |
101 print " (setcdr x (cadr x)))))"; | |
74 print " map)"; | 102 print " map)"; |
75 print " (define-translation-table 'eucjp-ms-decode map)"; | 103 print " (define-translation-table 'eucjp-ms-decode map)"; |
76 print " (mapc #'(lambda (x)"; | 104 print " (mapc #'(lambda (x)"; |
77 print " (let ((tmp (car x)))"; | 105 print " (let ((tmp (car x)))"; |
78 print " (setcar x (cdr x)) (setcdr x tmp)))"; | 106 print " (setcar x (cdr x)) (setcdr x tmp)))"; |
79 print " map)"; | 107 print " map)"; |
80 print " (define-translation-table 'eucjp-ms-encode map))"; | 108 print " (define-translation-table 'eucjp-ms-encode map))"; |
109 print ""; | |
110 print ";; arch-tag: c4191096-288a-4f13-9b2a-ee7a1f11eb4a"; | |
81 } | 111 } |
82 | 112 |
83 # arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c | 113 # arch-tag: d9cc7af7-2d6e-48cd-8eed-a6d25226de7c |