Mercurial > emacs
annotate lisp/international/ja-dic-utl.el @ 111410:15a32889e96c
Decode utf-8 strings in mixed environments by default.
Done via the new `erc-coding-system-precedence' variable.
author | Lars Magne Ingebrigtsen <larsi@gnus.org> |
---|---|
date | Fri, 05 Nov 2010 15:17:46 +0100 |
parents | 1d1d5d9bd884 |
children | 376148b31b5e |
rev | line source |
---|---|
38414
67b464da13ec
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
36682
diff
changeset
|
1 ;;; ja-dic-utl.el --- utilities for handling Japanese dictionary (SKK-JISYO.L) |
31163 | 2 |
74605
6ee41fdd69ff
Update AIST copyright years.
Kenichi Handa <handa@m17n.org>
parents:
74544
diff
changeset
|
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
106815 | 4 ;; 2005, 2006, 2007, 2008, 2009, 2010 |
62274 | 5 ;; National Institute of Advanced Industrial Science and Technology (AIST) |
6 ;; Registration Number H14PRO021 | |
31163 | 7 |
106256
9be41d3d415d
* international/isearch-x.el, international/ja-dic-cnv.el,
Kevin Ryde <user42@zip.com.au>
parents:
100908
diff
changeset
|
8 ;; Keywords: i18n, mule, multilingual, Japanese |
31163 | 9 |
10 ;; This file is part of GNU Emacs. | |
11 | |
94664
889bc336b89b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
12 ;; GNU Emacs is free software: you can redistribute it and/or modify |
31163 | 13 ;; it under the terms of the GNU General Public License as published by |
94664
889bc336b89b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
14 ;; the Free Software Foundation, either version 3 of the License, or |
889bc336b89b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
15 ;; (at your option) any later version. |
31163 | 16 |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
94664
889bc336b89b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
31163 | 24 |
25 ;;; Commentary: | |
26 | |
27 ;; This file provides a generic function to look up a Japanese | |
28 ;; dictionary of SKK format. | |
29 ;; | |
30 ;; SKK is a free Japanese input method running on Mule created by | |
31 ;; Masahiko Sato <masahiko@sato.riec.tohoku.ac.jp>. The Emacs Lisp | |
32 ;; library kkc.el provides a facility to convert a Japanese kana | |
33 ;; string to a kanji-kana-mixed string by using SKK's dictionary. | |
34 ;; | |
35 ;; The original SKK dictionary SKK-JISYO.L is converted to ja-dic.el | |
36 ;; by ja-dic-cnv.el. Loading that file (or its byte-compiled | |
37 ;; version, ja-dic.elc) stores the dictionary entries in the four | |
38 ;; variables defined below. | |
39 | |
40 ;;; Code: | |
41 | |
;; The following four variables are set by loading ja-dic.el[c].
;; They are defined here with a nil default so that this file can be
;; loaded and byte-compiled before the dictionary itself is loaded.
(defvar skkdic-okuri-ari nil
  "Nested alist for OKURI-ARI entries of SKK dictionary.")

(defvar skkdic-postfix nil
  "Nested alist for SETSUBIJI (postfix) entries of SKK dictionary.")

(defvar skkdic-prefix nil
  "Nested alist for SETTOUJI (prefix) entries of SKK dictionary.")

(defvar skkdic-okuri-nasi nil
  "Nested alist for OKURI-NASI entries of SKK dictionary.")
54 | |
;; The keys are the Hiragana characters of JIS X 0208 row 4 (codes
;; #x2421..#x2473, except #x246E, small WA), written as ASCII-only
;; hexadecimal character escapes so that the table reads back the same
;; way whatever coding system the file is decoded with.  The escapes
;; are the Unicode code points U+3041..U+3093 of those characters; the
;; trailing comment on each line names the kana in romaji ("x" marks a
;; small kana).
(defconst skkdic-okurigana-table
  '((?\x3041 . ?a) (?\x3042 . ?a) (?\x3043 . ?i) (?\x3044 . ?i) (?\x3045 . ?u) ; xa a xi i xu
    (?\x3046 . ?u) (?\x3047 . ?e) (?\x3048 . ?e) (?\x3049 . ?o) (?\x304A . ?o) ; u xe e xo o
    (?\x304B . ?k) (?\x304C . ?g) (?\x304D . ?k) (?\x304E . ?g) (?\x304F . ?k) ; ka ga ki gi ku
    (?\x3050 . ?g) (?\x3051 . ?k) (?\x3052 . ?g) (?\x3053 . ?k) (?\x3054 . ?g) ; gu ke ge ko go
    (?\x3055 . ?s) (?\x3056 . ?z) (?\x3057 . ?s) (?\x3058 . ?j) (?\x3059 . ?s) ; sa za shi ji su
    (?\x305A . ?z) (?\x305B . ?s) (?\x305C . ?z) (?\x305D . ?s) (?\x305E . ?z) ; zu se ze so zo
    (?\x305F . ?t) (?\x3060 . ?d) (?\x3061 . ?t) (?\x3062 . ?d) (?\x3063 . ?t) ; ta da chi di xtsu
    (?\x3064 . ?t) (?\x3065 . ?d) (?\x3066 . ?t) (?\x3067 . ?d) (?\x3068 . ?t) (?\x3069 . ?d) ; tsu du te de to do
    (?\x306A . ?n) (?\x306B . ?n) (?\x306C . ?n) (?\x306D . ?n) (?\x306E . ?n) ; na ni nu ne no
    (?\x306F . ?h) (?\x3070 . ?b) (?\x3071 . ?p) (?\x3072 . ?h) (?\x3073 . ?b) ; ha ba pa hi bi
    (?\x3074 . ?p) (?\x3075 . ?h) (?\x3076 . ?b) (?\x3077 . ?p) (?\x3078 . ?h) ; pi fu bu pu he
    (?\x3079 . ?b) (?\x307A . ?p) (?\x307B . ?h) (?\x307C . ?b) (?\x307D . ?p) ; be pe ho bo po
    (?\x307E . ?m) (?\x307F . ?m) (?\x3080 . ?m) (?\x3081 . ?m) (?\x3082 . ?m) ; ma mi mu me mo
    (?\x3083 . ?y) (?\x3084 . ?y) (?\x3085 . ?y) (?\x3086 . ?y) (?\x3087 . ?y) (?\x3088 . ?y) ; xya ya xyu yu xyo yo
    (?\x3089 . ?r) (?\x308A . ?r) (?\x308B . ?r) (?\x308C . ?r) (?\x308D . ?r) ; ra ri ru re ro
    (?\x308F . ?w) (?\x3090 . ?w) (?\x3091 . ?w) (?\x3092 . ?w) ; wa wi we wo
    (?\x3093 . ?n) ; n
    )
  "Alist of Okuriganas vs trailing ASCII letters in OKURI-ARI entry.
Each element has the form (KANA . LETTER), where KANA is a Hiragana
character and LETTER is the ASCII letter paired with it.")
75 | |
(defun skkdic-merge-head-and-tail (heads tails postfix)
  "Return the concatenations of elements of HEADS with elements of TAILS.

HEADS and TAILS are lists of strings.  Strings shorter than two
characters are left out on one side only: if POSTFIX is non-nil,
short elements of HEADS are skipped; otherwise short elements of
TAILS are skipped.

The result is built by consing each new string onto the front, so
the last combination generated is the first element returned."
  (let ((shortest 2)
        (merged nil))
    (dolist (stem heads)
      (when (or (null postfix)
                (>= (length stem) shortest))
        (dolist (suffix tails)
          (when (or postfix
                    (>= (length suffix) shortest))
            (push (concat stem suffix) merged)))))
    merged))
90 | |
88518
914548535d25
(skkdic-jisx0208-hiragana-block):
Kenichi Handa <handa@m17n.org>
parents:
88407
diff
changeset
|
(defconst skkdic-jisx0208-hiragana-block
  (let ((first-char (decode-char 'japanese-jisx0208 #x2421))
        (last-char (decode-char 'japanese-jisx0208 #x247E)))
    (cons first-char last-char))
  "Cons (FIRST . LAST) delimiting a range of characters.
FIRST and LAST are the characters decoded from the `japanese-jisx0208'
code points #x2421 and #x247E.  `skkdic-lookup-key' compares each
input character against this inclusive range before encoding it.")
31163 | 94 |
(defun skkdic-lookup-key (seq len &optional postfix prefer-noun)
  "Return a list of conversion strings for sequence SEQ of length LEN.

SEQ is a vector of Kana characters to be converted by SKK dictionary.
If LEN is shorter than the length of SEQ, only the first LEN elements
of SEQ are taken into account.

Optional 3rd arg POSTFIX non-nil means SETSUBIJI (postfix) are also
considered to find conversion strings.

Optional 4th arg PREFER-NOUN non-nil means that the conversions
without okurigana are placed at the head of the returned list.

If the dictionary is not loaded yet, load the library \"ja-dic/ja-dic\"
first.  If that fails, show an explanation in the *Help* buffer and
signal the original error again."
  (or skkdic-okuri-nasi
      (condition-case err
          (load-library "ja-dic/ja-dic")
        (error (ding)
               (with-output-to-temp-buffer "*Help*"
                 (princ "The library `ja-dic' can't be loaded.

The most common case is that you have not yet installed the library
included in LEIM (Libraries of Emacs Input Method) which is
distributed separately from Emacs.

LEIM is available from the same ftp directory as Emacs."))
               (signal (car err) (cdr err)))))

  (let ((vec (make-vector len 0))
        entry)
    ;; At first, generate vector VEC from SEQ for looking up SKK
    ;; alists.  Nth element in VEC corresponds to Nth element in SEQ.
    ;; The values are decided as follows.
    ;;   If SEQ[N] is the prolonged sound mark (JIS X 0208 #x213C,
    ;;   written below as the ASCII-only escape ?\x30FC), VEC[N] is 0,
    ;;   else if SEQ[N] is a Hiragana character, VEC[N] is:
    ;;     ((The 2nd position code of SEQ[N]) - 32),
    ;;   else VEC[N] is 128.
    (dotimes (i len)
      (let ((ch (aref seq i))
            code)
        (cond ((= ch ?\x30FC)
               (aset vec i 0))
              ((and (>= ch (car skkdic-jisx0208-hiragana-block))
                    (<= ch (cdr skkdic-jisx0208-hiragana-block)))
               ;; Being inside the range is not enough: the character
               ;; must also be encodable in japanese-jisx0208.
               (setq code (encode-char ch 'japanese-jisx0208))
               (if code
                   (aset vec i (- (logand code #xFF) 32))
                 (aset vec i 128)))
              (t
               (aset vec i 128)))))

    ;; Search OKURI-NASI entries.
    (setq entry (lookup-nested-alist vec skkdic-okuri-nasi len 0 t))
    ;; Copy the found list, because the `nconc' calls below would
    ;; otherwise modify the list stored in the dictionary.
    (if (consp (car entry))
        (setq entry (copy-sequence (car entry)))
      (setq entry nil))

    (if postfix
        ;; Search OKURI-NASI entries with postfixes.  BREAK is the
        ;; index at which VEC is split into a head and a postfix.
        (let ((break (max (- len (car skkdic-postfix)) 1))
              entry-head entry-postfix entry2)
          (while (< break len)
            (if (and (setq entry-head
                           (lookup-nested-alist vec skkdic-okuri-nasi
                                                break 0 t))
                     (consp (car entry-head))
                     (setq entry-postfix
                           (lookup-nested-alist vec skkdic-postfix
                                                len break t))
                     (consp (car entry-postfix))
                     (setq entry2 (skkdic-merge-head-and-tail
                                   (car entry-head) (car entry-postfix) t)))
                (if entry
                    (nconc entry entry2)
                  (setq entry entry2)))
            (setq break (1+ break)))))

    ;; Search OKURI-NASI entries with prefixes.  BREAK is the index at
    ;; which VEC is split into a prefix and a tail; it runs downward.
    (let ((break (min (car skkdic-prefix) (- len 2)))
          entry-prefix entry-tail entry2)
      (while (> break 0)
        (if (and (setq entry-prefix
                       (lookup-nested-alist vec skkdic-prefix break 0 t))
                 (consp (car entry-prefix))
                 (setq entry-tail
                       (lookup-nested-alist vec skkdic-okuri-nasi len break t))
                 (consp (car entry-tail))
                 (setq entry2 (skkdic-merge-head-and-tail
                               (car entry-prefix) (car entry-tail) nil)))
            (if entry
                (nconc entry entry2)
              (setq entry entry2)))
        (setq break (1- break))))

    ;; Search OKURI-ARI entries.  The last element of VEC is
    ;; temporarily replaced by the negated ASCII letter that
    ;; `skkdic-okurigana-table' pairs with the last character of SEQ.
    (let ((okurigana (assq (aref seq (1- len)) skkdic-okurigana-table))
          orig-element entry2)
      (if okurigana
          (progn
            (setq orig-element (aref vec (1- len)))
            (aset vec (1- len) (- (cdr okurigana)))
            (if (and (setq entry2 (lookup-nested-alist vec skkdic-okuri-ari
                                                       len 0 t))
                     (consp (car entry2)))
                (progn
                  (setq entry2 (copy-sequence (car entry2)))
                  (let ((l entry2)
                        (okuri (char-to-string (aref seq (1- len)))))
                    ;; Append the okurigana itself to each conversion.
                    (while l
                      (setcar l (concat (car l) okuri))
                      (setq l (cdr l)))
                    (if entry
                        (if prefer-noun
                            (nconc entry entry2)
                          (setq entry2 (nreverse entry2))
                          (nconc entry2 entry)
                          (setq entry entry2))
                      (setq entry (nreverse entry2))))))
            (aset vec (1- len) orig-element))))

    entry))
217 | |
218 ;; | |
219 (provide 'ja-dic-utl) | |
220 | |
36682
8adcbdf9202c
Add coding: tag in Local Variable: section.
Kenichi Handa <handa@m17n.org>
parents:
31163
diff
changeset
|
221 ;; Local Variables: |
8adcbdf9202c
Add coding: tag in Local Variable: section.
Kenichi Handa <handa@m17n.org>
parents:
31163
diff
changeset
|
222 ;; coding: iso-2022-7bit |
8adcbdf9202c
Add coding: tag in Local Variable: section.
Kenichi Handa <handa@m17n.org>
parents:
31163
diff
changeset
|
223 ;; End: |
38414
67b464da13ec
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
36682
diff
changeset
|
224 |
93975
1e3a407766b9
Fix up comment convention on the arch-tag lines.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91327
diff
changeset
|
225 ;; arch-tag: df2218fa-469c-40f6-bace-7f89a053f9c0 |
38414
67b464da13ec
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
36682
diff
changeset
|
226 ;;; ja-dic-utl.el ends here |