46670
|
1 ;;; titdic-cnv.el --- convert cxterm dictionary (TIT format) to Quail package -*- coding:iso-2022-7bit; -*-
|
17052
|
2
|
62274
|
3 ;; Copyright (C) 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
|
|
4 ;; Copyright (C) 1995, 1997, 1998, 2000, 2001, 2002
|
|
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
|
|
6 ;; Registration Number H14PRO021
|
17052
|
7
|
|
8 ;; Keywords: Quail, TIT, cxterm
|
|
9
|
|
10 ;; This file is part of GNU Emacs.
|
|
11
|
|
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
13 ;; it under the terms of the GNU General Public License as published by
|
|
14 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
15 ;; any later version.
|
|
16
|
|
17 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
20 ;; GNU General Public License for more details.
|
|
21
|
|
22 ;; You should have received a copy of the GNU General Public License
|
17071
|
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
64085
|
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
25 ;; Boston, MA 02110-1301, USA.
|
17052
|
26
|
38414
|
27 ;;; Commentary:
|
17052
|
28
|
18555
|
29 ;; Convert cxterm dictionary (of TIT format) to quail-package.
|
17052
|
30 ;;
|
|
31 ;; Usage (within Emacs):
|
18555
|
32 ;; M-x titdic-convert<CR>CXTERM-DICTIONARY-NAME<CR>
|
17052
|
33 ;; Usage (from shell):
|
18555
|
34 ;; % emacs -batch -l titdic-cnv -f batch-titdic-convert\
|
17052
|
35 ;; [-dir DIR] [DIR | FILE] ...
|
|
36 ;;
|
|
37 ;; When you run titdic-convert within Emacs, you have a chance to
|
|
38 ;; modify arguments of `quail-define-package' before saving the
|
|
39 ;; converted file. For instance, you are likely to modify TITLE,
|
|
40 ;; DOCSTRING, and KEY-BINDINGS.
|
|
41
|
18555
|
42 ;; Cxterm dictionary file (*.tit) is a line-oriented text (English,
|
17052
|
43 ;; Chinese, Japanese, and Korean) file. The whole file consists of
|
|
44 ;; two parts, the definition part (`header' here after) followed by
|
|
45 ;; the dictionary part (`body' here after). All lines beginning with
|
|
46 ;; leading '#' are ignored.
|
|
47 ;;
|
|
48 ;; Each line in the header part has two fields, KEY and VALUE. These
|
|
49 ;; fields are separated by one or more white characters.
|
|
50 ;;
|
|
51 ;; Each line in the body part has two fields, KEYSEQ and TRANSLATIONS.
|
|
52 ;; These fields are separated by one or more white characters.
|
|
53 ;;
|
|
54 ;; See the manual page of `tit2cit' of cxterm distribution for more
|
|
55 ;; detail.
|
37116
|
56 ;;
|
37261
|
57 ;; Near the end of this file, we also have a few other tools to convert
|
37116
|
58 ;; miscellaneous dictionaries.
|
17052
|
59
|
|
60 ;;; Code:
|
|
61
|
|
62 (require 'quail)
|
|
63
|
18555
|
;; Table used to interpret the value of the "ENCODE:" key found in the
;; header of a TIT dictionary.
(defvar tit-encode-list
  '(("GB"   euc-china "Chinese-GB")
    ("BIG5" cn-big5   "Chinese-BIG5")
    ("JIS"  euc-japan "Japanese")
    ("KS"   euc-kr    "Korean"))
  "Alist of \"ENCODE:\" values and the matching Emacs settings.
Each element has the form
  (ENCODE-VALUE CODING-SYSTEM LANGUAGE-ENVIRONMENT)
where ENCODE-VALUE is a string, CODING-SYSTEM is a coding system
symbol, and LANGUAGE-ENVIRONMENT is a language environment name.")
|
|
71
|
31548
|
72 ;; Alist of input method names and the corresponding title and extra
|
|
73 ;; docstring. For each input method generated from a TIT dictionary,
|
|
74 ;; a docstring is automatically generated from the comments in the
|
|
75 ;; dictionary. The extra docstring in this alist is to add more
|
|
76 ;; information.
|
|
77 ;; The command describe-input-method shows the automatically generated
|
46670
|
78 ;; docstring, then an extra docstring while replacing the form \<VAR>
|
31548
|
79 ;; by the value of variable VAR. For instance, the form
|
|
80 ;; \<quail-translation-docstring> is replaced by a description about
|
|
81 ;; how to select a translation from a list of candidates.
|
|
82
|
31425
|
83 (defvar quail-cxterm-package-ext-info
|
|
84 '(("chinese-4corner" "$(0(?-F(B")
|
|
85 ("chinese-array30" "$(0#R#O(B")
|
31548
|
86 ("chinese-ccdospy" "$AKuF4(B"
|
|
87 "Pinyin base input method for Chinese charset GB2312 \(`chinese-gb2312').
|
|
88
|
46670
|
89 Pinyin is the standard Roman transliteration method for Chinese.
|
31548
|
90 For the detail of Pinyin system, see the documentation of the input
|
|
91 method `chinese-py'.
|
|
92
|
|
93 This input method works almost the same way as `chinese-py'. The
|
|
94 difference is that you type a single key for these Pinyin spelling.
|
|
95 Pinyin: zh en eng ang ch an ao ai ong sh ing yu($A(9(B)
|
|
96 keyseq: a f g h i j k l s u y v
|
46670
|
97 For example:
|
31548
|
98 Chinese: $A0!(B $A9{(B $AVP(B $AND(B $A9b(B $ASq(B $AH+(B
|
|
99 Pinyin: a guo zhong wen guang yu quan
|
|
100 Keyseq: a1 guo4 as1 wf4 guh1 yu..6 qvj6
|
|
101
|
|
102 \\<quail-translation-docstring>
|
|
103
|
|
104 For double-width GB2312 characters corresponding to ASCII, use the
|
|
105 input method `chinese-qj'.")
|
|
106
|
|
107 ("chinese-ecdict" "$(05CKH(B"
|
|
108 "In this input method, you enter a Chinese (Big5) character or word
|
|
109 by typing the corresponding English word. For example, if you type
|
|
110 \"computer\", \"$(0IZH+(B\" is input.
|
|
111
|
|
112 \\<quail-translation-docstring>")
|
|
113
|
|
114 ("chinese-etzy" "$(06/0D(B"
|
|
115 "Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
|
|
116 `chinese-big5-2').
|
|
117
|
|
118 Zhuyin is a kind of phonetic symbol. One to three Zhuyin symbols
|
|
119 compose one Chinese character.
|
|
120
|
|
121 In this input method, you enter a Chinese character by first typing
|
|
122 keys corresponding to Zhuyin symbols (see the above table) followed by
|
|
123 SPC, 1, 2, 3, or 4 specifing a tone (SPC:$(0?v(N(B, 1:$(0M=Vy(B, 2:$(0Dm(N(B, 3: $(0&9Vy(B,
|
|
124 4:$(0(+Vy(B).
|
|
125
|
|
126 \\<quail-translation-docstring>")
|
31425
|
127
|
|
128 ("chinese-punct-b5" "$(0O:(BB"
|
|
129 "Input method for Chinese punctuations and symbols of Big5
|
|
130 \(`chinese-big5-1' and `chinese-big5-2').")
|
|
131
|
|
132 ("chinese-punct" "$A1j(BG"
|
|
133 "Input method for Chinese punctuations and symbols of GB2312
|
|
134 \(`chinese-gb2312').")
|
|
135
|
|
136 ("chinese-py-b5" "$(03<(BB"
|
|
137 "Pinyin base input method for Chinese Big5 characters
|
|
138 \(`chinese-big5-1', `chinese-big5-2').
|
|
139
|
|
140 This input method works almost the same way as `chinese-py' (which
|
|
141 see).
|
|
142
|
|
143 This input method supports only Han characters. The more convenient
|
31481
|
144 method is `chinese-py-punct-b5', which is the combination of this
|
|
145 method and `chinese-punct-b5' and which supports both Han characters
|
|
146 and punctuation/symbols.
|
31425
|
147
|
31481
|
148 For double-width Big5 characters corresponding to ASCII, use the input
|
31425
|
149 method `chinese-qj-b5'.
|
|
150
|
|
151 The input method `chinese-py' and `chinese-tonepy' are also Pinyin
|
31481
|
152 based, but for the character set GB2312 (`chinese-gb2312').")
|
31425
|
153
|
31548
|
154 ("chinese-qj-b5" "$(0)A(BB")
|
|
155
|
|
156 ("chinese-qj" "$AH+(BG")
|
|
157
|
31425
|
158 ("chinese-sw" "$AJWN2(B"
|
31548
|
159 "Radical base input method for Chinese charset GB2312 (`chinese-gb2312').
|
|
160
|
32277
|
161 In this input method, you enter a Chinese character by typing two
|
|
162 keys. The first key corresponds to the first ($AJW(B) radical, the second
|
|
163 key corresponds to the last ($AN2(B) radical. The correspondence of keys
|
|
164 and radicals is as below:
|
31548
|
165
|
|
166 first radical:
|
|
167 a b c d e f g h i j k l m n o p q r s t u v w x y z
|
49598
|
168 $APD(B $AZ"(B $AJ,(B $AX<(B $A;p(B $A?Z(B $A^P(B $Ac_(B $AZ%(B $A\3(B $AXi(B $AD>(B $Alj(B $Ab;(B $ATB(B $Afy(B $AJ/(B $AMu(B $A0K(B $AX/(B $AHU(B $AeA(B $Aak(B $AVq(B $AR;(B $AHK(B
|
31548
|
169 last radical:
|
|
170 a b c d e f g h i j k l m n o p q r s t u v w x y z
|
49598
|
171 $ASV(B $AI=(B $AMA(B $A56(B $AZb(B $A?Z(B $ARB(B $Aqb(B $A4s(B $A6!(B $A[L(B $Ala(B $AJ.(B $A4u(B $AXg(B $ACE(B $A=q(B $AX-(B $AE.(B $ARR(B $A`m(B $AP!(B $A3'(B $A3f(B $A_.(B $A27(B
|
31548
|
172
|
36995
|
173 \\<quail-translation-docstring>")
|
31548
|
174
|
31425
|
175 ("chinese-tonepy" "$A5wF4(B"
|
|
176 "Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').
|
|
177
|
48760
|
178 Pinyin is the standard roman transliteration method for Chinese.
|
32277
|
179 For the details of Pinyin system, see the documentation of the input
|
31425
|
180 method `chinese-py'.
|
|
181
|
|
182 This input method works almost the same way as `chinese-py'. The
|
31548
|
183 difference is that you must type 1..5 after each Pinyin spelling to
|
|
184 specify a tone (1:$ARuF=(B, 2:$AQtF=(B, 3:$AIOIy(B, 4$AOBIy(B, 5:$AGaIy(B).
|
|
185
|
36995
|
186 \\<quail-translation-docstring>
|
31548
|
187
|
|
188 For instance, to input $ADc(B, you type \"n i 3 3\", the first \"n i\" is
|
|
189 a Pinyin, the next \"3\" specifies tone, and the last \"3\" selects
|
|
190 the third character from the candidate list.
|
31425
|
191
|
|
192 For double-width GB2312 characters corresponding to ASCII, use the
|
|
193 input method `chinese-qj'.")
|
|
194
|
31548
|
195 ("chinese-zozy" "$(0I\0D(B"
|
|
196 "Zhuyin base input method for Chinese Big5 characters (`chinese-big5-1',
|
|
197 `chinese-big5-2').
|
|
198
|
32277
|
199 Zhuyin is a kind of a phonetic symbol. One to three Zhuyin symbols
|
31548
|
200 compose a Chinese character.
|
|
201
|
|
202 In this input method, you enter a Chinese character by first typing
|
|
203 keys corresponding to Zhuyin symbols (see the above table) followed by
|
|
204 SPC, 6, 3, 4, or 7 specifing a tone (SPC:$(0?v(N(B, 6:$(0Dm(N(B, 3:$(0&9Vy(B, 4:$(0(+Vy(B,
|
|
205 7:$(0M=Vy(B).
|
|
206
|
36995
|
207 \\<quail-translation-docstring>")))
|
17052
|
208
|
|
(defsubst tit-read-key-value ()
  "Return the value of the key on the current line, or nil if there is none.
Point must be at the first character of the value.  The value is the
run of characters up to the next space, tab, carriage return or
newline.  It is wrapped in double quotes and read as a Lisp string, so
backslash escapes written in the dictionary are interpreted."
  (when (looking-at "[^ \t\r\n]+")
    (let ((quoted (format "\"%s\"" (match-string 0))))
      (car (read-from-string quoted)))))
|
|
213
|
|
(defun tit-make-quail-package-file-name (filename &optional dirname)
  "Return the Quail package file name for the TIT dictionary FILENAME.
The directory part and the extension of FILENAME are dropped and
\".el\" is appended; for instance, \".../ZOZY.tit\" yields \"ZOZY.el\".
The result is expanded with `expand-file-name' relative to DIRNAME."
  ;; Strip the extension by name instead of chopping a fixed four
  ;; characters: a name that does not end in ".tit" must not lose part
  ;; of its base name, and a name shorter than four characters must not
  ;; signal an error.
  (expand-file-name
   (concat (file-name-sans-extension (file-name-nondirectory filename))
           ".el")
   dirname))
|
|
220
|
20840
|
(defvar tit-dictionary t
  "Non-nil for a character dictionary, nil for a phrase dictionary.
Set while the header is processed: BEGINDICTIONARY makes it t and
BEGINPHRASE makes it nil.")

(defvar tit-encode nil
  "Value of the \"ENCODE:\" key of the dictionary being converted.")

(defvar tit-default-encode "GB"
  "Value used for `tit-encode' when a dictionary has no \"ENCODE:\" key.")
|
|
225
|
|
(defun tit-generate-key-bindings (keys function-symbol)
  "Print KEY-BINDINGS elements that bind the characters of KEYS.
Each element is printed to `standard-output' with `princ' as a cons of
a key string and FUNCTION-SYMBOL, for use as part of the KEY-BINDINGS
argument of `quail-define-package'.

A control character (code below 32) is printed only if
`quail-translation-keymap' binds it to
`quail-execute-non-quail-command'; any other character is printed only
if its code is at most 127.  Once one element has been printed, a
newline separator is printed before every following character of KEYS,
whether or not that character produces an element."
  (let ((printed-any nil))
    (dotimes (idx (length keys))
      (let* ((key (aref keys idx))
             (control-p (< key ?\s)))
        (when printed-any
          (princ "\n "))
        (when (if control-p
                  (eq (lookup-key quail-translation-keymap
                                  (char-to-string key))
                      'quail-execute-non-quail-command)
                (<= key 127))
          (princ (cons (cond (control-p (format "\"\\C-%c\"" (+ key ?@)))
                             ((< key 127) (format "\"%c\"" key))
                             ;; Code 127 (DEL) is spelled as C-?.
                             (t "\"\\C-?\""))
                       function-symbol))
          (setq printed-any t))))))
|
|
248
|
|
;; Analyze header part of TIT dictionary and generate an appropriate
;; `quail-define-package' function call.
(defun tit-process-header (filename)
  "Convert the header of a TIT dictionary into the head of a Quail package.
The header is read from the accessible portion of the current buffer,
starting at its beginning.  Two things are printed to
`standard-output': a comment block that echoes every header line, and
a `quail-define-package' call built from the recognized keys COMMENT,
MULTICHOICE, MOVERIGHT, MOVELEFT, PROMPT, BACKSPACE and KEYPROMPT.
BEGINDICTIONARY and BEGINPHRASE set `tit-dictionary' to t and nil
respectively.

FILENAME is the name of the dictionary file.  The package name is
\"chinese-\" followed by the downcased base name of FILENAME, and the
title and extra docstring are looked up under that name in
`quail-cxterm-package-ext-info'."
  (message "Processing header part...")
  (goto-char (point-min))

  ;; At first, generate header part of the Quail package while
  ;; collecting information from the original header.
  (let ((package (concat
                  "chinese-"
                  ;; Strip the extension by name; chopping a fixed four
                  ;; characters fails on names not ending in ".tit".
                  (file-name-sans-extension
                   (downcase (file-name-nondirectory filename)))))
        ;; TIT keywords and the corresponding default values.
        (tit-multichoice t)
        (tit-prompt "")
        (tit-comments nil)
        (tit-backspace "\010\177")
        (tit-deleteall "\015\025")
        (tit-moveright ".>")
        (tit-moveleft ",<")
        (tit-keyprompt nil))

    (princ ";; Quail package `")
    (princ package) (princ "' -*- coding:iso-2022-7bit; -*-\n")
    (princ ";; Generated by the command `titdic-convert'\n;;\tDate: ")
    (princ (current-time-string))
    (princ "\n;;\tOriginal TIT dictionary file: ")
    (princ (file-name-nondirectory filename))
    (princ "\n\n;;; Comment:\n\n")
    (princ ";; Byte-compile this file again after any modification.\n\n")
    (princ ";;; Start of the header of original TIT dictionary.\n\n")

    (while (not (eobp))
      (let ((ch (following-char))
            (line-start (point)))
        (cond
         ((= ch ?C)                     ; COMMENT
          (when (looking-at "COMMENT")
            (let ((text-start (match-end 0)))
              (end-of-line)
              (push (buffer-substring text-start (point)) tit-comments))))
         ((= ch ?M)                     ; MULTICHOICE, MOVERIGHT, MOVELEFT
          (cond ((looking-at "MULTICHOICE:[ \t]*")
                 (goto-char (match-end 0))
                 (setq tit-multichoice (looking-at "YES")))
                ((looking-at "MOVERIGHT:[ \t]*")
                 (goto-char (match-end 0))
                 (setq tit-moveright (tit-read-key-value)))
                ((looking-at "MOVELEFT:[ \t]*")
                 (goto-char (match-end 0))
                 (setq tit-moveleft (tit-read-key-value)))))
         ((= ch ?P)                     ; PROMPT
          (when (looking-at "PROMPT:[ \t]*")
            (goto-char (match-end 0))
            (let ((value (tit-read-key-value)))
              ;; A PROMPT key without a value yields nil.  Keep the
              ;; default prompt in that case; indexing the last
              ;; character of a missing or empty value would signal.
              (when (and (stringp value) (> (length value) 0))
                (setq tit-prompt value)
                ;; Some TIT dictionaries that are encoded by
                ;; euc-china contain an invalid character at the tail.
                (let* ((last (aref tit-prompt (1- (length tit-prompt))))
                       (split (split-char last)))
                  (if (or (eq (nth 1 split) 32)
                          (eq (nth 2 split) 32))
                      (setq tit-prompt (substring tit-prompt 0 -1))))))))
         ((= ch ?B)                     ; BACKSPACE, BEGINDICTIONARY,
                                        ; BEGINPHRASE
          (cond ((looking-at "BACKSPACE:[ \t]*")
                 (goto-char (match-end 0))
                 (setq tit-backspace (tit-read-key-value)))
                ((looking-at "BEGINDICTIONARY")
                 (setq tit-dictionary t))
                ((looking-at "BEGINPHRASE")
                 (setq tit-dictionary nil))))
         ((= ch ?K)                     ; KEYPROMPT
          (when (looking-at "KEYPROMPT(\\(.*\\)):[ \t]*")
            (let ((key-char (match-string 1)))
              (goto-char (match-end 0))
              ;; The key may be written as a backslash escape; convert
              ;; it to the character it denotes.
              (if (string-match "\\\\[0-9]+" key-char)
                  (setq key-char
                        (car (read-from-string (format "\"%s\""
                                                       key-char)))))
              (push (cons key-char (tit-read-key-value)) tit-keyprompt)))))
        ;; Echo the original header line as a comment.
        (end-of-line)
        (princ ";; ")
        (princ (buffer-substring line-start (point)))
        (princ "\n")
        (forward-line 1)))

    (princ "\n;;; End of the header of original TIT dictionary.\n\n")
    (princ ";;; Code:\n\n(require 'quail)\n\n")

    (princ "(quail-define-package ")
    ;; Args NAME, LANGUAGE, TITLE
    (let ((title (nth 1 (assoc package quail-cxterm-package-ext-info))))
      (princ "\"")
      (princ package)
      (princ "\" \"")
      (princ (nth 2 (assoc tit-encode tit-encode-list)))
      (princ "\" \"")
      (princ (or title
                 (if (string-match "[:$A!K$(0!(!J(B]+\\([^:$A!K$(0!(!K(B]+\\)" tit-prompt)
                     (substring tit-prompt (match-beginning 1) (match-end 1))
                   tit-prompt)))
      (princ "\"\n"))

    ;; Arg GUIDANCE
    (if tit-keyprompt
        (progn
          (princ " '(")
          ;; Walk the list without consuming it: `tit-keyprompt' is
          ;; tested again below to choose KBD-TRANSLATE and SHOW-LAYOUT.
          (dolist (entry tit-keyprompt)
            (princ " ")
            (princ (format "(%d . \"%s\")\n"
                           (string-to-char (car entry))
                           (cdr entry))))
          (princ ")"))
      (princ " t\n"))

    ;; Arg DOCSTRING
    (let ((doc (concat tit-prompt "\n"))
          (comments (if tit-comments
                        (mapconcat 'identity (nreverse tit-comments) "\n")))
          (doc-ext (nth 2 (assoc package quail-cxterm-package-ext-info))))
      (if comments
          (setq doc (concat doc "\n" comments "\n")))
      (if doc-ext
          (setq doc (concat doc "\n" doc-ext "\n")))
      (prin1 doc)
      (terpri))

    ;; Arg KEY-BINDINGS
    (princ " '(")
    (tit-generate-key-bindings tit-backspace 'quail-delete-last-char)
    (princ "\n ")
    (tit-generate-key-bindings tit-deleteall 'quail-abort-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveright 'quail-next-translation)
    (princ "\n ")
    (tit-generate-key-bindings tit-moveleft 'quail-prev-translation)
    (princ ")\n")

    ;; Args FORGET-TRANSLATION, DETERMINISTIC, KBD-TRANSLATE, SHOW-LAYOUT.
    ;; The remaining args are all nil.
    (princ " nil")
    (princ (if tit-multichoice " nil" " t"))
    (princ (if tit-keyprompt " t t)\n\n" " nil nil)\n\n"))))
|
17052
|
395
|
20840
|
(defsubst tit-flush-translations (key translations)
  "Print one translation rule for KEY and TRANSLATIONS.
Every backslash followed by three digits in KEY is first replaced by
the text obtained from reading that escape as a Lisp string.  The rule
is then printed to `standard-output' with `prin1' as a two-element
list, followed by a newline.  When `tit-dictionary' is non-nil,
TRANSLATIONS is printed as is; otherwise it is a list that is
destructively reversed and printed as a vector."
  (let ((decoded-key
         (replace-regexp-in-string
          "\\\\[0-9][0-9][0-9]"
          (lambda (escape)
            (car (read-from-string (concat "\"" escape "\""))))
          key t t)))
    (prin1 (list decoded-key
                 (if tit-dictionary
                     translations
                   (vconcat (nreverse translations)))))
    (princ "\n")))
|
17052
|
413
|
|
;; Convert body part of TIT dictionary into `quail-define-rules'
;; function call.
(defun tit-process-body ()
  "Convert the body of a TIT dictionary into a `quail-define-rules' call.
The body is read from point to the end of the current buffer and the
result is printed to `standard-output'.  Empty lines and lines starting
with `#' are skipped, and so are entries that have a key but no
translation.  Translations of consecutive lines that share the same key
are merged into a single rule, which is printed by
`tit-flush-translations' when the key changes or the buffer ends.

When `tit-dictionary' is non-nil, only the first field after the key is
taken from each line and the fields are concatenated into one string.
Otherwise every field up to the end of the line or a `#' is collected
as a separate phrase."
  (message "Formatting translation rules...")
  (let ((prev-key "")
        ch key translations pos)
    (princ "(quail-define-rules\n")
    (while (null (eobp))
      (setq ch (following-char))
      (if (or (= ch ?#) (= ch ?\n))
          (forward-line 1)
        (setq pos (point))
        (skip-chars-forward "^ \t\n")
        (setq key (buffer-substring pos (point)))
        (skip-chars-forward " \t")
        (setq ch (following-char))
        ;; `eolp' rather than a comparison with ?\n: it also holds at
        ;; the end of the buffer, so a final line without a trailing
        ;; newline is not mistaken for an entry with a translation.
        (if (or (= ch ?#) (eolp))
            ;; This entry contains no translations.  Let's ignore it.
            (forward-line 1)
          (or (string= key prev-key)
              (progn
                (if translations
                    (tit-flush-translations prev-key translations))
                (setq translations nil
                      prev-key key)))
          (if tit-dictionary
              (progn
                (setq pos (point))
                (skip-chars-forward "^ \t#\n")
                ;; `concat' treats a nil TRANSLATIONS as empty.
                (setq translations
                      (concat translations
                              (buffer-substring pos (point)))))
            (while (not (eolp))
              (setq pos (point))
              (skip-chars-forward "^ \t\n")
              (setq translations (cons (buffer-substring pos (point))
                                       translations))
              (skip-chars-forward " \t")
              (setq ch (following-char))
              ;; The rest of the line is a comment.
              (if (= ch ?#) (end-of-line))))
          (forward-line 1))))

    (if translations
        (tit-flush-translations prev-key translations))
    (princ ")\n")))
|
17052
|
463
|
|
;;;###autoload
(defun titdic-convert (filename &optional dirname)
  "Generate a Quail package from the TIT dictionary file FILENAME.
The package is written, encoded in iso-2022-7bit, to the file whose
name `tit-make-quail-package-file-name' derives from FILENAME.
Optional argument DIRNAME, if specified, is the directory under which
that file is saved."
  (interactive "FTIT dictionary file: ")
  (let ((coding-system-for-write 'iso-2022-7bit))
    (with-temp-file (tit-make-quail-package-file-name filename dirname)
      (set-buffer-file-coding-system 'iso-2022-7bit)
      ;; Everything printed below goes to the buffer of the output file.
      (let ((standard-output (current-buffer)))
        (with-temp-buffer
          ;; Read the dictionary without any decoding.
          (set-buffer-multibyte nil)
          (let ((coding-system-for-read 'no-conversion))
            (insert-file-contents (expand-file-name filename)))

          ;; Decode the buffer contents from the encoding specified by
          ;; the value of the key "ENCODE:", which is searched for only
          ;; before the first line starting with "BEGIN".
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary doesn't have body part"))
          (let ((header-end (point)))
            (goto-char (point-min))
            (setq tit-encode
                  (if (re-search-forward "^ENCODE:[ \t]*" header-end t)
                      ;; Point is now at the start of the value.
                      (tit-read-key-value)
                    tit-default-encode))
            (let ((slot (assoc tit-encode tit-encode-list)))
              (unless slot
                (error "Invalid ENCODE: value in TIT dictionary"))
              (let ((decoder (nth 1 slot)))
                (message "Decoding with coding system %s..." decoder)
                (goto-char (point-min))
                (decode-coding-region (point-min) (point-max) decoder))))

          ;; Set point to the starting position of the body part.
          (goto-char (point-min))
          (unless (search-forward "\nBEGIN" nil t)
            (error "TIT dictionary can't be decoded correctly"))

          ;; Process the header part in multibyte mode.
          (with-current-buffer standard-output
            (set-buffer-multibyte t))
          (set-buffer-multibyte t)
          (forward-line 1)
          (narrow-to-region (point-min) (point))
          (tit-process-header filename)
          (widen)

          ;; Process the body part.  For speed, we turn off multibyte
          ;; facility.
          (with-current-buffer standard-output
            (set-buffer-multibyte nil))
          (set-buffer-multibyte nil)
          (tit-process-body))))))
|
17052
|
518
|
|
;;;###autoload
(defun batch-titdic-convert (&optional force)
  "Run `titdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
For example, invoke \"emacs -batch -f batch-titdic-convert XXX.tit\" to
generate Quail package file \"XXX.el\" from TIT dictionary file \"XXX.tit\".
To get complete usage, invoke \"emacs -batch -f batch-titdic-convert -h\".

A directory argument stands for all the files in it whose names end in
\".tit\".  A leading \"-dir DIR\" argument makes DIR the directory where
the generated files are saved.  A dictionary is converted only when
`file-newer-than-file-p' reports it newer than its Quail package file,
unless FORCE is non-nil.  Emacs is killed with exit status 0 at the end."
  (defvar command-line-args-left)       ; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-titdic-convert' should be used only with -batch"))
  (if (string= (car command-line-args-left) "-h")
      (progn
        ;; The output name keeps the case of the dictionary name:
        ;; `tit-make-quail-package-file-name' does not downcase it.
        (message "To convert XXX.tit and YYY.tit into XXX.el and YYY.el:")
        (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert XXX.tit YYY.tit")
        (message "To convert XXX.tit into DIR/XXX.el:")
        (message " %% emacs -batch -l titdic-cnv -f batch-titdic-convert -dir DIR XXX.tit"))
    (let (targetdir filename files)
      (when (string= (car command-line-args-left) "-dir")
        (setq command-line-args-left (cdr command-line-args-left))
        (setq targetdir (car command-line-args-left))
        (setq command-line-args-left (cdr command-line-args-left)))
      (while command-line-args-left
        (setq filename (expand-file-name (car command-line-args-left)))
        (if (file-directory-p filename)
            (progn
              (message "Converting all tit files in the directory %s" filename)
              (setq files (directory-files filename t "\\.tit$")))
          (setq files (list filename)))
        (dolist (entry files)
          (let ((file (expand-file-name entry)))
            (when (or force
                      (file-newer-than-file-p
                       file (tit-make-quail-package-file-name file targetdir)))
              (message "Converting %s to quail-package..." file)
              (titdic-convert file targetdir))))
        (setq command-line-args-left (cdr command-line-args-left)))
      (message "Byte-compile the created files by:")
      (message " %% emacs -batch -f batch-byte-compile XXX.el")))
  (kill-emacs 0))
|
|
561
|
37116
|
562
|
|
563 ;;; Converter of miscellaneous dictionaries other than TIT format.
|
|
564
|
|
565 ;; Alist of input method names and the corresponding information.
|
|
566 ;; Each element has this form:
|
|
567 ;; (INPUT-METHOD-NAME ;; Name of the input method.
|
49598
|
568 ;; INPUT-METHOD-TITLE ;; Title string of the input method
|
37116
|
569 ;; DICFILE ;; Name of the source dictionary file.
|
|
570 ;; CODING ;; Coding system of the dictionary file.
|
|
571 ;; QUAILFILE ;; Name of the Quail package file.
|
|
572 ;; CONVERTER ;; Function to generate the Quail package.
|
|
573 ;; COPYRIGHT-NOTICE ;; Copyright notice of the source dictionary.
|
|
574 ;; )
|
|
575
|
|
576 (defvar quail-misc-package-ext-info
|
|
577 '(("chinese-b5-tsangchi" "$(06A(BB"
|
49598
|
578 "cangjie-table.b5" big5 "tsang-b5.el"
|
37116
|
579 tsang-b5-converter
|
|
580 "\
|
|
581 ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
|
|
582 ;; #
|
|
583 ;; # Permission to copy and distribute both modified and
|
|
584 ;; # unmodified versions is granted without royalty provided
|
|
585 ;; # this notice is preserved.")
|
|
586
|
|
587 ("chinese-b5-quick" "$(0X|(BB"
|
49598
|
588 "cangjie-table.b5" big5 "quick-b5.el"
|
37116
|
589 quick-b5-converter
|
|
590 "\
|
|
591 ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
|
|
592 ;; #
|
|
593 ;; # Permission to copy and distribute both modified and
|
|
594 ;; # unmodified versions is granted without royalty provided
|
|
595 ;; # this notice is preserved.")
|
|
596
|
|
597 ("chinese-cns-tsangchi" "$(GT?(BC"
|
|
598 "cangjie-table.cns" iso-2022-cn-ext "tsang-cns.el"
|
|
599 tsang-cns-converter
|
|
600 "\
|
|
601 ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
|
|
602 ;; #
|
|
603 ;; # Permission to copy and distribute both modified and
|
|
604 ;; # unmodified versions is granted without royalty provided
|
|
605 ;; # this notice is preserved.")
|
|
606
|
|
607 ("chinese-cns-quick" "$(Gv|(BC"
|
|
608 "cangjie-table.cns" iso-2022-cn-ext "quick-cns.el"
|
|
609 quick-cns-converter
|
|
610 "\
|
|
611 ;; # Copyright 2001 Christian Wittern <wittern@iis.sinica.edu.tw>
|
|
612 ;; #
|
|
613 ;; # Permission to copy and distribute both modified and
|
|
614 ;; # unmodified versions is granted without royalty provided
|
|
615 ;; # this notice is preserved.")
|
|
616
|
|
617 ("chinese-py" "$AF4(BG"
|
|
618 "pinyin.map" cn-gb-2312 "PY.el"
|
|
619 py-converter
|
|
620 "\
|
|
621 ;; \"pinyin.map\" is included in a free package called CCE. It is
|
|
622 ;; available at:
|
|
623 ;; http://ftp.debian.org/debian/dists/potato/main
|
|
624 ;; /source/utils/cce_0.36.orig.tar.gz
|
|
625 ;; This package contains the following copyright notice.
|
|
626 ;;
|
|
627 ;;
|
|
628 ;; Copyright (C) 1999, Rui He, herui@cs.duke.edu
|
49598
|
629 ;;
|
|
630 ;;
|
37116
|
631 ;; CCE(Console Chinese Environment) 0.32
|
49598
|
632 ;;
|
|
633 ;; CCE is free software; you can redistribute it and/or modify it under the
|
|
634 ;; terms of the GNU General Public License as published by the Free Software
|
|
635 ;; Foundation; either version 1, or (at your option) any later version.
|
|
636 ;;
|
|
637 ;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
638 ;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
639 ;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
640 ;; details.
|
|
641 ;;
|
37116
|
642 ;; You should have received a copy of the GNU General Public License along with
|
49598
|
643 ;; CCE; see the file COPYING. If not, write to the Free Software Foundation,
|
64092
|
644 ;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
|
37116
|
645
|
|
646 ("chinese-ziranma" "$AWTH;(B"
|
|
647 "ziranma.cin" cn-gb-2312 "ZIRANMA.el"
|
|
648 ziranma-converter
|
|
649 "\
|
|
650 ;; \"ziranma.cin\" is included in a free package called CCE. It is
|
|
651 ;; available at:
|
|
652 ;; http://ftp.debian.org/debian/dists/potato/main
|
|
653 ;; /source/utils/cce_0.36.orig.tar.gz
|
|
654 ;; This package contains the following copyright notice.
|
|
655 ;;
|
|
656 ;;
|
|
657 ;; Copyright (C) 1999, Rui He, herui@cs.duke.edu
|
49598
|
658 ;;
|
|
659 ;;
|
37116
|
660 ;; CCE(Console Chinese Environment) 0.32
|
49598
|
661 ;;
|
|
662 ;; CCE is free software; you can redistribute it and/or modify it under the
|
|
663 ;; terms of the GNU General Public License as published by the Free Software
|
|
664 ;; Foundation; either version 1, or (at your option) any later version.
|
|
665 ;;
|
|
666 ;; CCE is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
667 ;; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
668 ;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
669 ;; details.
|
|
670 ;;
|
37116
|
671 ;; You should have received a copy of the GNU General Public License along with
|
49598
|
672 ;; CCE; see the file COPYING. If not, write to the Free Software Foundation,
|
64092
|
673 ;; 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
|
37875
|
674
|
|
675 ("chinese-ctlau" "$AAuTA(B"
|
|
676 "CTLau.html" cn-gb-2312 "CTLau.el"
|
|
677 ctlau-gb-converter
|
|
678 "\
|
|
679 ;; \"CTLau.html\" is available at:
|
|
680 ;;
|
|
681 ;; http://umunhum.stanford.edu/~lee/chicomp/CTLau.html
|
|
682 ;;
|
|
683 ;; It contains the following copyright notice:
|
|
684 ;;
|
|
685 ;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu)
|
49598
|
686 ;; #
|
37875
|
687 ;; # This program is free software; you can redistribute it and/or
|
|
688 ;; # modify it under the terms of the GNU General Public License
|
|
689 ;; # as published by the Free Software Foundation; either version 2
|
|
690 ;; # of the License, or any later version.
|
49598
|
691 ;; #
|
37875
|
692 ;; # This program is distributed in the hope that it will be useful,
|
|
693 ;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
694 ;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
695 ;; # GNU General Public License for more details.
|
49598
|
696 ;; #
|
37875
|
697 ;; # You should have received a copy of the GNU General Public License
|
|
698 ;; # along with this program; if not, write to the Free Software Foundation,
|
64091
|
699 ;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
|
37875
|
700
|
37884
|
701 ("chinese-ctlaub" "$(0N,Gn(B"
|
37882
|
702 "CTLau-b5.html" big5 "CTLau-b5.el"
|
37875
|
703 ctlau-b5-converter
|
|
704 "\
|
|
705 ;; \"CTLau-b5.html\" is available at:
|
|
706 ;;
|
|
707 ;; http://umunhum.stanford.edu/~lee/chicomp/CTLau-b5.html
|
|
708 ;;
|
|
709 ;; It contains the following copyright notice:
|
|
710 ;;
|
|
711 ;; # Copyright (C) 1988-2001 Fung Fung Lee (lee@umunhum.stanford.edu)
|
49598
|
712 ;; #
|
37875
|
713 ;; # This program is free software; you can redistribute it and/or
|
|
714 ;; # modify it under the terms of the GNU General Public License
|
|
715 ;; # as published by the Free Software Foundation; either version 2
|
|
716 ;; # of the License, or any later version.
|
49598
|
717 ;; #
|
37875
|
718 ;; # This program is distributed in the hope that it will be useful,
|
|
719 ;; # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
720 ;; # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
721 ;; # GNU General Public License for more details.
|
49598
|
722 ;; #
|
37875
|
723 ;; # You should have received a copy of the GNU General Public License
|
|
724 ;; # along with this program; if not, write to the Free Software Foundation,
|
64091
|
725 ;; # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.")
|
37116
|
726 ))
|
|
727
|
|
728 ;; Generate a code of a Quail package in the current buffer from Tsang
|
|
729 ;; dictionary in the buffer DICBUF. The input method name of the
|
|
730 ;; Quail package is NAME, and the title string is TITLE.
|
|
731
|
|
732 ;; If TSANG-P is non-nil, generate $(06AQo(B input method. Otherwise
|
|
733 ;; generate $(0X|/y(B (simple version of $(06AQo(B). If BIG5-P is non-nil, the
|
|
734 ;; input method is for inputting Big5 characters. Otherwise the input
|
|
735 ;; method is for inputting CNS characters.
|
|
736
|
|
(defun tsang-quick-converter (dicbuf name title tsang-p big5-p)
  "Append Quail package code built from the Tsang dictionary in DICBUF.
Everything is inserted at the end of the current buffer: a quoted
docstring, a key-binding list followed by \"nil nil)\", and then a
`quail-define-rules' form.

If TSANG-P is non-nil, each key is the whole upper-case code found at
the end of a dictionary line (downcased).  Otherwise codes longer than
one letter are reduced to their first and last letters.  If BIG5-P is
non-nil the Big5 variants of the title, docstring and punctuation
strings are used, otherwise the CNS variants.

NAME and TITLE are accepted so that all converters share one calling
convention; this function does not use them."
  (let ((fulltitle (if tsang-p
                       (if big5-p "$(06AQo(B" "$(GT?on(B")
                     (if big5-p "$(0X|/y(B" "$(Gv|Mx(B")))
        dic)
    (goto-char (point-max))
    (if big5-p
        (insert (format "\"$(0&d'GTT&,!J(B%s$(0!K(BBIG5

$(0KHM$(B%s$(0TT&,WoOu(B

[Q $(0'D(B] [W $(0(q(B] [E $(0'V(B] [R $(0&H(B] [T $(0'>(B] [Y $(0&4(B] [U $(0&U(B] [I $(0'B(B] [O $(0&*(B] [P $(0'A(B]

[A $(0'K(B] [S $(0&T(B] [D $(0'N(B] [F $(0'W(B] [G $(0&I(B] [H $(0*M(B] [J $(0&3(B] [L $(0&d(B]

[Z ] [X $(0[E(B] [C $(01[(B] [V $(0&M(B] [B $(0'M(B] [N $(0&_(B] [M $(0&"(B]

\\\\<quail-translation-docstring>\"\n"
                        fulltitle fulltitle))
      (insert (format "\"$(GDcEFrSD+!J(B%s$(G!K(BCNS

$(GiGk#(B%s$(GrSD+uomu(B

[Q $(GEC(B] [W $(GFp(B] [E $(GEU(B] [R $(GDG(B] [T $(GE=(B] [Y $(GD3(B] [U $(GDT(B] [I $(GEA(B] [O $(GD)(B] [P $(GE@(B]

[A $(GEJ(B] [S $(GDS(B] [D $(GEM(B] [F $(GEV(B] [G $(GDH(B] [H $(GHL(B] [J $(GD2(B] [L $(GDc(B]

[Z ] [X $(GyE(B] [C $(GOZ(B] [V $(GDL(B] [B $(GEL(B] [N $(GD^(B] [M $(GD!(B]

\\\\<quail-translation-docstring>\"\n"
                      fulltitle fulltitle)))
    (insert " '((\".\" . quail-next-translation-block)
(\",\" . quail-prev-translation-block))
nil nil)\n\n")
    (insert "(quail-define-rules\n")
    ;; Scan the dictionary in its own buffer; the current buffer (and
    ;; point in it) is restored when the form exits.
    (with-current-buffer dicbuf
      ;; Handle double CR line ends, which result when checking out of
      ;; CVS on MS-Windows.
      (goto-char (point-min))
      (while (re-search-forward "\r\r$" nil t)
        (replace-match ""))
      ;; Rules start at the first line containing "A440"; whatever
      ;; precedes that line is skipped.
      (goto-char (point-min))
      (search-forward "A440")
      (beginning-of-line)
      (let ((table (make-hash-table :test 'equal))
            val)
        (while (not (eobp))
          ;; The character after the first five columns of the line is
          ;; the translation.
          (forward-char 5)
          (let ((trans (char-to-string (following-char)))
                key)
            ;; The key is the run of upper-case letters ending a line.
            (re-search-forward "[A-Z]+$" nil t)
            (setq key (downcase
                       (if (or tsang-p
                               (<= (- (match-end 0) (match-beginning 0)) 1))
                           (match-string 0)
                         ;; Quick method: first and last letter only.
                         (string (char-after (match-beginning 0))
                                 (char-after (1- (match-end 0)))))))
            ;; Several characters may share a key; accumulate them in
            ;; dictionary order.
            (setq val (gethash key table))
            (if val (setq trans (concat val trans)))
            (puthash key trans table)
            (forward-line 1)))
        (maphash #'(lambda (key val) (setq dic (cons (cons key val) dic)))
                 table)))
    (setq dic (sort dic (function (lambda (x y) (string< (car x) (car y))))))
    (dolist (elt dic)
      (insert (format "(%S\t%S)\n" (car elt) (cdr elt))))
    ;; Punctuation is entered as "z" followed by an ASCII punctuation
    ;; key.  Each row is (KEY BIG5-CANDIDATES CNS-CANDIDATES).
    (let ((punctuations '((";" "$(0!'!2!"!#!.!/(B" "$(G!'!2!"!#!.!/(B")
                          (":" "$(0!(!+!3!%!$!&!0!1(B" "$(G!(!+!3!%!$!&!0!1(B")
                          ("'" "$(0!e!d(B" "$(G!e!d(B")
                          ("\"" "$(0!g!f!h!i!q(B" "$(G!g!f!h!i!q(B")
                          ("\\" "$(0"`"b#M(B" "$(G"`"b#M(B")
                          ("|" "$(0!6!8!:"^(B" "$(G!6!8!:"^(B")
                          ("/" "$(0"_"a#L(B" "$(G"_"a#L(B")
                          ("?" "$(0!)!4(B" "$(G!)!4(B")
                          ("<" "$(0!R"6"A!T"H(B" "$(G!R"6"A!T"H(B")
                          (">" "$(0!S"7"B!U(B" "$(G!S"7"B!U(B")
                          ("[" "$(0!F!J!b!H!L!V!Z!X!\(B" "$(G!F!J!b!H!L!V!Z!X!\(B")
                          ;; NOTE(review): the tail of this row was cut off
                          ;; in the text under review.  It is reconstructed
                          ;; by analogy with the "[" row (each code is the
                          ;; opening bracket's code + 1) -- verify against
                          ;; the upstream file.
                          ("]" "$(0!G!K!c!I!M!W![!Y!](B" "$(G!G!K!c!I!M!W![!Y!](B")
                          ("{" "$(0!B!`!D(B " "$(G!B!`!D(B ")
                          ("}" "$(0!C!a!E(B" "$(G!C!a!E(B")
                          ("`" "$(0!j!k(B" "$(G!j!k(B")
                          ("~" "$(0"D"+",!<!=(B" "$(G"D"+",!<!=(B")
                          ("!" "$(0!*!5(B" "$(G!*!5(B")
                          ("@" "$(0"i"n(B" "$(G"i"n(B")
                          ("#" "$(0!l"-(B" "$(G!l"-(B")
                          ("$" "$(0"c"l(B" "$(G"c"l(B")
                          ("%" "$(0"h"m(B" "$(G"h"m(B")
                          ("&" "$(0!m".(B" "$(G!m".(B")
                          ("*" "$(0!n"/!o!w!x(B" "$(G!n"/!o!w!x(B")
                          ("(" "$(0!>!^!@(B" "$(G!>!^!@(B")
                          (")" "$(0!?!_!A(B" "$(G!?!_!A(B")
                          ("-" "$(0!7!9"#"$"1"@(B" "$(G!7!9"#"$"1"@(B")
                          ("_" "$(0"%"&(B" "$(G"%"&(B")
                          ("=" "$(0"8"C(B" "$(G"8"C(B")
                          ("+" "$(0"0"?(B" "$(G"0"?(B"))))
      (dolist (elt punctuations)
        (insert (format "(%S %S)\n" (concat "z" (car elt))
                        (if big5-p (nth 1 elt) (nth 2 elt))))))
    (insert ")\n")))
|
|
836
|
|
(defun tsang-b5-converter (dicbuf name title)
  "Convert the Tsang dictionary in DICBUF: full method, Big5 strings.
Calls `tsang-quick-converter' with TSANG-P and BIG5-P both non-nil;
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-method t)
        (use-big5 t))
    (tsang-quick-converter dicbuf name title full-method use-big5)))
|
|
839
|
|
(defun quick-b5-converter (dicbuf name title)
  "Convert the Tsang dictionary in DICBUF: simple method, Big5 strings.
Calls `tsang-quick-converter' with TSANG-P nil and BIG5-P non-nil;
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-method nil)
        (use-big5 t))
    (tsang-quick-converter dicbuf name title full-method use-big5)))
|
|
842
|
|
(defun tsang-cns-converter (dicbuf name title)
  "Convert the Tsang dictionary in DICBUF: full method, CNS strings.
Calls `tsang-quick-converter' with TSANG-P non-nil and BIG5-P nil;
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-method t)
        (use-big5 nil))
    (tsang-quick-converter dicbuf name title full-method use-big5)))
|
|
845
|
|
(defun quick-cns-converter (dicbuf name title)
  "Convert the Tsang dictionary in DICBUF: simple method, CNS strings.
Calls `tsang-quick-converter' with TSANG-P and BIG5-P both nil;
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((full-method nil)
        (use-big5 nil))
    (tsang-quick-converter dicbuf name title full-method use-big5)))
|
|
848
|
|
849 ;; Generate a code of a Quail package in the current buffer from
|
|
850 ;; Pinyin dictionary in the buffer DICBUF. The input method name of
|
|
851 ;; the Quail package is NAME, and the title string is TITLE.
|
|
852
|
|
(defun py-converter (dicbuf name title)
  "Append Quail package code built from the Pinyin dictionary in DICBUF.
Inserted at the end of the current buffer, in order: the input method
docstring (printed with `%S'), a key-binding list followed by
\"nil nil nil nil)\", and a `quail-define-rules' form.

The rules are made by copying all of DICBUF and rewriting each line in
place: the leading run of lower-case letters becomes the key, the one
character following it is deleted, and the rest of the line becomes
the translation string.

NAME and TITLE are not used by this function."
  (goto-char (point-max))
  (insert (format "%S\n" "$A::WVJdHk!KF4Rt!K(B

$AF4Rt7=08(B

$AP!P4S"NDWVD84z1m!8F4Rt!97{:E#,(B \"u(yu) $ATrSC(B u: $A1mJ>!C(B

Pinyin base input method for Chinese charset GB2312 (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
Pinyin uses a sequence of Latin alphabetic characters for each Chinese
character. The sequence is made by the combination of the initials
\(the beginning sounds) and finals (the ending sounds).

initials: b p m f d t n l z c s zh ch sh r j q x g k h
finals: a o e i er ai ei oa ou an en ang eng ong i ia iao ie iu ian in
iang ing iong u ua uo uai ui uan un uan ueng yu yue yuan yun

(Note: In the correct Pinyin writing, the sequence \"yu\" in the last
four finals should be written by the character u-umlaut `$A(9(B'.)

With this input method, you enter a Chinese character by first
entering its pinyin spelling.

\\<quail-translation-docstring>

For instance, to input $ADc(B, you type \"n i C-n 3\". The first \"n i\"
is a Pinyin, \"C-n\" selects the next group of candidates (each group
contains at most 10 characters), \"3\" select the third character in
that group.

This input method supports only Han characters. The related input
method `chinese-py-punct' is the combination of this method and
`chinese-punct'; it supports both Han characters and punctuation
characters.

For double-width GB2312 characters corresponding to ASCII, use the
input method `chinese-qj'.

The correct Pinyin system specifies tones by diacritical marks, but
this input method doesn't use them, which results in easy (you don't
have to know the exact tones), but verbose (many characters are assigned
to the same key sequence) input. You may also want to try the input
method `chinese-tonepy' with which you must specify tones by digits
\(1..5)."))
  (insert " '((\"\C-?\" . quail-delete-last-char)
(\".\" . quail-next-translation)
(\">\" . quail-next-translation)
(\",\" . quail-prev-translation)
(\"<\" . quail-prev-translation))
nil nil nil nil)\n\n"
          "(quail-define-rules\n")
  (let ((rules-start (point)))
    ;; Copy the raw dictionary, then turn every line into a rule.
    (insert-buffer-substring dicbuf)
    (goto-char rules-start)
    (while (not (eobp))
      ;; Open the rule and quote the key.
      (insert "(\"")
      (skip-chars-forward "a-z")
      (insert "\" \"")
      ;; Drop the single character that separated key and translation.
      (delete-char 1)
      ;; Close the translation string and the rule.
      (end-of-line)
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))
|
|
918
|
|
919 ;; Generate a code of a Quail package in the current buffer from
|
|
920 ;; Ziranma dictionary in the buffer DICBUF. The input method name of
|
|
921 ;; the Quail package is NAME, and the title string is TITLE.
|
|
922
|
|
(defun ziranma-converter (dicbuf name title)
  "Append Quail package code built from the Ziranma dictionary in DICBUF.
Dictionary entries are read from the text after the line
\"%keyname end\".  On each line the text up to the first space or tab
is the key and the text after the following spaces or tabs is one
translation.  Translations sharing a key are collected in dictionary
order.  If every translation for a key is one character long they are
joined into a single string; otherwise they are kept as a vector of
strings.

Inserted at the end of the current buffer, in order: the input method
docstring (printed with `%S'), a key-binding list followed by
\"nil nil nil nil)\", and a `quail-define-rules' form whose rules are
sorted by key.

NAME and TITLE are not used by this function."
  (let (dic)
    (with-current-buffer dicbuf
      (goto-char (point-min))
      (search-forward "%keyname end\n")
      (let ((table (make-hash-table :test 'equal)))
        ;; Pass 1: gather all translations of each key into a vector.
        (while (not (eobp))
          (let* ((key (buffer-substring
                       (point)
                       (progn (skip-chars-forward "^ \t") (point))))
                 (candidate (progn
                              (skip-chars-forward " \t")
                              (buffer-substring (point) (line-end-position))))
                 (earlier (gethash key table)))
            (puthash key
                     (if earlier
                         (vconcat earlier (vector candidate))
                       (vector candidate))
                     table))
          (forward-line 1))
        ;; Pass 2: collapse all-single-character vectors into one string.
        (maphash #'(lambda (key trans)
                     (let ((len (length trans))
                           (i 0))
                       (while (and (< i len)
                                   (= (length (aref trans i)) 1))
                         (setq i (1+ i)))
                       (setq dic (cons (cons key
                                             (if (= i len)
                                                 (mapconcat 'identity trans "")
                                               trans))
                                       dic))))
                 table)))
    (setq dic (sort dic #'(lambda (a b) (string< (car a) (car b)))))
    (goto-char (point-max))
    (insert (format "%S\n" "$A::WVJdHk!K!>WTH;!?!K(B

$A<|EL6TUU1m(B:
$A)3)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)W)%)%)7(B
$A)'#Q(B $A)'#W(B $A)'#E(B $A)'#R(B $A)'#T(B $A)'#Y(B $A)'#U(Bsh$A)'#I(Bch$A)'#O(B $A)'#P(B $A)'(B
$A)'(B iu$A)'(B ua$A)'(B e$A)'(B uan$A)'(B ue$A)'(B uai$A)'(B u$A)'(B i$A)'(B o$A)'(B un$A)'(B
$A)'(B $A)'(B ia$A)'(B $A)'(B van$A)'(B ve$A)'(B ing$A)'(B $A)'(B $A)'(B uo$A)'(B vn$A)'(B
$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)?(B
$A)'#A(B $A)'#S(B $A)'#D(B $A)'#F(B $A)'#G(B $A)'#H(B $A)'#J(B $A)'#K(B $A)'#L(B $A)'(B
$A)'(B a$A)'(Biong$A)'(Buang$A)'(B en$A)'(B eng$A)'(B ang$A)'(B an$A)'(B ao$A)'(B ai$A)'(B
$A)'(B $A)'(B ong$A)'(Biang$A)'(B $A)'(B ng$A)'(B $A)'(B $A)'(B $A)'(B $A)'(B
$A);)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)_)W)%)%)7(B
$A)'#Z(B $A)'#X(B $A)'#C(B $A)'#V(Bzh$A)'#B(B $A)'#N(B $A)'#M(B $A)'#,(B $A)'#.(B $A)'(B $A#/(B $A)'(B
$A)'(B ei$A)'(B ie$A)'(B iao$A)'(B ui$A)'(B ou$A)'(B in$A)'(B ian$A)'G0R3)':sR3)'7{:E)'(B
$A)'(B $A)'(B $A)'(B $A)'(B v$A)'(B $A)'(B $A)'(B $A)'(B $A)'(B $A)'(B $A)'(B
$A);)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)_)%)%)?(B


Pinyin base input method for Chinese GB2312 characters (`chinese-gb2312').

Pinyin is the standard roman transliteration method for Chinese.
For the details of Pinyin system, see the documentation of the input
method `chinese-py'.

Unlike the standard spelling of Pinyin, in this input method all
initials and finals are assigned to single keys (see the above table).
For instance, the initial \"ch\" is assigned to the key `i', the final
\"iu\" is assigned to the key `q', and tones 1, 2, 3, 4, and $AGaIy(B are
assigned to the keys `q', `w', `e', `r', `t' respectively.

\\<quail-translation-docstring>

To input one-letter words, you type 4 keys, the first two for the
Pinyin of the letter, next one for tone, and the last one is always a
quote ('). For instance, \"vsq'\" input $AVP(B. Exceptions are these
letters. You can input them just by typing a single key.

Character: $A04(B $A2;(B $A4N(B $A5D(B $A6~(B $A7"(B $A8v(B $A:M(B $A3v(B $A<0(B $A?I(B $AAK(B $AC;(B
Key: a b c d e f g h i j k l m
Character: $ADc(B $AE7(B $AF,(B $AF_(B $AHK(B $AH}(B $AK{(B $AJG(B $AWE(B $ANR(B $AP!(B $AR;(B $ATZ(B
Key: n o p q r s t u v w x y z

To input two-letter words, you have two ways. One way is to type 4
keys, two for the first Pinyin, two for the second Pinyin. For
instance, \"vsgo\" inputs $AVP9z(B. Another way is to type 3 keys: 2
initials of two letters, and quote ('). For instance, \"vg'\" also
inputs $AVP9z(B.

To input three-letter words, you type 4 keys: initials of three
letters, and the last is quote ('). For instance, \"bjy'2\" inputs $A11(B
$A>)Q<(B (the last `2' is to select one of the candidates).

To input words of more than three letters, you type 4 keys, initials
of the first three letters and the last letter. For instance,
\"bjdt\" inputs $A11>)5gJSL((B.

To input symbols and punctuations, type `/' followed by one of `a' to
`z', then select one of the candidates."))
    (insert " '((\"\C-?\" . quail-delete-last-char)
(\".\" . quail-next-translation)
(\"[\" . quail-next-translation)
(\",\" . quail-prev-translation)
(\"]\" . quail-prev-translation))
nil nil nil nil)\n\n"
            "(quail-define-rules\n")
    (dolist (rule dic)
      (insert (format "(%S %S)\n" (car rule) (cdr rule))))
    (insert ")\n")))
|
|
1024
|
37875
|
1025 ;; Generate the code for a Quail package in the current buffer from a
|
|
1026 ;; CTLau or CTLau-b5 dictionary in the buffer DICBUF. The input
|
|
1027 ;; method name of the Quail package is NAME, and the title string is
|
|
1028 ;; TITLE. DESCRIPTION is the string shown by describe-input-method.
|
|
1029
|
|
(defun ctlau-converter (dicbuf name title description)
  "Append Quail package code built from the CTLau dictionary in DICBUF.
Inserted at the end of the current buffer, in order: DESCRIPTION
\(printed with `%S'), a key-binding list followed by
\"nil nil nil nil)\", and a `quail-define-rules' form.

The dictionary is the part of DICBUF between the text \"#\\n#<hr>\\n\"
and the beginning of DICBUF's last line.  Each copied line is rewritten
in place: the leading run of backslashes and upper-case letters becomes
the (downcased) key, a leading backslash is doubled, the one character
after the key is deleted, and the rest of the line becomes the
translation string.

NAME and TITLE are not used by this function."
  (goto-char (point-max))
  (insert (format "%S\n" description)
          " '((\"\C-?\" . quail-delete-last-char)
(\".\" . quail-next-translation)
(\">\" . quail-next-translation)
(\",\" . quail-prev-translation)
(\"<\" . quail-prev-translation))
nil nil nil nil)\n\n"
          "(quail-define-rules\n")
  (let* ((rules-start (point))
         ;; Find the dictionary, which starts below a horizontal rule
         ;; and ends at the second to last line in the HTML file.
         (bounds (with-current-buffer dicbuf
                   (goto-char (point-min))
                   (search-forward "#\n#<hr>\n")
                   (let ((from (point)))
                     (goto-char (point-max))
                     (forward-line -1)
                     (cons from (point))))))
    (insert-buffer-substring dicbuf (car bounds) (cdr bounds))
    ;; CTLau-b5.html contains characters (0xa1 0xbc) which show up as
    ;; hollow boxes when the original characters in CTLau.html from
    ;; which the file is converted have no Big5 equivalent.  Go
    ;; through and delete them.
    (goto-char rules-start)
    (while (search-forward "$(0!{(B" nil t)
      (delete-char -1))
    ;; Uppercase keys in dictionary need to be downcased.  Backslashes
    ;; at the beginning of keys need to be turned into double
    ;; backslashes.
    (goto-char rules-start)
    (while (not (eobp))
      (insert "(\"")
      (when (char-equal (following-char) ?\\)
        (insert "\\"))
      (let ((key-start (point)))
        (skip-chars-forward "\\\\A-Z")
        (downcase-region key-start (point)))
      (insert "\" \"")
      (delete-char 1)
      (end-of-line)
      (insert "\")")
      (forward-line 1)))
  (insert ")\n"))
|
|
1076
|
|
(defun ctlau-gb-converter (dicbuf name title)
  "Run `ctlau-converter' on DICBUF with the GB description string.
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((description "$A::WVJdHk!KAuN}OiJ=TARt!K(B

$AAuN}OiJ=TASoW"Rt7=08(B
Sidney Lau's Cantonese transcription scheme as described in his book
\"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
This file was prepared by Fung Fung Lee ($A@n7c7e(B).
Originally converted from CTCPS3.tit
Last modified: June 2, 1993.

Some infrequent GB characters are accessed by typing \\, followed by
the Cantonese romanization of the respective radical ($A2?JW(B)."))
    (ctlau-converter dicbuf name title description)))
|
|
1090
|
|
(defun ctlau-b5-converter (dicbuf name title)
  "Run `ctlau-converter' on DICBUF with the Big5 description string.
DICBUF, NAME and TITLE are passed through unchanged."
  (let ((description "$(0KH)tTT&,!(N,Tg>A*#Gn5x!((B

$(0N,Tg>A*#GnM$0D5x'J7{(B
Sidney Lau's Cantonese transcription scheme as described in his book
\"Elementary Cantonese\", The Government Printer, Hong Kong, 1972.
This file was prepared by Fung Fung Lee ($(0,XFS76(B).
Originally converted from CTCPS3.tit
Last modified: June 2, 1993.

Some infrequent characters are accessed by typing \\, followed by
the Cantonese romanization of the respective radical ($(0?f5}(B)."))
    (ctlau-converter dicbuf name title description)))
|
|
1104
|
37116
|
(defun miscdic-convert (filename &optional dirname)
  "Convert a dictionary file FILENAME into a Quail package.
Optional argument DIRNAME if specified is the directory name under which
the generated Quail package is saved.

FILENAME is checked against the dictionary file name of every entry of
`quail-misc-package-ext-info'.  For each entry whose dictionary name
occurs in FILENAME and whose Quail file is not already newer than
FILENAME, the Quail file is rewritten: a header with the entry's
copyright notice, a `quail-define-package' line, and then whatever the
entry's converter function inserts.  Signals an error if FILENAME is
not readable."
  (interactive "FInput method dictionary file: ")
  (or (file-readable-p filename)
      (error "%s does not exist" filename))
  (let ((default-buffer-file-coding-system 'iso-2022-7bit))
    (dolist (slot quail-misc-package-ext-info)
      ;; Each entry is
      ;; (NAME TITLE DICFILE CODING QUAILFILE CONVERTER COPYRIGHT).
      (let ((name (car slot))
            (title (nth 1 slot))
            (dicfile (nth 2 slot))
            (coding (nth 3 slot))
            (quailfile (nth 4 slot))
            (converter (nth 5 slot))
            (copyright (nth 6 slot)))
        ;; DICFILE is a literal file name, so quote it: otherwise the
        ;; "." in e.g. "CTLau.html" would match any character.
        (when (and (or (string-match (regexp-quote dicfile) filename)
                       ;; MS-DOS filesystem truncates file names to 8+3
                       ;; limits, so "cangjie-table.cns" becomes
                       ;; "cangjie-.cns", and the above string-match
                       ;; fails.  Give DOS users a chance...
                       (and (fboundp 'msdos-long-file-names)
                            (not (msdos-long-file-names))
                            (string-match
                             (regexp-quote (dos-8+3-filename dicfile))
                             filename)))
                   (if (file-newer-than-file-p
                        filename (expand-file-name quailfile dirname))
                       t
                     (message "%s is up to date" quailfile)
                     nil))
          (message "Converting %s to %s..." dicfile quailfile)
          (with-temp-file (expand-file-name quailfile dirname)
            (set-buffer-file-coding-system 'iso-2022-7bit)
            (insert ";; Quail package `" name "' -*- coding:iso-2022-7bit; -*-\n")
            (insert ";; Generated by the command `miscdic-convert'\n")
            (insert ";; Date: " (current-time-string) "\n")
            (insert ";; Source dictionary file: " dicfile "\n")
            (insert ";; Copyright notice of the source file\n")
            (insert ";;------------------------------------------------------\n")
            (insert copyright "\n")
            (insert ";;------------------------------------------------------\n")
            (insert "\n")
            (insert ";;; Code:\n\n")
            (insert "(require 'quail)\n")
            ;; Declare the language environment that matches the
            ;; dictionary's coding system.  GB2312 dictionaries used to
            ;; fall through to "Chinese-CNS".
            (insert "(quail-define-package \"" name "\" \""
                    (cond ((eq coding 'big5) "Chinese-BIG5")
                          ((eq coding 'cn-gb-2312) "Chinese-GB")
                          (t "Chinese-CNS"))
                    "\" \"" title "\" t\n")
            (let* ((coding-system-for-read coding)
                   (dicbuf (find-file-noselect filename)))
              ;; Kill the dictionary buffer even if the converter
              ;; signals an error.
              (unwind-protect
                  (funcall converter dicbuf name title)
                (kill-buffer dicbuf))))
          (message "Converting %s to %s...done" dicfile quailfile))))))
|
|
1162
|
|
(defun batch-miscdic-convert ()
  "Run `miscdic-convert' on the files remaining on the command line.
Use this from the command line, with `-batch';
it won't work in an interactive Emacs.
If there's an argument \"-dir\", the next argument specifies a directory
to store generated Quail packages.
An argument naming a directory stands for every file in that directory.
Emacs is killed with exit code 0 once all arguments are processed."
  (defvar command-line-args-left)	; Avoid compiler warning.
  (if (not noninteractive)
      (error "`batch-miscdic-convert' should be used only with -batch"))
  (let ((dir default-directory))
    ;; Consume one argument per iteration.  Handling "-dir" as its own
    ;; case means a trailing "-dir DIR" no longer leaves a nil file
    ;; name for `file-directory-p', and "-dir" may appear repeatedly.
    (while command-line-args-left
      (let ((arg (car command-line-args-left)))
        (setq command-line-args-left (cdr command-line-args-left))
        (cond ((string= arg "-dir")
               (setq dir (car command-line-args-left)
                     command-line-args-left (cdr command-line-args-left)))
              ((file-directory-p arg)
               (dolist (file (directory-files arg t nil t))
                 (miscdic-convert file dir)))
              (t
               (miscdic-convert arg dir))))))
  (kill-emacs 0))
|
|
1187
|
36684
|
1188 ;; Local Variables:
|
|
1189 ;; coding: iso-2022-7bit
|
|
1190 ;; End:
|
38414
|
1191
|
52401
|
1192 ;;; arch-tag: 8ad478b2-a985-4da2-b47f-d8ee5d7c24a3
|
38414
|
1193 ;;; titdic-cnv.el ends here
|