Mercurial > emacs
comparison lisp/language/chinese.el @ 42150:35e8e47e376b
Implementing euc-tw encoding.
Improving doc strings.
author | Werner LEMBERG <wl@gnu.org> |
---|---|
date | Tue, 18 Dec 2001 17:46:16 +0000 |
parents | 67b464da13ec |
children | f43c7c8adcdf fad0f879877f |
comparison
equal
deleted
inserted
replaced
42149:111acebcb4e0 | 42150:35e8e47e376b |
---|---|
33 ;;; Chinese (general) | 33 ;;; Chinese (general) |
34 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 34 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
35 | 35 |
36 (make-coding-system | 36 (make-coding-system |
37 'iso-2022-cn 2 ?C | 37 'iso-2022-cn 2 ?C |
38 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)" | 38 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)." |
39 '(ascii | 39 '(ascii |
40 (nil chinese-gb2312 chinese-cns11643-1) | 40 (nil chinese-gb2312 chinese-cns11643-1) |
41 (nil chinese-cns11643-2) | 41 (nil chinese-cns11643-2) |
42 nil | 42 nil |
43 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | 43 nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil |
47 | 47 |
48 (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) | 48 (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) |
49 | 49 |
50 (make-coding-system | 50 (make-coding-system |
51 'iso-2022-cn-ext 2 ?C | 51 'iso-2022-cn-ext 2 ?C |
52 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)" | 52 "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)." |
53 '(ascii | 53 '(ascii |
54 (nil chinese-gb2312 chinese-cns11643-1) | 54 (nil chinese-gb2312 chinese-cns11643-1) |
55 (nil chinese-cns11643-2) | 55 (nil chinese-cns11643-2) |
56 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | 56 (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 |
57 chinese-cns11643-6 chinese-cns11643-7) | 57 chinese-cns11643-6 chinese-cns11643-7) |
67 ;;; Chinese GB2312 (simplified) | 67 ;;; Chinese GB2312 (simplified) |
68 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 68 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
69 | 69 |
70 (make-coding-system | 70 (make-coding-system |
71 'chinese-iso-8bit 2 ?c | 71 'chinese-iso-8bit 2 ?c |
72 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)" | 72 "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:GB2312)." |
73 '(ascii chinese-gb2312 nil nil | 73 '(ascii chinese-gb2312 nil nil |
74 nil ascii-eol ascii-cntl nil nil nil nil) | 74 nil ascii-eol ascii-cntl nil nil nil nil) |
75 '((safe-charsets ascii chinese-gb2312) | 75 '((safe-charsets ascii chinese-gb2312) |
76 (mime-charset . gb2312))) | 76 (mime-charset . gb2312))) |
77 | 77 |
81 (define-coding-system-alias 'cn-gb 'chinese-iso-8bit) | 81 (define-coding-system-alias 'cn-gb 'chinese-iso-8bit) |
82 (define-coding-system-alias 'gb2312 'chinese-iso-8bit) | 82 (define-coding-system-alias 'gb2312 'chinese-iso-8bit) |
83 | 83 |
84 (make-coding-system | 84 (make-coding-system |
85 'chinese-hz 0 ?z | 85 'chinese-hz 0 ?z |
86 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)" | 86 "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)." |
87 nil | 87 nil |
88 '((safe-charsets ascii chinese-gb2312) | 88 '((safe-charsets ascii chinese-gb2312) |
89 (mime-charset . hz-gb-2312) | 89 (mime-charset . hz-gb-2312) |
90 (post-read-conversion . post-read-decode-hz) | 90 (post-read-conversion . post-read-decode-hz) |
91 (pre-write-conversion . pre-write-encode-hz))) | 91 (pre-write-conversion . pre-write-encode-hz))) |
124 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 124 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
125 ;; Chinese BIG5 (traditional) | 125 ;; Chinese BIG5 (traditional) |
126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
127 | 127 |
128 (make-coding-system | 128 (make-coding-system |
129 'chinese-big5 3 ?B "BIG5 8-bit encoding for Chinese (MIME:Big5)" | 129 'chinese-big5 3 ?B |
130 "BIG5 8-bit encoding for Chinese (MIME:Big5)." | |
130 nil | 131 nil |
131 '((safe-charsets ascii chinese-big5-1 chinese-big5-2) | 132 '((safe-charsets ascii chinese-big5-1 chinese-big5-2) |
132 (mime-charset . big5) | 133 (mime-charset . big5) |
133 (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) | 134 (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) |
134 (chinese-big5-2 "BIG5" encode-big5-char)))) | 135 (chinese-big5-2 "BIG5" encode-big5-char)))) |
166 | 167 |
167 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 168 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
168 ;; Chinese CNS11643 (traditional) | 169 ;; Chinese CNS11643 (traditional) |
169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 170 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
170 | 171 |
172 (defvar big5-to-cns (make-translation-table) | |
173 "Translation table for encoding to `euc-tw'.") | |
;; The defvar above only creates the table with `make-translation-table' called without arguments; | |
;; `ccl-encode-euc-tw' refers to the table by name in its `translate-character' steps. | |
;; NOTE(review): the actual Big5 -> CNS mappings are presumably supplied by china-util -- confirm. | |
174 ;; Register the table under the symbol `big5-to-cns', unless the symbol already has a `translation-table' property (could have been done by china-util loaded before). | |
175 (unless (get 'big5-to-cns 'translation-table) | |
176 (define-translation-table 'big5-to-cns big5-to-cns)) | |
177 | |
178 (define-ccl-program ccl-decode-euc-tw | |
;; Decoder for EUC-TW.  Byte sequences handled by the loop below: | |
;;   B            (B < #x80)                         -> B is written through unchanged (ASCII); | |
;;   #x8E P B1 B2 (P in #xA1..#xA7, B1, B2 >= #xA1)  -> character of CNS plane P - #xA0; | |
;;   B1 B2        (B1, B2 >= #xA1)                   -> character of CNS plane 1. | |
;; For any other sequence the bytes read so far are written out as they are and the loop restarts. | |
;; Only the lower bound #xA1 is checked for B1 and B2; there is no upper-bound check. | |
179 ;; CNS plane 1 needs either two or four bytes in EUC-TW encoding; | |
180 ;; CNS planes 2 to 7 always need four bytes. In the internal encoding of | |
181 ;; Emacs, CNS planes 1 and 2 need three bytes, and planes 3 to 7 need | |
182 ;; four bytes. Thus a buffer magnification value of 2 (for both | |
183 ;; encoding and decoding) is sufficient. | |
184 `(2 | |
185 ;; We don't have enough registers to hold all charset ids: r4, r5 and r6 cache the ids of CNS planes 1 to 3, the ids of planes 4 to 7 are inlined in the `branch' below. | |
186 ((r4 = ,(charset-id 'chinese-cns11643-1)) | |
187 (r5 = ,(charset-id 'chinese-cns11643-2)) | |
188 (r6 = ,(charset-id 'chinese-cns11643-3)) | |
189 (loop | |
190 (read-if (r0 < #x80) | |
191 ;; ASCII: write the byte and restart the loop | |
192 (write-repeat r0) | |
193 ;; not ASCII: r0 is either the single shift byte #x8E or the first byte of a two-byte sequence | |
194 (if (r0 == #x8E) | |
195 ;; single shift: the next byte (r1) must name a plane, #xA1..#xA7 | |
196 (read-if (r1 < #xA1) | |
197 ;; invalid byte: emit #x8E and the offending byte as they are | |
198 ((write r0) | |
199 (write-repeat r1)) | |
200 (if (r1 > #xA7) | |
201 ;; invalid plane (above #xA7): emit both bytes as they are | |
202 ((write r0) | |
203 (write-repeat r1)) | |
204 ;; OK, we have a plane; now read the two code bytes into r2 and r3 | |
205 (read-if (r2 < #xA1) | |
206 ;; invalid first byte: emit the three bytes read so far | |
207 ((write r0 r1) | |
208 (write-repeat r2)) | |
209 (read-if (r3 < #xA1) | |
210 ;; invalid second byte: emit the four bytes read so far | |
211 ((write r0 r1 r2) | |
212 (write-repeat r3)) | |
213 ;; CNS 1-7, finally: replace the plane byte in r1 by the matching charset id (index 0 = plane 1) | |
214 ((branch (r1 - #xA1) | |
215 (r1 = r4) | |
216 (r1 = r5) | |
217 (r1 = r6) | |
218 (r1 = ,(charset-id 'chinese-cns11643-4)) | |
219 (r1 = ,(charset-id 'chinese-cns11643-5)) | |
220 (r1 = ,(charset-id 'chinese-cns11643-6)) | |
221 (r1 = ,(charset-id 'chinese-cns11643-7))) | |
;; Combine both code bytes, each reduced by #x80, into one value: (B1 - #x80) * 128 + (B2 - #x80). | |
222 (r2 = ((((r2 - #x80) << 7) + r3) - #x80)) | |
223 (write-multibyte-character r1 r2) | |
224 (repeat)))))) | |
225 ;; standard EUC: two bytes without single shift, always CNS plane 1 | |
226 (if (r0 < #xA1) | |
227 ;; invalid first byte (#x80..#xA0 other than #x8E): emit it as it is | |
228 (write-repeat r0) | |
229 (read-if (r1 < #xA1) | |
230 ;; invalid second byte: emit both bytes as they are | |
231 ((write r0) | |
232 (write-repeat r1)) | |
233 ;; CNS 1, finally: same byte combination as above, charset id taken from r4 | |
234 ((r1 = ((((r0 - #x80) << 7) + r1) - #x80)) | |
235 (write-multibyte-character r4 r1) | |
236 (repeat))))))))) | |
237 "CCL program to decode EUC-TW encoding." | |
238 ) | |
239 | |
240 (define-ccl-program ccl-encode-euc-tw | |
;; Encoder for EUC-TW.  For every character read (charset id in r0, code in r1): | |
;;   ASCII            -> the code is written as a single byte; | |
;;   Big5 (1 or 2)    -> first passed through the `big5-to-cns' translation table; | |
;;   CNS plane 1      -> two bytes; | |
;;   CNS planes 2..7  -> #x8E, plane byte #xA2..#xA7, then two bytes; | |
;;   anything else    -> the single byte #xFF. | |
;; The leading 2 is the buffer magnification value. | |
241 `(2 | |
242 ;; We don't have enough registers to hold all charset ids: r2..r6 cache ascii, Big5 levels 1 and 2 and CNS planes 1 and 2; the ids of CNS planes 3 to 7 are inlined in the comparisons below. | |
243 ((r2 = ,(charset-id 'ascii)) | |
244 (r3 = ,(charset-id 'chinese-big5-1)) | |
245 (r4 = ,(charset-id 'chinese-big5-2)) | |
246 (r5 = ,(charset-id 'chinese-cns11643-1)) | |
247 (r6 = ,(charset-id 'chinese-cns11643-2)) | |
248 (loop | |
249 (read-multibyte-character r0 r1) | |
250 (if (r0 == r2) | |
251 (write-repeat r1) | |
252 (;; Big 5 encoded characters are first translated to CNS | |
;; NOTE(review): a Big5 character for which the table yields no CNS character presumably keeps its Big5 charset id and ends up in the #xFF branch below -- confirm `translate-character' semantics. | |
253 (if (r0 == r3) | |
254 (translate-character big5-to-cns r0 r1) | |
255 (if (r0 == r4) | |
256 (translate-character big5-to-cns r0 r1))) | |
;; Replace the charset id in r0 by the EUC-TW plane byte (#xA1 for plane 1 up to #xA7 for plane 7). | |
257 (if (r0 == r5) | |
258 (r0 = #xA1) | |
259 (if (r0 == r6) | |
260 (r0 = #xA2) | |
261 (if (r0 == ,(charset-id 'chinese-cns11643-3)) | |
262 (r0 = #xA3) | |
263 (if (r0 == ,(charset-id 'chinese-cns11643-4)) | |
264 (r0 = #xA4) | |
265 (if (r0 == ,(charset-id 'chinese-cns11643-5)) | |
266 (r0 = #xA5) | |
267 (if (r0 == ,(charset-id 'chinese-cns11643-6)) | |
268 (r0 = #xA6) | |
269 (if (r0 == ,(charset-id 'chinese-cns11643-7)) | |
270 (r0 = #xA7) | |
271 ;; Not CNS. We use a dummy character which | |
272 ;; can't occur in EUC-TW encoding to indicate | |
273 ;; this, and restart the loop without writing anything else. | |
274 (write-repeat #xFF)))))))))) | |
;; Plane 1 uses the plain two-byte form; every other plane needs the four-byte form. | |
275 (if (r0 != #xA1) | |
276 ;; single shift and CNS plane | |
277 ((write #x8E) | |
278 (write r0))) | |
;; Split the code in r1 into its upper and lower 7 bits and add #x80 to each to get the two code bytes. | |
279 (write ((r1 >> 7) + #x80)) | |
280 (write ((r1 % #x80) + #x80)) | |
281 (repeat)))) | |
282 "CCL program to encode EUC-TW encoding." | |
283 ) | |
284 | |
285 (defun euc-tw-pre-write-conversion (beg end) | |
286 "Semi-dummy pre-write function effectively to autoload china-util." | |
;; BEG and END are accepted but never used; no buffer text is touched here. | |
287 ;; Ensure translation table is loaded. | |
288 (require 'china-util) | |
289 ;; Don't do this again: clear the `pre-write-conversion' property of `euc-tw', which is where this function is installed. | |
290 (coding-system-put 'euc-tw 'pre-write-conversion nil) | |
;; The return value is always nil. | |
291 nil) | |
292 | |
293 (make-coding-system | |
;; Define the coding system `euc-tw' on top of the two CCL programs `ccl-decode-euc-tw' and `ccl-encode-euc-tw'. | |
;; NOTE(review): type 4 is presumably the CCL-based coding system type, given the pair of CCL programs passed below -- confirm against `make-coding-system'. | |
294 'euc-tw 4 ?Z | |
295 "ISO 2022 based EUC encoding for Chinese CNS11643. | |
296 Big5 encoding is accepted for input also (which is then converted to CNS)." | |
;; Pair of CCL programs; judging by their names the car decodes and the cdr encodes. | |
297 '(ccl-decode-euc-tw . ccl-encode-euc-tw) | |
;; Besides ASCII and the seven CNS planes, both Big5 charsets are listed as safe: the encoder passes them through `big5-to-cns' before writing. | |
298 '((safe-charsets ascii | |
299 chinese-big5-1 | |
300 chinese-big5-2 | |
301 chinese-cns11643-1 | |
302 chinese-cns11643-2 | |
303 chinese-cns11643-3 | |
304 chinese-cns11643-4 | |
305 chinese-cns11643-5 | |
306 chinese-cns11643-6 | |
307 chinese-cns11643-7) | |
308 (valid-codes (0 . 255)) | |
;; `euc-tw-pre-write-conversion' requires china-util and then removes itself from this property. | |
309 (pre-write-conversion . euc-tw-pre-write-conversion))) | |
310 | |
;; Make `euc-taiwan' an alias of `euc-tw'. | |
311 (define-coding-system-alias 'euc-taiwan 'euc-tw) | |
312 | |
171 (set-language-info-alist | 313 (set-language-info-alist |
172 "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 | 314 "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 |
173 chinese-cns11643-3 chinese-cns11643-4 | 315 chinese-cns11643-3 chinese-cns11643-4 |
174 chinese-cns11643-5 chinese-cns11643-6 | 316 chinese-cns11643-5 chinese-cns11643-6 |
175 chinese-cns11643-7) | 317 chinese-cns11643-7) |
176 (coding-system iso-2022-cn) | 318 (coding-system iso-2022-cn euc-tw) |
177 (coding-priority iso-2022-cn chinese-big5 chinese-iso-8bit) | 319 (coding-priority iso-2022-cn euc-tw chinese-big5 |
320 chinese-iso-8bit) | |
178 (features china-util) | 321 (features china-util) |
179 (input-method . "chinese-cns-quick") | 322 (input-method . "chinese-cns-quick") |
180 (documentation . "Support for Chinese CNS character sets.")) | 323 (documentation . "\ |
324 Support for Chinese CNS character sets. Note that EUC-TW coding system | |
325 accepts Big5 for input also (which is then converted to CNS).")) | |
181 '("Chinese")) | 326 '("Chinese")) |
182 | 327 |
183 (provide 'chinese) | 328 (provide 'chinese) |
184 | 329 |
185 ;;; chinese.el ends here | 330 ;;; chinese.el ends here |