Mercurial > emacs
comparison lisp/international/utf-8.el @ 56562:9274a15c1400
(utf-translate-cjk-mode): Doc fix.
author | Luc Teirlinck <teirllm@auburn.edu> |
---|---|
date | Sat, 31 Jul 2004 03:29:07 +0000 |
parents | 4ec2da03a87c |
children | 752ef76fcc08 |
comparison
equal
deleted
inserted
replaced
56561:9e3e4cc5d4ad | 56562:9274a15c1400 |
---|---|
271 (not utf-translate-cjk-lang-env) | 271 (not utf-translate-cjk-lang-env) |
272 (utf-translate-cjk-substitutable-p code-point)) | 272 (utf-translate-cjk-substitutable-p code-point)) |
273 (utf-translate-cjk-load-tables)) | 273 (utf-translate-cjk-load-tables)) |
274 (gethash code-point | 274 (gethash code-point |
275 (get 'utf-subst-table-for-decode 'translation-hash-table))) | 275 (get 'utf-subst-table-for-decode 'translation-hash-table))) |
276 | 276 |
277 | 277 |
278 (defun utf-lookup-subst-table-for-encode (char) | 278 (defun utf-lookup-subst-table-for-encode (char) |
279 (if (and utf-translate-cjk-mode | 279 (if (and utf-translate-cjk-mode |
280 (not utf-translate-cjk-lang-env) | 280 (not utf-translate-cjk-lang-env) |
281 (memq (char-charset char) utf-translate-cjk-charsets)) | 281 (memq (char-charset char) utf-translate-cjk-charsets)) |
282 (utf-translate-cjk-load-tables)) | 282 (utf-translate-cjk-load-tables)) |
283 (gethash char | 283 (gethash char |
284 (get 'utf-subst-table-for-encode 'translation-hash-table))) | 284 (get 'utf-subst-table-for-encode 'translation-hash-table))) |
285 | 285 |
286 (define-minor-mode utf-translate-cjk-mode | 286 (define-minor-mode utf-translate-cjk-mode |
287 "Whether the UTF based coding systems should decode/encode CJK characters. | 287 "Toggle whether UTF based coding systems de/encode CJK characters. |
288 If ARG is an integer, enable if ARG is positive and disable if | |
289 zero or negative. This is a minor mode. | |
288 Enabling this allows the coding systems mule-utf-8, | 290 Enabling this allows the coding systems mule-utf-8, |
289 mule-utf-16le and mule-utf-16be to encode characters in the charsets | 291 mule-utf-16le and mule-utf-16be to encode characters in the charsets |
290 `korean-ksc5601', `chinese-gb2312', `chinese-big5-1', | 292 `korean-ksc5601', `chinese-gb2312', `chinese-big5-1', |
291 `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to | 293 `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to |
292 decode the corresponding unicodes into such characters. | 294 decode the corresponding unicodes into such characters. |
294 Where the charsets overlap, the one preferred for decoding is chosen | 296 Where the charsets overlap, the one preferred for decoding is chosen |
295 according to the language environment in effect when this option is | 297 according to the language environment in effect when this option is |
296 turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for | 298 turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for |
297 Chinese-Big5 and jisx for other environments. | 299 Chinese-Big5 and jisx for other environments. |
298 | 300 |
299 This option is on by default. If you are not interested in CJK | 301 This mode is on by default. If you are not interested in CJK |
300 characters and want to avoid some overhead on encoding/decoding | 302 characters and want to avoid some overhead on encoding/decoding |
301 by the above coding systems, you can customize this option to nil." | 303 by the above coding systems, you can customize the user option |
304 `utf-translate-cjk-mode' to nil." | |
302 :init-value t | 305 :init-value t |
303 :version "21.4" | 306 :version "21.4" |
304 :type 'boolean | 307 :type 'boolean |
305 :group 'mule | 308 :group 'mule |
306 :global t | 309 :global t |
603 | 606 |
604 (define-ccl-program ccl-mule-utf-8-encode-untrans | 607 (define-ccl-program ccl-mule-utf-8-encode-untrans |
605 ;; The UTF-8 decoder generates a UTF-8 sequence represented by a | 608 ;; The UTF-8 decoder generates a UTF-8 sequence represented by a |
606 ;; sequence of eight-bit-control/graphic chars for an untranslatable | 609 ;; sequence of eight-bit-control/graphic chars for an untranslatable |
607 ;; character and an invalid byte. | 610 ;; character and an invalid byte. |
608 ;; | 611 ;; |
609 ;; This CCL parses that sequence (the first byte is already in r1), | 612 ;; This CCL parses that sequence (the first byte is already in r1), |
610 ;; writes out the original bytes of that sequence, and sets r5 to | 613 ;; writes out the original bytes of that sequence, and sets r5 to |
611 ;; -1. | 614 ;; -1. |
612 ;; | 615 ;; |
613 ;; If the eight-bit-control/graphic sequence is shorter than what r1 | 616 ;; If the eight-bit-control/graphic sequence is shorter than what r1 |
622 `(0 | 625 `(0 |
623 (;; Read the 2nd byte. | 626 (;; Read the 2nd byte. |
624 (read-multibyte-character r5 r6) | 627 (read-multibyte-character r5 r6) |
625 (r0 = (r5 != ,(charset-id 'eight-bit-control))) | 628 (r0 = (r5 != ,(charset-id 'eight-bit-control))) |
626 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) | 629 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) |
627 ((write r1) ; invalid UTF-8 | 630 ((write r1) ; invalid UTF-8 |
628 (r1 = -1) | 631 (r1 = -1) |
629 (end))) | 632 (end))) |
630 | 633 |
631 (if (r1 <= #xC3) | 634 (if (r1 <= #xC3) |
632 ;; 2-byte sequence for an originally invalid byte. | 635 ;; 2-byte sequence for an originally invalid byte. |
639 (write r1 r6) | 642 (write r1 r6) |
640 (r2 = r1) | 643 (r2 = r1) |
641 (r1 = -1) | 644 (r1 = -1) |
642 ;; Read the 3rd byte. | 645 ;; Read the 3rd byte. |
643 (read-multibyte-character r5 r6) | 646 (read-multibyte-character r5 r6) |
644 (r0 = (r5 != ,(charset-id 'eight-bit-control))) | 647 (r0 = (r5 != ,(charset-id 'eight-bit-control))) |
645 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) | 648 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) |
646 (end)) ; invalid UTF-8 | 649 (end)) ; invalid UTF-8 |
647 (write r6) | 650 (write r6) |
648 (if (r2 < #xF0) | 651 (if (r2 < #xF0) |
649 ;; 3-byte sequence for an untranslated character. | 652 ;; 3-byte sequence for an untranslated character. |
650 ((r5 = -1) | 653 ((r5 = -1) |
651 (end))) | 654 (end))) |
652 ;; Read the 4th byte. | 655 ;; Read the 4th byte. |
653 (read-multibyte-character r5 r6) | 656 (read-multibyte-character r5 r6) |
654 (r0 = (r5 != ,(charset-id 'eight-bit-control))) | 657 (r0 = (r5 != ,(charset-id 'eight-bit-control))) |
655 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) | 658 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) |
656 (end)) ; invalid UTF-8 | 659 (end)) ; invalid UTF-8 |
657 ;; 4-byte sequence for an untranslated character. | 660 ;; 4-byte sequence for an untranslated character. |
658 (write r6) | 661 (write r6) |
659 (r5 = -1) | 662 (r5 = -1) |