comparison lisp/international/utf-8.el @ 56562:9274a15c1400

(utf-translate-cjk-mode): Doc fix.
author Luc Teirlinck <teirllm@auburn.edu>
date Sat, 31 Jul 2004 03:29:07 +0000
parents 4ec2da03a87c
children 752ef76fcc08
comparison
equal deleted inserted replaced
56561:9e3e4cc5d4ad 56562:9274a15c1400
271 (not utf-translate-cjk-lang-env) 271 (not utf-translate-cjk-lang-env)
272 (utf-translate-cjk-substitutable-p code-point)) 272 (utf-translate-cjk-substitutable-p code-point))
273 (utf-translate-cjk-load-tables)) 273 (utf-translate-cjk-load-tables))
274 (gethash code-point 274 (gethash code-point
275 (get 'utf-subst-table-for-decode 'translation-hash-table))) 275 (get 'utf-subst-table-for-decode 'translation-hash-table)))
276 276
277 277
278 (defun utf-lookup-subst-table-for-encode (char) 278 (defun utf-lookup-subst-table-for-encode (char)
279 (if (and utf-translate-cjk-mode 279 (if (and utf-translate-cjk-mode
280 (not utf-translate-cjk-lang-env) 280 (not utf-translate-cjk-lang-env)
281 (memq (char-charset char) utf-translate-cjk-charsets)) 281 (memq (char-charset char) utf-translate-cjk-charsets))
282 (utf-translate-cjk-load-tables)) 282 (utf-translate-cjk-load-tables))
283 (gethash char 283 (gethash char
284 (get 'utf-subst-table-for-encode 'translation-hash-table))) 284 (get 'utf-subst-table-for-encode 'translation-hash-table)))
285 285
286 (define-minor-mode utf-translate-cjk-mode 286 (define-minor-mode utf-translate-cjk-mode
287 "Whether the UTF based coding systems should decode/encode CJK characters. 287 "Toggle whether UTF based coding systems de/encode CJK characters.
288 If ARG is an integer, enable if ARG is positive and disable if
289 zero or negative. This is a minor mode.
288 Enabling this allows the coding systems mule-utf-8, 290 Enabling this allows the coding systems mule-utf-8,
289 mule-utf-16le and mule-utf-16be to encode characters in the charsets 291 mule-utf-16le and mule-utf-16be to encode characters in the charsets
290 `korean-ksc5601', `chinese-gb2312', `chinese-big5-1', 292 `korean-ksc5601', `chinese-gb2312', `chinese-big5-1',
291 `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to 293 `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to
292 decode the corresponding unicodes into such characters. 294 decode the corresponding unicodes into such characters.
294 Where the charsets overlap, the one preferred for decoding is chosen 296 Where the charsets overlap, the one preferred for decoding is chosen
295 according to the language environment in effect when this option is 297 according to the language environment in effect when this option is
296 turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for 298 turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for
297 Chinese-Big5 and jisx for other environments. 299 Chinese-Big5 and jisx for other environments.
298 300
299 This option is on by default. If you are not interested in CJK 301 This mode is on by default. If you are not interested in CJK
300 characters and want to avoid some overhead on encoding/decoding 302 characters and want to avoid some overhead on encoding/decoding
301 by the above coding systems, you can customize this option to nil." 303 by the above coding systems, you can customize the user option
304 `utf-translate-cjk-mode' to nil."
302 :init-value t 305 :init-value t
303 :version "21.4" 306 :version "21.4"
304 :type 'boolean 307 :type 'boolean
305 :group 'mule 308 :group 'mule
306 :global t 309 :global t
603 606
604 (define-ccl-program ccl-mule-utf-8-encode-untrans 607 (define-ccl-program ccl-mule-utf-8-encode-untrans
605 ;; UTF-8 decoder generates an UTF-8 sequence represented by a 608 ;; UTF-8 decoder generates an UTF-8 sequence represented by a
606 ;; sequence eight-bit-control/graphic chars for an untranslatable 609 ;; sequence eight-bit-control/graphic chars for an untranslatable
607 ;; character and an invalid byte. 610 ;; character and an invalid byte.
608 ;; 611 ;;
609 ;; This CCL parses that sequence (the first byte is already in r1), 612 ;; This CCL parses that sequence (the first byte is already in r1),
610 ;; writes out the original bytes of that sequence, and sets r5 to 613 ;; writes out the original bytes of that sequence, and sets r5 to
611 ;; -1. 614 ;; -1.
612 ;; 615 ;;
613 ;; If the eight-bit-control/graphic sequence is shorter than what r1 616 ;; If the eight-bit-control/graphic sequence is shorter than what r1
622 `(0 625 `(0
623 (;; Read the 2nd byte. 626 (;; Read the 2nd byte.
624 (read-multibyte-character r5 r6) 627 (read-multibyte-character r5 r6)
625 (r0 = (r5 != ,(charset-id 'eight-bit-control))) 628 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
626 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) 629 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
627 ((write r1) ; invalid UTF-8 630 ((write r1) ; invalid UTF-8
628 (r1 = -1) 631 (r1 = -1)
629 (end))) 632 (end)))
630 633
631 (if (r1 <= #xC3) 634 (if (r1 <= #xC3)
632 ;; 2-byte sequence for an originally invalid byte. 635 ;; 2-byte sequence for an originally invalid byte.
639 (write r1 r6) 642 (write r1 r6)
640 (r2 = r1) 643 (r2 = r1)
641 (r1 = -1) 644 (r1 = -1)
642 ;; Read the 3rd byte. 645 ;; Read the 3rd byte.
643 (read-multibyte-character r5 r6) 646 (read-multibyte-character r5 r6)
644 (r0 = (r5 != ,(charset-id 'eight-bit-control))) 647 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
645 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) 648 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
646 (end)) ; invalid UTF-8 649 (end)) ; invalid UTF-8
647 (write r6) 650 (write r6)
648 (if (r2 < #xF0) 651 (if (r2 < #xF0)
649 ;; 3-byte sequence for an untranslated character. 652 ;; 3-byte sequence for an untranslated character.
650 ((r5 = -1) 653 ((r5 = -1)
651 (end))) 654 (end)))
652 ;; Read the 4th byte. 655 ;; Read the 4th byte.
653 (read-multibyte-character r5 r6) 656 (read-multibyte-character r5 r6)
654 (r0 = (r5 != ,(charset-id 'eight-bit-control))) 657 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
655 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) 658 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
656 (end)) ; invalid UTF-8 659 (end)) ; invalid UTF-8
657 ;; 4-byte sequence for an untranslated character. 660 ;; 4-byte sequence for an untranslated character.
658 (write r6) 661 (write r6)
659 (r5 = -1) 662 (r5 = -1)