emacs: lisp/international/utf-8.el comparison

comparison lisp/international/utf-8.el @ 56562:9274a15c1400

(utf-translate-cjk-mode): Doc fix.

author	Luc Teirlinck <teirllm@auburn.edu>
date	Sat, 31 Jul 2004 03:29:07 +0000
parents	4ec2da03a87c
children	752ef76fcc08

comparison

equal deleted inserted replaced

-:9e3e4cc5d4ad
+:9274a15c1400
 	   (not utf-translate-cjk-lang-env)
 	   (utf-translate-cjk-substitutable-p code-point))
 (utf-translate-cjk-load-tables))
 (gethash code-point
 	   (get 'utf-subst-table-for-decode 'translation-hash-table)))
 ;; Look up CHAR in the encode substitution table and return the
 ;; result of `gethash' (nil when CHAR has no entry).  The table is
 ;; the hash table stored under the `translation-hash-table' property
 ;; of the symbol `utf-subst-table-for-encode'.
 (defun utf-lookup-subst-table-for-encode (char)
 ;; Before the lookup, call `utf-translate-cjk-load-tables' when all
 ;; three conditions hold: `utf-translate-cjk-mode' is non-nil,
 ;; `utf-translate-cjk-lang-env' is nil, and CHAR's charset is a
 ;; member of `utf-translate-cjk-charsets'.
 ;; NOTE(review): presumably this loads the CJK tables on demand and
 ;; `utf-translate-cjk-lang-env' being nil means "not loaded yet" --
 ;; confirm against `utf-translate-cjk-load-tables'.
 (if (and utf-translate-cjk-mode
 	   (not utf-translate-cjk-lang-env)
 	   (memq (char-charset char) utf-translate-cjk-charsets))
 (utf-translate-cjk-load-tables))
 ;; The lookup itself is unconditional; its value is the function's
 ;; return value.
 (gethash char
 	   (get 'utf-subst-table-for-encode 'translation-hash-table)))
 (define-minor-mode utf-translate-cjk-mode
-"Whether the UTF based coding systems should decode/encode CJK characters.
+"Toggle whether UTF based coding systems de/encode CJK characters.
+If ARG is an integer, enable if ARG is positive and disable if
+zero or negative.  This is a minor mode.
 Enabling this allows the coding systems mule-utf-8,
 mule-utf-16le and mule-utf-16be to encode characters in the charsets
 `korean-ksc5601', `chinese-gb2312', `chinese-big5-1',
 `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to
 decode the corresponding unicodes into such characters.
 Where the charsets overlap, the one preferred for decoding is chosen
 according to the language environment in effect when this option is
 turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for
 Chinese-Big5 and jisx for other environments.
-This option is on by default.  If you are not interested in CJK
+This mode is on by default.  If you are not interested in CJK
 characters and want to avoid some overhead on encoding/decoding
-by the above coding systems, you can customize this option to nil."
+by the above coding systems, you can customize the user option
+`utf-translate-cjk-mode' to nil."
 :init-value t
 :version "21.4"
 :type 'boolean
 :group 'mule
 :global t
 (define-ccl-program ccl-mule-utf-8-encode-untrans
 ;; The UTF-8 decoder generates a UTF-8 sequence represented by a
 ;; sequence of eight-bit-control/graphic chars for an untranslatable
 ;; character and an invalid byte.
 ;;
 ;; This CCL parses that sequence (the first byte is already in r1),
 ;; writes out the original bytes of that sequence, and sets r5 to
 ;; -1.
 ;;
 ;; If the eight-bit-control/graphic sequence is shorter than what r1
 `(0
 (;; Read the 2nd byte.
 (read-multibyte-character r5 r6)
 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
 	 ((write r1)			; invalid UTF-8
 	  (r1 = -1)
 	  (end)))
 (if (r1 <= #xC3)
 	 ;; 2-byte sequence for an originally invalid byte.
 (write r1 r6)
 (r2 = r1)
 (r1 = -1)
 ;; Read the 3rd byte.
 (read-multibyte-character r5 r6)
 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
 	 (end))				; invalid UTF-8
 (write r6)
 (if (r2 < #xF0)
 	 ;; 3-byte sequence for an untranslated character.
 	 ((r5 = -1)
 	  (end)))
 ;; Read the 4th byte.
 (read-multibyte-character r5 r6)
 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
 	 (end))			; invalid UTF-8
 ;; 4-byte sequence for an untranslated character.
 (write r6)
 (r5 = -1)

Mercurial > emacs

comparison lisp/international/utf-8.el @ 56562:9274a15c1400