emacs: lisp/international/mule.el comparison

comparison lisp/international/mule.el @ 90324:b3b869baa1c3

(ctext-non-standard-encodings-alist): Add an entry for gbk-0. Set charset `big5' in the entry for "big5-0". (ctext-post-read-conversion): Use multibyte-char-to-unibyte to read a raw 8-bit. (ctext-non-standard-encodings): Initialize to nil. (ctext-non-standard-encodings-table): Return a list instead of char-table. (ctext-pre-write-conversion): Adjusted for the above change.

author	Kenichi Handa <handa@m17n.org>
date	Mon, 27 Feb 2006 01:24:24 +0000
parents	0f622530c46c
children	6e94ff6be848

comparison

equal deleted inserted replaced

-:9ef82b3ff1a7
+:b3b869baa1c3
 (make-obsolete 'set-coding-priority 'set-coding-system-priority "23.1")
 ;;; X selections
 (defvar ctext-non-standard-encodings-alist
-'(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
+'(("big5-0" big5 2 big5)
 ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
-("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
+("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
+("gbk-0" gbk 2 chinese-gbk))
 "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
 It controls how extended segments of a compound text are handled
 by the coding system `compound-text-with-extensions'.
 N-OCTET is the number of octets (bytes) that encodes a character
 in the segment.  It can be 0 (meaning the number of octets per
 character is variable), 1, 2, 3, or 4.
 CHARSET is a character set containing characters that are encoded
-in the segment.  It can be a list of character sets.  It can also
+in the segment.  It can be a list of character sets.
-be a char-table, in which case characters that have non-nil value
-in the char-table are the target.
 On decoding CTEXT, all encoding names listed here are recognized.
 On encoding CTEXT, encoding names in the variable
 `ctext-non-standard-encodings' (which see) and in the information
 listed for the current language environment under the key
 `ctext-non-standard-encodings' are used.")
-(defvar ctext-non-standard-encodings
+(defvar ctext-non-standard-encodings nil
-'("big5-0")
 "List of non-standard encoding names used in extended segments of CTEXT.
 Each element must be one of the names listed in the variable
 `ctext-non-standard-encodings-alist' (which see).")
 (defvar ctext-non-standard-encodings-regexp
 	(while (re-search-forward ctext-non-standard-encodings-regexp
 				  nil 'move)
 	  (setq pos (match-beginning 0))
 	  (if (match-beginning 1)
 	      ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
-	      (let* ((M (char-after (+ pos 4)))
+	      (let* ((M (multibyte-char-to-unibyte (char-after (+ pos 4))))
-		     (L (char-after (+ pos 5)))
+		     (L (multibyte-char-to-unibyte (char-after (+ pos 5))))
 		     (encoding (match-string 2))
 		     (encoding-info (assoc-string
 				     encoding
 				     ctext-non-standard-encodings-alist t))
 		     (coding (if encoding-info
 	    (delete-region pos (+ pos 3))
 	    (decode-coding-region pos (point) 'utf-8))))
 (goto-char (point-min))
 (- (point-max) (point)))))
-;; Return a char table of extended segment usage for each character.
+;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
-;; Each value of the char table is nil, one of the elements of
+;; `ctext-non-standard-encodings' and a list specified by the key
-;; `ctext-non-standard-encodings-alist', or the symbol `utf-8'.
+;; `ctext-non-standard-encodings' for the current language
+;; environment.  CTEXT-USAGE-INFO is one of the elements of
+;; `ctext-non-standard-encodings-alist' or nil.  In the former case, a
+;; character in CHARSET is encoded using extended segment.  In the
+;; latter case, a character in CHARSET is encoded using normal ISO2022
+;; designation sequence.  If a character is not in any of CHARSETs, it
+;; is encoded using UTF-8 encoding extension.
 (defun ctext-non-standard-encodings-table ()
-(let ((table (make-char-table 'translation-table)))
+(let (table)
-(aset table (make-char 'mule-unicode-0100-24ff) 'utf-8)
+;; Setup charsets specified in `ctext-non-standard-encodings' and
-(aset table (make-char 'mule-unicode-2500-33ff) 'utf-8)
+;; by the key `ctext-non-standard-encodings' for the current
-(aset table (make-char 'mule-unicode-e000-ffff) 'utf-8)
+;; language environment.
-(dolist (encoding (reverse
+(dolist (encoding (append
-		       (append
+			ctext-non-standard-encodings
 			(get-language-info current-language-environment
-					   'ctext-non-standard-encodings)
+					   'ctext-non-standard-encodings)))
-			ctext-non-standard-encodings)))
 (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
 	     (charset (nth 3 slot)))
-	(if charset
+	(if (charsetp charset)
-	    (cond ((charsetp charset)
+	    (push (cons charset slot) table)
-		   (aset table (make-char charset) slot))
+	  (dolist (cs charset)
-		  ((listp charset)
+	    (push (cons cs slot) table)))))
-		   (dolist (elt charset)
-		     (aset table (make-char elt) slot)))
+;; Next prepend charsets for ISO2022 designation sequence.
-		  ((char-table-p charset)
+(dolist (charset charset-list)
-		   (map-char-table #'(lambda (k v)
+(let ((final (plist-get (charset-plist charset) :iso-final-char)))
-				   (if (and v (> k 128)) (aset table k slot)))
+	(if (and (integerp final)
-				   charset))))))
+		 (>= final #x40) (<= final #x7e)
+		 ;; Exclude ascii and chinese-cns11643-X.
+		 (not (eq charset 'ascii))
+		 (not (string-match "cns11643" (symbol-name charset))))
+	    (push (cons charset nil) table))))
 table))
 (defun ctext-pre-write-conversion (from to)
 "Encode characters between FROM and TO as Compound Text w/Extended Segments.
 ;; Now we can encode the whole buffer.
 (let ((encoding-table (ctext-non-standard-encodings-table))
 	  last-coding-system-used
 	  last-pos last-encoding-info
-	  encoding-info end-pos)
+	  encoding-info end-pos ch)
 (goto-char (setq last-pos (point-min)))
 (setq end-pos (point-marker))
 (while (re-search-forward "[^\000-\177]+" nil t)
 	;; Found a sequence of non-ASCII characters.
 	(setq last-pos (match-beginning 0)
-	      last-encoding-info (aref encoding-table (char-after last-pos)))
+	      ch (char-after last-pos)
+	      last-encoding-info (catch 'tag
+				   (dolist (elt encoding-table)
+				     (if (encode-char ch (car elt))
+					 (throw 'tag (cdr elt))))
+				   'utf-8))
 	(set-marker end-pos (match-end 0))
 	(goto-char (1+ last-pos))
 	(catch 'tag
 	  (while t
 	    (setq encoding-info
 		  (if (< (point) end-pos)
-		      (aref encoding-table (following-char))))
+		      (catch 'tag
+			(setq ch (following-char))
+			(dolist (elt encoding-table)
+			  (if (encode-char ch (car elt))
+			      (throw 'tag (cdr elt))))
+			'utf-8)))
 	    (unless (eq last-encoding-info encoding-info)
 	      (cond ((consp last-encoding-info)
 		     ;; Encode the previous range using an extended
 		     ;; segment.
 		     (let ((encoding-name (car last-encoding-info))
 		       (encode-coding-region last-pos (point) coding-system)
 		       (setq len (+ (length encoding-name) 1
 				    (- (point) last-pos)))
 		       (save-excursion
 			 (goto-char last-pos)
-			 (insert (string-to-multibyte
+			 (insert (format "\e%%/%d" noctets))
-				  (format "\e%%/%d%c%c%s\002"
+			 (insert-byte (+ (/ len 128) 128) 1)
-					  noctets
+			 (insert-byte (+ (% len 128) 128) 1)
-					  (+ (/ len 128) 128)
+			 (insert encoding-name))))
-					  (+ (% len 128) 128)
-					  encoding-name))))))
 		    ((eq last-encoding-info 'utf-8)
 		     ;; Encode the previous range using UTF-8 encoding
 		     ;; extension.
 		     (encode-coding-region last-pos (point) 'mule-utf-8)
 		     (save-excursion

Mercurial > emacs

comparison lisp/international/mule.el @ 90324:b3b869baa1c3