changeset 90324:b3b869baa1c3

(ctext-non-standard-encodings-alist): Add an entry for gbk-0. Set charset `big5' in the entry for "big5-0". (ctext-post-read-conversion): Use multibyt-char-to-unibyte to read a row 8-bit. (ctext-non-standard-encodings): Initialize to nil. (ctext-non-standard-encodings-table): Return a list instead of char-table. (ctext-pre-write-conversion): Adjusted for the above change.
author Kenichi Handa <handa@m17n.org>
date Mon, 27 Feb 2006 01:24:24 +0000
parents 9ef82b3ff1a7
children 9e9c44bc96d6
files lisp/international/mule.el
diffstat 1 files changed, 54 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/international/mule.el	Mon Feb 27 00:56:37 2006 +0000
+++ b/lisp/international/mule.el	Mon Feb 27 01:24:24 2006 +0000
@@ -1343,9 +1343,10 @@
 ;;; X selections
 
 (defvar ctext-non-standard-encodings-alist
-  '(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
+  '(("big5-0" big5 2 big5)
     ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
-    ("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
+    ("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
+    ("gbk-0" gbk 2 chinese-gbk))
   "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
 
 It controls how extended segments of a compound text are handled
@@ -1363,9 +1364,7 @@
 character is variable), 1, 2, 3, or 4.
 
 CHARSET is a charater set containing characters that are encoded
-in the segment.  It can be a list of character sets.  It can also
-be a char-table, in which case characters that have non-nil value
-in the char-table are the target.
+in the segment.  It can be a list of character sets.
 
 On decoding CTEXT, all encoding names listed here are recognized.
 
@@ -1374,8 +1373,7 @@
 listed for the current language environment under the key
 `ctext-non-standard-encodings' are used.")
 
-(defvar ctext-non-standard-encodings
-  '("big5-0")
+(defvar ctext-non-standard-encodings nil
   "List of non-standard encoding names used in extended segments of CTEXT.
 Each element must be one of the names listed in the variable
 `ctext-non-standard-encodings-alist' (which see).")
@@ -1412,8 +1410,8 @@
 	  (setq pos (match-beginning 0))
 	  (if (match-beginning 1)
 	      ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
-	      (let* ((M (char-after (+ pos 4)))
-		     (L (char-after (+ pos 5)))
+	      (let* ((M (multibyte-char-to-unibyte (char-after (+ pos 4))))
+		     (L (multibyte-char-to-unibyte (char-after (+ pos 5))))
 		     (encoding (match-string 2))
 		     (encoding-info (assoc-string
 				     encoding
@@ -1436,32 +1434,41 @@
       (goto-char (point-min))
       (- (point-max) (point)))))
 
-;; Return a char table of extended segment usage for each character.
-;; Each value of the char table is nil, one of the elements of
-;; `ctext-non-standard-encodings-alist', or the symbol `utf-8'.
+;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
+;; `ctext-non-standard-encodings' and a list specified by the key
+;; `ctext-non-standard-encodings' for the currrent language
+;; environment.  CTEXT-USAGE-INFO is one of the element of
+;; `ctext-non-standard-encodings-alist' or nil.  In the former case, a
+;; character in CHARSET is encoded using extended segment.  In the
+;; latter case, a character in CHARSET is encoded using normal ISO2022
+;; designation sequence.  If a character is not in any of CHARSETs, it
+;; is encoded using UTF-8 encoding extention.
 
 (defun ctext-non-standard-encodings-table ()
-  (let ((table (make-char-table 'translation-table)))
-    (aset table (make-char 'mule-unicode-0100-24ff) 'utf-8)
-    (aset table (make-char 'mule-unicode-2500-33ff) 'utf-8)
-    (aset table (make-char 'mule-unicode-e000-ffff) 'utf-8)
-    (dolist (encoding (reverse
-		       (append
+  (let (table)
+    ;; Setup charsets specified in `ctext-non-standard-encodings' and
+    ;; by the key `ctext-non-standard-encodings' for the current
+    ;; language environment.
+    (dolist (encoding (append
+			ctext-non-standard-encodings
 			(get-language-info current-language-environment
-					   'ctext-non-standard-encodings)
-			ctext-non-standard-encodings)))
+					   'ctext-non-standard-encodings)))
       (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
 	     (charset (nth 3 slot)))
-	(if charset
-	    (cond ((charsetp charset)
-		   (aset table (make-char charset) slot))
-		  ((listp charset)
-		   (dolist (elt charset)
-		     (aset table (make-char elt) slot)))
-		  ((char-table-p charset)
-		   (map-char-table #'(lambda (k v)
-				   (if (and v (> k 128)) (aset table k slot)))
-				   charset))))))
+	(if (charsetp charset)
+	    (push (cons charset slot) table)
+	  (dolist (cs charset)
+	    (push (cons cs slot) table)))))
+
+    ;; Next prepend charsets for ISO2022 designation sequence.
+    (dolist (charset charset-list)
+      (let ((final (plist-get (charset-plist charset) :iso-final-char)))
+	(if (and (integerp final)
+		 (>= final #x40) (<= final #x7e)
+		 ;; Exclude ascii and chinese-cns11643-X.
+		 (not (eq charset 'ascii))
+		 (not (string-match "cns11643" (symbol-name charset))))
+	    (push (cons charset nil) table))))
     table))
 
 (defun ctext-pre-write-conversion (from to)
@@ -1481,20 +1488,30 @@
     (let ((encoding-table (ctext-non-standard-encodings-table))
 	  last-coding-system-used
 	  last-pos last-encoding-info
-	  encoding-info end-pos)
+	  encoding-info end-pos ch)
       (goto-char (setq last-pos (point-min)))
       (setq end-pos (point-marker))
       (while (re-search-forward "[^\000-\177]+" nil t)
 	;; Found a sequence of non-ASCII characters.
 	(setq last-pos (match-beginning 0)
-	      last-encoding-info (aref encoding-table (char-after last-pos)))
+	      ch (char-after last-pos)
+	      last-encoding-info (catch 'tag
+				   (dolist (elt encoding-table)
+				     (if (encode-char ch (car elt))
+					 (throw 'tag (cdr elt))))
+				   'utf-8))
 	(set-marker end-pos (match-end 0))
 	(goto-char (1+ last-pos))
 	(catch 'tag
 	  (while t
 	    (setq encoding-info
 		  (if (< (point) end-pos)
-		      (aref encoding-table (following-char))))
+		      (catch 'tag
+			(setq ch (following-char))
+			(dolist (elt encoding-table)
+			  (if (encode-char ch (car elt))
+			      (throw 'tag (cdr elt))))
+			'utf-8)))
 	    (unless (eq last-encoding-info encoding-info)
 	      (cond ((consp last-encoding-info)
 		     ;; Encode the previous range using an extended
@@ -1508,12 +1525,10 @@
 				    (- (point) last-pos)))
 		       (save-excursion
 			 (goto-char last-pos)
-			 (insert (string-to-multibyte
-				  (format "\e%%/%d%c%c%s\002"
-					  noctets
-					  (+ (/ len 128) 128)
-					  (+ (% len 128) 128)
-					  encoding-name))))))
+			 (insert (format "\e%%/%d" noctets))
+			 (insert-byte (+ (/ len 128) 128) 1)
+			 (insert-byte (+ (% len 128) 128) 1)
+			 (insert encoding-name))))
 		    ((eq last-encoding-info 'utf-8)
 		     ;; Encode the previous range using UTF-8 encoding
 		     ;; extention.