# HG changeset patch # User Kenichi Handa # Date 1098881409 0 # Node ID e425df7605c9e64b171bbc3af090c63700287449 # Parent dd9868eeca9e064d5ae6f5412a0672a0a38154c2 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more Unicode ranges. diff -r dd9868eeca9e -r e425df7605c9 lisp/international/utf-8.el --- a/lisp/international/utf-8.el Wed Oct 27 11:02:06 2004 +0000 +++ b/lisp/international/utf-8.el Wed Oct 27 12:50:09 2004 +0000 @@ -258,8 +258,11 @@ (funcall decode-char-no-trans (car x)) (funcall decode-char-no-trans (cdr x)))) ranges ""))) - ;; This forces loading tables for utf-translate-cjk-mode. - (setq utf-translate-cjk-lang-env nil)) + ;; This forces loading and setting tables for + ;; utf-translate-cjk-mode. + (setq utf-translate-cjk-lang-env nil + ucs-mule-cjk-to-unicode (make-hash-table :test 'eq) + ucs-unicode-to-mule-cjk (make-hash-table :test 'eq))) (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3) (#xff00 . #xffef)) @@ -492,25 +495,32 @@ ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx ((r1 &= #x3F) (r1 |= ((r0 & #x1F) << 6)) - ;; Now r2 holds scalar value. We don't have to check + ;; Now r1 holds scalar value. We don't have to check ;; `overlong sequence' because r0 >= 0xC2.
(if (r1 >= 256) ;; mule-unicode-0100-24ff (< 0800) - ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) - (r1 -= #x0100) - (r2 = (((r1 / 96) + 32) << 7)) - (r1 %= 96) - (r1 += (r2 + 32)) - (translate-character - utf-translation-table-for-decode r0 r1) + ((r0 = r1) + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) + (r1 -= #x0100) + (r2 = (((r1 / 96) + 32) << 7)) + (r1 %= 96) + (r1 += (r2 + 32)) + (translate-character + utf-translation-table-for-decode r0 r1))) (write-multibyte-character r0 r1) (read r0) (repeat)) (if (r1 >= 160) ;; latin-iso8859-1 - ((r1 -= 128) - (write-multibyte-character r6 r1) + ((r0 = r1) + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r1 -= 128) + (write-multibyte-character r6 r1)) + ((write-multibyte-character r0 r1))) (read r0) (repeat)) ;; eight-bit-control @@ -549,13 +559,16 @@ (if (r3 < #x2500) ;; mule-unicode-0100-24ff (>= 0800) - ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) - (r3 -= #x0100) - (r3 //= 96) - (r1 = (r7 + 32)) - (r1 += ((r3 + 32) << 7)) - (translate-character - utf-translation-table-for-decode r0 r1) + ((r0 = r3) + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) + (r3 -= #x0100) + (r3 //= 96) + (r1 = (r7 + 32)) + (r1 += ((r3 + 32) << 7)) + (translate-character + utf-translation-table-for-decode r0 r1))) (write-multibyte-character r0 r1) (read r0) (repeat))) @@ -949,9 +962,9 @@ (unless (eobp) (utf-translate-cjk-load-tables) (setq range - (concat range utf-translate-cjk-unicode-range-string)))) - (setq hash-table (get 'utf-subst-table-for-decode - 'translation-hash-table))) + (concat range utf-translate-cjk-unicode-range-string))) + (setq hash-table (get 'utf-subst-table-for-decode + 'translation-hash-table)))) (while (and (skip-chars-forward range) (not (eobp))) (setq ch (following-char))