comparison lisp/international/utf-8.el @ 57737:e425df7605c9

(ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more Unicode ranges.
author Kenichi Handa <handa@m17n.org>
date Wed, 27 Oct 2004 12:50:09 +0000
parents c3945be39e09
children 13239a8e9e80
comparison
equal deleted inserted replaced
57736:dd9868eeca9e 57737:e425df7605c9
256 (mapconcat #'(lambda (x) 256 (mapconcat #'(lambda (x)
257 (format "%c-%c" 257 (format "%c-%c"
258 (funcall decode-char-no-trans (car x)) 258 (funcall decode-char-no-trans (car x))
259 (funcall decode-char-no-trans (cdr x)))) 259 (funcall decode-char-no-trans (cdr x))))
260 ranges ""))) 260 ranges "")))
261 ;; This forces loading tables for utf-translate-cjk-mode. 261 ;; This forces loading and setting tables for
262 (setq utf-translate-cjk-lang-env nil)) 262 ;; utf-translate-cjk-mode.
263 (setq utf-translate-cjk-lang-env nil
264 ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
265 ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)))
263 266
264 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3) 267 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3)
265 (#xff00 . #xffef)) 268 (#xff00 . #xffef))
266 "List of Unicode code ranges supported by `utf-translate-cjk-mode'. 269 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.
267 Setting this variable directly does not take effect; 270 Setting this variable directly does not take effect;
490 493
491 (if (r0 < #xe0) 494 (if (r0 < #xe0)
492 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx 495 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
493 ((r1 &= #x3F) 496 ((r1 &= #x3F)
494 (r1 |= ((r0 & #x1F) << 6)) 497 (r1 |= ((r0 & #x1F) << 6))
495 ;; Now r2 holds scalar value. We don't have to check 498 ;; Now r1 holds scalar value. We don't have to check
496 ;; `overlong sequence' because r0 >= 0xC2. 499 ;; `overlong sequence' because r0 >= 0xC2.
497 500
498 (if (r1 >= 256) 501 (if (r1 >= 256)
499 ;; mule-unicode-0100-24ff (< 0800) 502 ;; mule-unicode-0100-24ff (< 0800)
500 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) 503 ((r0 = r1)
501 (r1 -= #x0100) 504 (lookup-integer utf-subst-table-for-decode r0 r1)
502 (r2 = (((r1 / 96) + 32) << 7)) 505 (if (r7 == 0)
503 (r1 %= 96) 506 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
504 (r1 += (r2 + 32)) 507 (r1 -= #x0100)
505 (translate-character 508 (r2 = (((r1 / 96) + 32) << 7))
506 utf-translation-table-for-decode r0 r1) 509 (r1 %= 96)
510 (r1 += (r2 + 32))
511 (translate-character
512 utf-translation-table-for-decode r0 r1)))
507 (write-multibyte-character r0 r1) 513 (write-multibyte-character r0 r1)
508 (read r0) 514 (read r0)
509 (repeat)) 515 (repeat))
510 (if (r1 >= 160) 516 (if (r1 >= 160)
511 ;; latin-iso8859-1 517 ;; latin-iso8859-1
512 ((r1 -= 128) 518 ((r0 = r1)
513 (write-multibyte-character r6 r1) 519 (lookup-integer utf-subst-table-for-decode r0 r1)
520 (if (r7 == 0)
521 ((r1 -= 128)
522 (write-multibyte-character r6 r1))
523 ((write-multibyte-character r0 r1)))
514 (read r0) 524 (read r0)
515 (repeat)) 525 (repeat))
516 ;; eight-bit-control 526 ;; eight-bit-control
517 ((r0 = ,(charset-id 'eight-bit-control)) 527 ((r0 = ,(charset-id 'eight-bit-control))
518 (write-multibyte-character r0 r1) 528 (write-multibyte-character r0 r1)
547 (read r0) 557 (read r0)
548 (repeat))) 558 (repeat)))
549 559
550 (if (r3 < #x2500) 560 (if (r3 < #x2500)
551 ;; mule-unicode-0100-24ff (>= 0800) 561 ;; mule-unicode-0100-24ff (>= 0800)
552 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) 562 ((r0 = r3)
553 (r3 -= #x0100) 563 (lookup-integer utf-subst-table-for-decode r0 r1)
554 (r3 //= 96) 564 (if (r7 == 0)
555 (r1 = (r7 + 32)) 565 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
556 (r1 += ((r3 + 32) << 7)) 566 (r3 -= #x0100)
557 (translate-character 567 (r3 //= 96)
558 utf-translation-table-for-decode r0 r1) 568 (r1 = (r7 + 32))
569 (r1 += ((r3 + 32) << 7))
570 (translate-character
571 utf-translation-table-for-decode r0 r1)))
559 (write-multibyte-character r0 r1) 572 (write-multibyte-character r0 r1)
560 (read r0) 573 (read r0)
561 (repeat))) 574 (repeat)))
562 575
563 (if (r3 < #x3400) 576 (if (r3 < #x3400)
947 (skip-chars-forward 960 (skip-chars-forward
948 (concat range utf-translate-cjk-unicode-range-string)) 961 (concat range utf-translate-cjk-unicode-range-string))
949 (unless (eobp) 962 (unless (eobp)
950 (utf-translate-cjk-load-tables) 963 (utf-translate-cjk-load-tables)
951 (setq range 964 (setq range
952 (concat range utf-translate-cjk-unicode-range-string)))) 965 (concat range utf-translate-cjk-unicode-range-string)))
953 (setq hash-table (get 'utf-subst-table-for-decode 966 (setq hash-table (get 'utf-subst-table-for-decode
954 'translation-hash-table))) 967 'translation-hash-table))))
955 (while (and (skip-chars-forward range) 968 (while (and (skip-chars-forward range)
956 (not (eobp))) 969 (not (eobp)))
957 (setq ch (following-char)) 970 (setq ch (following-char))
958 (if (< ch 256) 971 (if (< ch 256)
959 (utf-8-compose hash-table) 972 (utf-8-compose hash-table)