Mercurial > emacs
comparison lisp/international/utf-8.el @ 57737:e425df7605c9
(ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Unicode ranges.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 27 Oct 2004 12:50:09 +0000 |
parents | c3945be39e09 |
children | 13239a8e9e80 |
comparison
equal
deleted
inserted
replaced
57736:dd9868eeca9e | 57737:e425df7605c9 |
---|---|
256 (mapconcat #'(lambda (x) | 256 (mapconcat #'(lambda (x) |
257 (format "%c-%c" | 257 (format "%c-%c" |
258 (funcall decode-char-no-trans (car x)) | 258 (funcall decode-char-no-trans (car x)) |
259 (funcall decode-char-no-trans (cdr x)))) | 259 (funcall decode-char-no-trans (cdr x)))) |
260 ranges ""))) | 260 ranges ""))) |
261 ;; This forces loading tables for utf-translate-cjk-mode. | 261 ;; This forces loading and setting tables for |
262 (setq utf-translate-cjk-lang-env nil)) | 262 ;; utf-translate-cjk-mode. |
263 (setq utf-translate-cjk-lang-env nil | |
264 ucs-mule-cjk-to-unicode (make-hash-table :test 'eq) | |
265 ucs-unicode-to-mule-cjk (make-hash-table :test 'eq))) | |
263 | 266 |
264 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3) | 267 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3) |
265 (#xff00 . #xffef)) | 268 (#xff00 . #xffef)) |
266 "List of Unicode code ranges supported by `utf-translate-cjk-mode'. | 269 "List of Unicode code ranges supported by `utf-translate-cjk-mode'. |
267 Setting this variable directly does not take effect; | 270 Setting this variable directly does not take effect; |
490 | 493 |
491 (if (r0 < #xe0) | 494 (if (r0 < #xe0) |
492 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx | 495 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx |
493 ((r1 &= #x3F) | 496 ((r1 &= #x3F) |
494 (r1 |= ((r0 & #x1F) << 6)) | 497 (r1 |= ((r0 & #x1F) << 6)) |
495 ;; Now r2 holds scalar value. We don't have to check | 498 ;; Now r1 holds scalar value. We don't have to check |
496 ;; `overlong sequence' because r0 >= 0xC2. | 499 ;; `overlong sequence' because r0 >= 0xC2. |
497 | 500 |
498 (if (r1 >= 256) | 501 (if (r1 >= 256) |
499 ;; mule-unicode-0100-24ff (< 0800) | 502 ;; mule-unicode-0100-24ff (< 0800) |
500 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) | 503 ((r0 = r1) |
501 (r1 -= #x0100) | 504 (lookup-integer utf-subst-table-for-decode r0 r1) |
502 (r2 = (((r1 / 96) + 32) << 7)) | 505 (if (r7 == 0) |
503 (r1 %= 96) | 506 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) |
504 (r1 += (r2 + 32)) | 507 (r1 -= #x0100) |
505 (translate-character | 508 (r2 = (((r1 / 96) + 32) << 7)) |
506 utf-translation-table-for-decode r0 r1) | 509 (r1 %= 96) |
510 (r1 += (r2 + 32)) | |
511 (translate-character | |
512 utf-translation-table-for-decode r0 r1))) | |
507 (write-multibyte-character r0 r1) | 513 (write-multibyte-character r0 r1) |
508 (read r0) | 514 (read r0) |
509 (repeat)) | 515 (repeat)) |
510 (if (r1 >= 160) | 516 (if (r1 >= 160) |
511 ;; latin-iso8859-1 | 517 ;; latin-iso8859-1 |
512 ((r1 -= 128) | 518 ((r0 = r1) |
513 (write-multibyte-character r6 r1) | 519 (lookup-integer utf-subst-table-for-decode r0 r1) |
520 (if (r7 == 0) | |
521 ((r1 -= 128) | |
522 (write-multibyte-character r6 r1)) | |
523 ((write-multibyte-character r0 r1))) | |
514 (read r0) | 524 (read r0) |
515 (repeat)) | 525 (repeat)) |
516 ;; eight-bit-control | 526 ;; eight-bit-control |
517 ((r0 = ,(charset-id 'eight-bit-control)) | 527 ((r0 = ,(charset-id 'eight-bit-control)) |
518 (write-multibyte-character r0 r1) | 528 (write-multibyte-character r0 r1) |
547 (read r0) | 557 (read r0) |
548 (repeat))) | 558 (repeat))) |
549 | 559 |
550 (if (r3 < #x2500) | 560 (if (r3 < #x2500) |
551 ;; mule-unicode-0100-24ff (>= 0800) | 561 ;; mule-unicode-0100-24ff (>= 0800) |
552 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) | 562 ((r0 = r3) |
553 (r3 -= #x0100) | 563 (lookup-integer utf-subst-table-for-decode r0 r1) |
554 (r3 //= 96) | 564 (if (r7 == 0) |
555 (r1 = (r7 + 32)) | 565 ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) |
556 (r1 += ((r3 + 32) << 7)) | 566 (r3 -= #x0100) |
557 (translate-character | 567 (r3 //= 96) |
558 utf-translation-table-for-decode r0 r1) | 568 (r1 = (r7 + 32)) |
569 (r1 += ((r3 + 32) << 7)) | |
570 (translate-character | |
571 utf-translation-table-for-decode r0 r1))) | |
559 (write-multibyte-character r0 r1) | 572 (write-multibyte-character r0 r1) |
560 (read r0) | 573 (read r0) |
561 (repeat))) | 574 (repeat))) |
562 | 575 |
563 (if (r3 < #x3400) | 576 (if (r3 < #x3400) |
947 (skip-chars-forward | 960 (skip-chars-forward |
948 (concat range utf-translate-cjk-unicode-range-string)) | 961 (concat range utf-translate-cjk-unicode-range-string)) |
949 (unless (eobp) | 962 (unless (eobp) |
950 (utf-translate-cjk-load-tables) | 963 (utf-translate-cjk-load-tables) |
951 (setq range | 964 (setq range |
952 (concat range utf-translate-cjk-unicode-range-string)))) | 965 (concat range utf-translate-cjk-unicode-range-string))) |
953 (setq hash-table (get 'utf-subst-table-for-decode | 966 (setq hash-table (get 'utf-subst-table-for-decode |
954 'translation-hash-table))) | 967 'translation-hash-table)))) |
955 (while (and (skip-chars-forward range) | 968 (while (and (skip-chars-forward range) |
956 (not (eobp))) | 969 (not (eobp))) |
957 (setq ch (following-char)) | 970 (setq ch (following-char)) |
958 (if (< ch 256) | 971 (if (< ch 256) |
959 (utf-8-compose hash-table) | 972 (utf-8-compose hash-table) |