Mercurial > emacs
comparison lisp/international/characters.el @ 88971:651b64e6dce3
Call map-charset-chars on big5 (not chinese-big5-1/2) to set categories `c', `C', and `|'.
(next-word-boundary-han): New function. Register it in next-word-boundary-function-table.
(next-word-boundary-kana): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 01 Aug 2002 12:36:17 +0000 |
parents | 70b2e9f6d8a8 |
children | fc96b539a43c |
comparison
equal
deleted
inserted
replaced
88970:a65b3bd9379c | 88971:651b64e6dce3 |
---|---|
155 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E) | 155 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E) |
156 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E) | 156 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E) |
157 | 157 |
158 ;; Chinese character set (BIG5) | 158 ;; Chinese character set (BIG5) |
159 | 159 |
160 (map-charset-chars #'modify-category-entry 'chinese-big5-1 ?c) | 160 (map-charset-chars #'modify-category-entry 'big5 ?c) |
161 (map-charset-chars #'modify-category-entry 'chinese-big5-2 ?c) | 161 (map-charset-chars #'modify-category-entry 'big5 ?C) |
162 (map-charset-chars #'modify-category-entry 'chinese-big5-1 ?C) | 162 (map-charset-chars #'modify-category-entry 'big5 ?|) |
163 (map-charset-chars #'modify-category-entry 'chinese-big5-2 ?C) | 163 |
164 (map-charset-chars #'modify-category-entry 'chinese-big5-1 ?|) | |
165 (map-charset-chars #'modify-category-entry 'chinese-big5-2 ?|) | |
166 | 164 |
167 ;; Chinese character set (CNS11643) | 165 ;; Chinese character set (CNS11643) |
168 | 166 |
169 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 | 167 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 |
170 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 | 168 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 |
772 (modify-category-entry '(#x20d0 . #x20e3) ?^) | 770 (modify-category-entry '(#x20d0 . #x20e3) ?^) |
773 | 771 |
774 ;; Fixme: syntax for symbols &c | 772 ;; Fixme: syntax for symbols &c |
775 ) | 773 ) |
776 | 774 |
777 ;;; Setting word boundary. | |
778 | |
779 (setq word-combining-categories | |
780 '((?l . ?l))) | |
781 | |
782 (setq word-separating-categories ; (2-byte character sets) | |
783 '((?A . ?K) ; Alpha numeric - Katakana | |
784 (?A . ?C) ; Alpha numeric - Chinese | |
785 (?H . ?A) ; Hiragana - Alpha numeric | |
786 (?H . ?K) ; Hiragana - Katakana | |
787 (?H . ?C) ; Hiragana - Chinese | |
788 (?K . ?A) ; Katakana - Alpha numeric | |
789 (?K . ?C) ; Katakana - Chinese | |
790 (?C . ?A) ; Chinese - Alpha numeric | |
791 (?C . ?K) ; Chinese - Katakana | |
792 )) | |
793 | |
794 | |
795 ;; For each character set, put the information of the most proper | 775 ;; For each character set, put the information of the most proper |
796 ;; coding system to encode it by `preferred-coding-system' property. | 776 ;; coding system to encode it by `preferred-coding-system' property. |
797 | 777 |
798 ;; Fixme: should this be junked? | 778 ;; Fixme: should this be junked? |
799 (let ((l '((latin-iso8859-1 . iso-latin-1) | 779 (let ((l '((latin-iso8859-1 . iso-latin-1) |
942 (#x16A0 #x16FF runic) | 922 (#x16A0 #x16FF runic) |
943 (#x1780 #x17FF khmer) | 923 (#x1780 #x17FF khmer) |
944 (#x1800 #x18AF mongolian) | 924 (#x1800 #x18AF mongolian) |
945 (#x1E00 #x1EFF latin) | 925 (#x1E00 #x1EFF latin) |
946 (#x1F00 #x1FFF greek) | 926 (#x1F00 #x1FFF greek) |
947 (#x20000 #x2AFFF han) | |
948 (#x20A0 #x20AF currency) | 927 (#x20A0 #x20AF currency) |
949 (#x2800 #x28FF braille) | 928 (#x2800 #x28FF braille) |
950 (#x2E80 #x2FDF han) | 929 (#x2E80 #x2FDF han) |
951 (#x2FF0 #x2FFF ideographic-description) | 930 (#x2FF0 #x2FFF ideographic-description) |
952 (#x3000 #x303F cjk-misc) | 931 (#x3000 #x303F cjk-misc) |
963 (#xFB50 #xFDFF arabic) | 942 (#xFB50 #xFDFF arabic) |
964 (#xFE70 #xFEFC arabic) | 943 (#xFE70 #xFEFC arabic) |
965 (#xFF00 #xFF5F cjk-misc) | 944 (#xFF00 #xFF5F cjk-misc) |
966 (#xFF61 #xFF9F kana) | 945 (#xFF61 #xFF9F kana) |
967 (#xFFE0 #xFFE6 cjk-misc) | 946 (#xFFE0 #xFFE6 cjk-misc) |
947 (#x20000 #x2AFFF han) | |
968 (#x2F800 #x2FFFF han))) | 948 (#x2F800 #x2FFFF han))) |
969 (set-char-table-range char-script-table | 949 (set-char-table-range char-script-table |
970 (cons (car elt) (nth 1 elt)) (nth 2 elt)) | 950 (cons (car elt) (nth 1 elt)) (nth 2 elt)) |
971 (or (memq (nth 2 elt) script-list) | 951 (or (memq (nth 2 elt) script-list) |
972 (setq script-list (cons (nth 2 elt) script-list)))) | 952 (setq script-list (cons (nth 2 elt) script-list)))) |
973 (set-char-table-extra-slot char-script-table 0 (nreverse script-list))) | 953 (set-char-table-extra-slot char-script-table 0 (nreverse script-list))) |
974 | 954 |
955 | |
956 ;;; Setting word boundary. | |
957 | |
958 (defun next-word-boundary-han (pos limit) | |
959 (if (<= pos limit) | |
960 (save-excursion | |
961 (goto-char pos) | |
962 (looking-at "\\cC+") | |
963 (goto-char (match-end 0)) | |
964 (if (looking-at "\\cK+\\|\\cH+") | |
965 (goto-char (match-end 0))) | |
966 (point)) | |
967 (while (and (> pos limit) | |
968 (eq (aref char-script-table (char-after (1- pos))) 'han)) | |
969 (setq pos (1- pos))) | |
970 pos)) | |
971 | |
972 (defun next-word-boundary-kana (pos limit) | |
973 (if (<= pos limit) | |
974 (save-excursion | |
975 (goto-char pos) | |
976 (if (looking-at "\\cK+") | |
977 (goto-char (match-end 0))) | |
978 (if (looking-at "\\cH+") | |
979 (goto-char (match-end 0))) | |
980 (point)) | |
981 (let ((category-set (char-category-set (char-after pos))) | |
982 category) | |
983 (if (aref category-set ?K) | |
984 (while (and (> pos limit) | |
985 (aref (char-category-set (char-after (1- pos))) ?K)) | |
986 (setq pos (1- pos))) | |
987 (while (and (> pos limit) | |
988 (aref (setq category-set | |
989 (char-category-set (char-after (1- pos)))) ?H)) | |
990 (setq pos (1- pos))) | |
991 (setq category (cond ((aref category-set ?C) ?C) | |
992 ((aref category-set ?K) ?K) | |
993 ((aref category-set ?A) ?A))) | |
994 (when category | |
995 (setq pos (1- pos)) | |
996 (while (and (> pos limit) | |
997 (aref (char-category-set (char-after (1- pos))) | |
998 category)) | |
999 (setq pos (1- pos))))) | |
1000 pos))) | |
1001 | |
1002 (map-char-table | |
1003 #'(lambda (char script) | |
1004 (cond ((eq script 'han) | |
1005 (set-char-table-range next-word-boundary-function-table | |
1006 char #'next-word-boundary-han)) | |
1007 ((eq script 'kana) | |
1008 (set-char-table-range next-word-boundary-function-table | |
1009 char #'next-word-boundary-kana)))) | |
1010 char-script-table) | |
1011 | |
1012 (setq word-combining-categories | |
1013 '((?l . ?l))) | |
1014 | |
1015 (setq word-separating-categories ; (2-byte character sets) | |
1016 '((?A . ?K) ; Alpha numeric - Katakana | |
1017 (?A . ?C) ; Alpha numeric - Chinese | |
1018 (?H . ?A) ; Hiragana - Alpha numeric | |
1019 (?H . ?K) ; Hiragana - Katakana | |
1020 (?H . ?C) ; Hiragana - Chinese | |
1021 (?K . ?A) ; Katakana - Alpha numeric | |
1022 (?K . ?C) ; Katakana - Chinese | |
1023 (?C . ?A) ; Chinese - Alpha numeric | |
1024 (?C . ?K) ; Chinese - Katakana | |
1025 )) | |
1026 | |
975 ;;; Local Variables: | 1027 ;;; Local Variables: |
976 ;;; coding: utf-8-emacs | 1028 ;;; coding: utf-8-emacs |
977 ;;; End: | 1029 ;;; End: |
978 | 1030 |
979 ;;; characters.el ends here | 1031 ;;; characters.el ends here |