comparison lisp/international/characters.el @ 88971:651b64e6dce3

Call map-charset-chars on big5 (not chinese-big5-1/2) to set categories `c', `C', and `|'. (next-word-boundary-han): New function. Register it in next-word-boundary-function-table. (next-word-boundary-kana): Likewise.
author Kenichi Handa <handa@m17n.org>
date Thu, 01 Aug 2002 12:36:17 +0000
parents 70b2e9f6d8a8
children fc96b539a43c
comparison
equal deleted inserted replaced
88970:a65b3bd9379c 88971:651b64e6dce3
155 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E) 155 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
156 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E) 156 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
157 157
158 ;; Chinese character set (BIG5) 158 ;; Chinese character set (BIG5)
159 159
160 (map-charset-chars #'modify-category-entry 'chinese-big5-1 ?c) 160 (map-charset-chars #'modify-category-entry 'big5 ?c)
161 (map-charset-chars #'modify-category-entry 'chinese-big5-2 ?c) 161 (map-charset-chars #'modify-category-entry 'big5 ?C)
162 (map-charset-chars #'modify-category-entry 'chinese-big5-1 ?C) 162 (map-charset-chars #'modify-category-entry 'big5 ?|)
163 (map-charset-chars #'modify-category-entry 'chinese-big5-2 ?C) 163
164 (map-charset-chars #'modify-category-entry 'chinese-big5-1 ?|)
165 (map-charset-chars #'modify-category-entry 'chinese-big5-2 ?|)
166 164
167 ;; Chinese character set (CNS11643) 165 ;; Chinese character set (CNS11643)
168 166
169 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 167 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
170 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 168 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
772 (modify-category-entry '(#x20d0 . #x20e3) ?^) 770 (modify-category-entry '(#x20d0 . #x20e3) ?^)
773 771
774 ;; Fixme: syntax for symbols &c 772 ;; Fixme: syntax for symbols &c
775 ) 773 )
776 774
777 ;;; Setting word boundary.
778
779 (setq word-combining-categories
780 '((?l . ?l)))
781
782 (setq word-separating-categories ; (2-byte character sets)
783 '((?A . ?K) ; Alpha numeric - Katakana
784 (?A . ?C) ; Alpha numeric - Chinese
785 (?H . ?A) ; Hiragana - Alpha numeric
786 (?H . ?K) ; Hiragana - Katakana
787 (?H . ?C) ; Hiragana - Chinese
788 (?K . ?A) ; Katakana - Alpha numeric
789 (?K . ?C) ; Katakana - Chinese
790 (?C . ?A) ; Chinese - Alpha numeric
791 (?C . ?K) ; Chinese - Katakana
792 ))
793
794
795 ;; For each character set, put the information of the most proper 775 ;; For each character set, put the information of the most proper
796 ;; coding system to encode it by `preferred-coding-system' property. 776 ;; coding system to encode it by `preferred-coding-system' property.
797 777
798 ;; Fixme: should this be junked? 778 ;; Fixme: should this be junked?
799 (let ((l '((latin-iso8859-1 . iso-latin-1) 779 (let ((l '((latin-iso8859-1 . iso-latin-1)
942 (#x16A0 #x16FF runic) 922 (#x16A0 #x16FF runic)
943 (#x1780 #x17FF khmer) 923 (#x1780 #x17FF khmer)
944 (#x1800 #x18AF mongolian) 924 (#x1800 #x18AF mongolian)
945 (#x1E00 #x1EFF latin) 925 (#x1E00 #x1EFF latin)
946 (#x1F00 #x1FFF greek) 926 (#x1F00 #x1FFF greek)
947 (#x20000 #x2AFFF han)
948 (#x20A0 #x20AF currency) 927 (#x20A0 #x20AF currency)
949 (#x2800 #x28FF braille) 928 (#x2800 #x28FF braille)
950 (#x2E80 #x2FDF han) 929 (#x2E80 #x2FDF han)
951 (#x2FF0 #x2FFF ideographic-description) 930 (#x2FF0 #x2FFF ideographic-description)
952 (#x3000 #x303F cjk-misc) 931 (#x3000 #x303F cjk-misc)
963 (#xFB50 #xFDFF arabic) 942 (#xFB50 #xFDFF arabic)
964 (#xFE70 #xFEFC arabic) 943 (#xFE70 #xFEFC arabic)
965 (#xFF00 #xFF5F cjk-misc) 944 (#xFF00 #xFF5F cjk-misc)
966 (#xFF61 #xFF9F kana) 945 (#xFF61 #xFF9F kana)
967 (#xFFE0 #xFFE6 cjk-misc) 946 (#xFFE0 #xFFE6 cjk-misc)
947 (#x20000 #x2AFFF han)
968 (#x2F800 #x2FFFF han))) 948 (#x2F800 #x2FFFF han)))
969 (set-char-table-range char-script-table 949 (set-char-table-range char-script-table
970 (cons (car elt) (nth 1 elt)) (nth 2 elt)) 950 (cons (car elt) (nth 1 elt)) (nth 2 elt))
971 (or (memq (nth 2 elt) script-list) 951 (or (memq (nth 2 elt) script-list)
972 (setq script-list (cons (nth 2 elt) script-list)))) 952 (setq script-list (cons (nth 2 elt) script-list))))
973 (set-char-table-extra-slot char-script-table 0 (nreverse script-list))) 953 (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))
974 954
955
956 ;;; Setting word boundary.
957
;; Return a word-boundary position for han text, starting from POS.
;; When POS <= LIMIT the scan runs forward from POS; otherwise it walks
;; backward from POS and never moves before LIMIT.
;; The category letters used below follow this file's own labels:
;; C = Chinese, K = Katakana, H = Hiragana.
;; NOTE(review): the forward branch never consults LIMIT -- confirm that
;; callers tolerate a result beyond it.
958 (defun next-word-boundary-han (pos limit)
959 (if (<= pos limit) ; forward direction
960 (save-excursion ; point is moved only temporarily
961 (goto-char pos)
962 (looking-at "\\cC+") ; run of category-C characters; the match result is not checked
963 (goto-char (match-end 0)) ; NOTE(review): relies on the match above having succeeded -- confirm
964 (if (looking-at "\\cK+\\|\\cH+") ; then one trailing run of either K or H characters
965 (goto-char (match-end 0)))
966 (point)) ; value returned for the forward direction
967 (while (and (> pos limit) ; backward direction: stop at LIMIT
968 (eq (aref char-script-table (char-after (1- pos))) 'han)) ; character before POS has script han
969 (setq pos (1- pos)))
970 pos)) ; value returned for the backward direction
971
;; Return a word-boundary position for kana text, starting from POS.
;; When POS <= LIMIT the scan runs forward from POS: an optional run of
;; category-K characters followed by an optional run of category-H
;; characters.  Otherwise it walks backward from POS toward LIMIT.
;; The category letters follow this file's own labels:
;; K = Katakana, H = Hiragana, C = Chinese, A = Alpha numeric.
;; NOTE(review): the forward branch never consults LIMIT -- confirm that
;; callers tolerate a result beyond it.
972 (defun next-word-boundary-kana (pos limit)
973 (if (<= pos limit) ; forward direction
974 (save-excursion ; point is moved only temporarily
975 (goto-char pos)
976 (if (looking-at "\\cK+") ; skip a run of K characters, if any
977 (goto-char (match-end 0)))
978 (if (looking-at "\\cH+") ; then a run of H characters, if any
979 (goto-char (match-end 0)))
980 (point)) ; value returned for the forward direction
981 (let ((category-set (char-category-set (char-after pos))) ; NOTE(review): inspects the character AT pos, while the loops below inspect (1- pos) -- confirm this is intended and that a character always exists at pos
982 category) ; starts as nil; set only in the non-K branch
983 (if (aref category-set ?K) ; character at POS is in category K
984 (while (and (> pos limit) ; back over preceding K characters, stopping at LIMIT
985 (aref (char-category-set (char-after (1- pos))) ?K))
986 (setq pos (1- pos)))
987 (while (and (> pos limit) ; otherwise back over preceding H characters
988 (aref (setq category-set ; remembers the category set of the last character examined
989 (char-category-set (char-after (1- pos)))) ?H))
990 (setq pos (1- pos)))
991 (setq category (cond ((aref category-set ?C) ?C) ; choose one category from the remembered set: C first,
992 ((aref category-set ?K) ?K) ; then K,
993 ((aref category-set ?A) ?A))) ; then A; nil when none applies
994 (when category
995 (setq pos (1- pos)) ; NOTE(review): this step is not guarded by LIMIT -- confirm
996 (while (and (> pos limit) ; continue back over characters of the chosen category
997 (aref (char-category-set (char-after (1- pos)))
998 category))
999 (setq pos (1- pos)))))
1000 pos))) ; value returned for the backward direction
1001
;; Walk char-script-table and register a boundary function in
;; next-word-boundary-function-table for every entry whose script is
;; han or kana.  Entries with any other script are left untouched.
1002 (map-char-table
1003 #'(lambda (char script)
1004 (cond ((eq script 'han) ; han entries use next-word-boundary-han
1005 (set-char-table-range next-word-boundary-function-table
1006 char #'next-word-boundary-han))
1007 ((eq script 'kana) ; kana entries use next-word-boundary-kana
1008 (set-char-table-range next-word-boundary-function-table
1009 char #'next-word-boundary-kana))))
1010 char-script-table)
1011
;; Category pairs that influence word-boundary detection.
;; NOTE(review): presumably a pair in word-combining-categories keeps
;; adjacent characters of those categories in one word, and a pair in
;; word-separating-categories puts a boundary between them -- confirm
;; against the documentation of the two variables.
1012 (setq word-combining-categories
1013 '((?l . ?l))) ; a single pair: category l with itself
1014
1015 (setq word-separating-categories ; (2-byte character sets)
1016 '((?A . ?K) ; Alpha numeric - Katakana
1017 (?A . ?C) ; Alpha numeric - Chinese
1018 (?H . ?A) ; Hiragana - Alpha numeric
1019 (?H . ?K) ; Hiragana - Katakana
1020 (?H . ?C) ; Hiragana - Chinese
1021 (?K . ?A) ; Katakana - Alpha numeric
1022 (?K . ?C) ; Katakana - Chinese
1023 (?C . ?A) ; Chinese - Alpha numeric
1024 (?C . ?K) ; Chinese - Katakana
1025 ))
1026
975 ;;; Local Variables: 1027 ;;; Local Variables:
976 ;;; coding: utf-8-emacs 1028 ;;; coding: utf-8-emacs
977 ;;; End: 1029 ;;; End:
978 1030
979 ;;; characters.el ends here 1031 ;;; characters.el ends here