Mercurial > emacs
comparison lisp/international/characters.el @ 89483:2f877ed80fa6
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 08 Sep 2003 12:53:41 +0000 |
parents | 375f2633d815 f040012c16bb |
children | 1ad3832f1d1d |
comparison
equal
deleted
inserted
replaced
88123:375f2633d815 | 89483:2f877ed80fa6 |
---|---|
1 ;;; characters.el --- set syntax and category for multibyte characters | 1 ;;; characters.el --- set syntax and category for multibyte characters |
2 | 2 |
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. | 3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. |
4 ;; Licensed to the Free Software Foundation. | 4 ;; Licensed to the Free Software Foundation. |
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. | 5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. |
6 ;; Copyright (C) 2003 | |
7 ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
8 ;; Registration Number H13PRO009 | |
6 | 9 |
7 ;; Keywords: multibyte character, character set, syntax, category | 10 ;; Keywords: multibyte character, character set, syntax, category |
8 | 11 |
9 ;; This file is part of GNU Emacs. | 12 ;; This file is part of GNU Emacs. |
10 | 13 |
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | 25 ;; along with GNU Emacs; see the file COPYING. If not, write to the |
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
24 ;; Boston, MA 02111-1307, USA. | 27 ;; Boston, MA 02111-1307, USA. |
25 | 28 |
26 ;;; Commentary: | 29 ;;; Commentary: |
27 | |
28 ;; This file contains multibyte characters. Save this file always in | |
29 ;; the coding system `iso-2022-7bit'. | |
30 | |
31 ;; This file does not define the syntax for Latin-N character sets; | |
32 ;; those are defined by the files latin-N.el. | |
33 | 30 |
34 ;;; Code: | 31 ;;; Code: |
35 | 32 |
36 ;;; Predefined categories. | 33 ;;; Predefined categories. |
37 | 34 |
96 | 93 |
97 ;;; Setting syntax and category. | 94 ;;; Setting syntax and category. |
98 | 95 |
99 ;; ASCII | 96 ;; ASCII |
100 | 97 |
101 (let ((ch 32)) | 98 ;; All ASCII characters have the category `a' (ASCII) and `l' (Latin). |
102 (while (< ch 127) ; All ASCII characters have | 99 (modify-category-entry '(32 . 127) ?a) |
103 (modify-category-entry ch ?a) ; the category `a' (ASCII) | 100 (modify-category-entry '(32 . 127) ?l) |
104 (modify-category-entry ch ?l) ; and `l' (Latin). | 101 |
105 (setq ch (1+ ch)))) | 102 ;; Deal with the CJK charsets first. Since the syntax of blocks is |
106 | 103 ;; defined per charset, and the charsets may contain e.g. Latin |
107 ;; Arabic character set | 104 ;; characters, we end up with the wrong syntax definitions if we're |
108 | 105 ;; not careful. |
109 (let ((charsets '(arabic-iso8859-6 | 106 |
110 arabic-digit | 107 ;; Chinese characters (Unicode) |
111 arabic-1-column | 108 (modify-category-entry '(#x3400 . #x9FAF) ?C) |
112 arabic-2-column))) | 109 (modify-category-entry '(#x3400 . #x9FAF) ?c) |
113 (while charsets | 110 (modify-category-entry '(#x3400 . #x9FAF) ?|) |
114 ;; (modify-syntax-entry (make-char (car charsets)) "w") | 111 (modify-category-entry '(#xF900 . #xFAFF) ?C) |
115 (modify-category-entry (make-char (car charsets)) ?b) | 112 (modify-category-entry '(#xF900 . #xFAFF) ?c) |
116 (setq charsets (cdr charsets)))) | 113 (modify-category-entry '(#xF900 . #xFAFF) ?|) |
117 (let ((ch #x600)) | |
118 (while (<= ch #x6ff) | |
119 (modify-category-entry (decode-char 'ucs ch) ?b) | |
120 (setq ch (1+ ch))) | |
121 (setq ch #xfb50) | |
122 (while (<= ch #xfdff) | |
123 (modify-category-entry (decode-char 'ucs ch) ?b) | |
124 (setq ch (1+ ch))) | |
125 (setq ch #xfe70) | |
126 (while (<= ch #xfefe) | |
127 (modify-category-entry (decode-char 'ucs ch) ?b) | |
128 (setq ch (1+ ch)))) | |
129 | 114 |
130 ;; Chinese character set (GB2312) | 115 ;; Chinese character set (GB2312) |
131 | 116 |
132 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w") | 117 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E) |
133 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_") | 118 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E) |
134 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_") | 119 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E) |
135 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_") | 120 |
136 (modify-syntax-entry ?\$A!2(B "($A!3(B") | 121 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c) |
137 (modify-syntax-entry ?\$A!4(B "($A!5(B") | 122 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?|) |
138 (modify-syntax-entry ?\$A!6(B "($A!7(B") | 123 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339) |
139 (modify-syntax-entry ?\$A!8(B "($A!9(B") | 124 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A) |
140 (modify-syntax-entry ?\$A!:(B "($A!;(B") | 125 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A) |
141 (modify-syntax-entry ?\$A!<(B "($A!=(B") | 126 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E) |
142 (modify-syntax-entry ?\$A!>(B "($A!?(B") | 127 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E) |
143 (modify-syntax-entry ?\$A#((B "($A#)(B") | 128 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E) |
144 (modify-syntax-entry ?\$A#{(B "($A#}(B") | 129 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E) |
145 (modify-syntax-entry ?\$A#[(B "($A#](B") | 130 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E) |
146 (modify-syntax-entry ?\$A!3(B ")$A!2(B") | |
147 (modify-syntax-entry ?\$A!5(B ")$A!4(B") | |
148 (modify-syntax-entry ?\$A!7(B ")$A!6(B") | |
149 (modify-syntax-entry ?\$A!9(B ")$A!8(B") | |
150 (modify-syntax-entry ?\$A!;(B ")$A!:(B") | |
151 (modify-syntax-entry ?\$A!=(B ")$A!<(B") | |
152 (modify-syntax-entry ?\$A!?(B ")$A!>(B") | |
153 (modify-syntax-entry ?\$A#)(B ")$A#((B") | |
154 (modify-syntax-entry ?\$A#}(B ")$A#{(B") | |
155 (modify-syntax-entry ?\$A#](B ")$A#[(B") | |
156 ;; Unicode equivalents of above | |
157 (modify-syntax-entry ?\$,2=T(B "($,2=U(B") | |
158 (modify-syntax-entry ?\$,2=H(B "($,2=I(B") | |
159 (modify-syntax-entry ?\$,2=J(B "($,2=K(B") | |
160 (modify-syntax-entry ?\$,2=L(B "($,2=M(B") | |
161 (modify-syntax-entry ?\$,2=N(B "($,2=O(B") | |
162 (modify-syntax-entry ?\$,2=V(B "($,2=W(B") | |
163 (modify-syntax-entry ?\$,2=P(B "($,2=Q(B") | |
164 (modify-syntax-entry ?\$,2=U(B ")$,2=T(B") | |
165 (modify-syntax-entry ?\$,2=I(B ")$,2=H(B") | |
166 (modify-syntax-entry ?\$,2=K(B ")$,2=J(B") | |
167 (modify-syntax-entry ?\$,2=M(B ")$,2=L(B") | |
168 (modify-syntax-entry ?\$,2=O(B ")$,2=N(B") | |
169 (modify-syntax-entry ?\$,2=W(B ")$,2=V(B") | |
170 (modify-syntax-entry ?\$,2=Q(B ")$,2=P(B") | |
171 | |
172 (let ((chars "$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d(B")) | |
173 (dotimes (i (length chars)) | |
174 (modify-syntax-entry (aref chars i) "."))) | |
175 | |
176 (modify-category-entry (make-char 'chinese-gb2312) ?c) | |
177 (modify-category-entry (make-char 'chinese-gb2312) ?\|) | |
178 (modify-category-entry (make-char 'chinese-gb2312 35) ?A) | |
179 (modify-category-entry (make-char 'chinese-gb2312 36) ?H) | |
180 (modify-category-entry (make-char 'chinese-gb2312 37) ?K) | |
181 (modify-category-entry (make-char 'chinese-gb2312 38) ?G) | |
182 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y) | |
183 (let ((row 48)) | |
184 (while (< row 127) | |
185 (modify-category-entry (make-char 'chinese-gb2312 row) ?C) | |
186 (setq row (1+ row)))) | |
187 | 131 |
188 ;; Chinese character set (BIG5) | 132 ;; Chinese character set (BIG5) |
189 | 133 |
190 | 134 (map-charset-chars #'modify-category-entry 'big5 ?c) |
191 | 135 (map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA25F) |
192 (let ((from (decode-big5-char #xA141)) | 136 (map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E) |
193 (to (decode-big5-char #xA15D))) | 137 (map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DF) |
194 (while (< from to) | 138 (map-charset-chars #'modify-category-entry 'big5 ?|) |
195 (modify-syntax-entry from ".") | 139 |
196 (setq from (1+ from)))) | 140 |
197 (let ((from (decode-big5-char #xA1A5)) | 141 ;; Chinese character set (CNS11643) |
198 (to (decode-big5-char #xA1AD))) | 142 |
199 (while (< from to) | 143 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 |
200 (modify-syntax-entry from ".") | 144 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 |
201 (setq from (1+ from)))) | 145 chinese-cns11643-7)) |
202 (let ((from (decode-big5-char #xA1AD)) | 146 (map-charset-chars #'modify-category-entry c ?c) |
203 (to (decode-big5-char #xA2AF))) | 147 (if (eq c 'chinese-cns11643-1) |
204 (while (< from to) | 148 (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E) |
205 (modify-syntax-entry from "_") | 149 (map-charset-chars #'modify-category-entry c ?C)) |
206 (setq from (1+ from)))) | 150 (map-charset-chars #'modify-category-entry c ?|)) |
207 | 151 |
208 (let ((parens "$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c(B") | 152 ;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213) |
153 | |
154 (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k) | |
155 | |
156 (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r) | |
157 | |
158 (dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212 | |
159 japanese-jisx0213-1 japanese-jisx0213-2)) | |
160 (map-charset-chars #'modify-category-entry l ?j) | |
161 (if (eq l 'japanese-jisx0213-1) | |
162 (map-charset-chars #'modify-category-entry l ?\| #x2E21 #x7E7F) | |
163 (map-charset-chars #'modify-category-entry l ?\|))) | |
164 | |
165 ;; Unicode equivalents of JISX0201-kana | |
166 (let ((range '(#xff61 . #xff9f))) | |
167 (modify-category-entry range ?k) | |
168 (modify-category-entry range ?j) | |
169 (modify-category-entry range ?\|)) | |
170 | |
171 ;; Katakana block | |
172 (let ((range '(#x30a0 . #x30ff))) | |
173 ;; ?K is double width, ?k isn't specified | |
174 (modify-category-entry range ?K) | |
175 (modify-category-entry range ?\|)) | |
176 | |
177 ;; Hiragana block | |
178 (let ((range '(#x3040 . #x309d))) | |
179 ;; ?H is actually defined to be double width | |
180 ;;(modify-category-entry range ?H) | |
181 ;;(modify-category-entry range ?\|) | |
182 ) | |
183 | |
184 ;; JISX0208 | |
185 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E) | |
186 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E) | |
187 (let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))) ; chars to be given word syntax | |
188 (dolist (elt chars) | |
189 (modify-syntax-entry elt "w"))) ; use ELT: (car chars) would only ever touch the first character | |
190 | |
191 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E) | |
192 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E) | |
193 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E) | |
194 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E) | |
195 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E) | |
196 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E) | |
197 (modify-category-entry ?ー ?K) | |
198 (let ((chars '(?゛ ?゜))) | |
199 (while chars | |
200 (modify-category-entry (car chars) ?K) | |
201 (modify-category-entry (car chars) ?H) | |
202 (setq chars (cdr chars)))) | |
203 (let ((chars '(?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))) | |
204 (while chars | |
205 (modify-category-entry (car chars) ?C) | |
206 (setq chars (cdr chars)))) | |
207 | |
208 ;; JISX0212 | |
209 | |
210 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E) | |
211 | |
212 ;; JISX0201-Kana | |
213 | |
214 (let ((chars '(?。 ?、 ?・))) | |
215 (while chars | |
216 (modify-syntax-entry (car chars) ".") | |
217 (setq chars (cdr chars)))) | |
218 | |
219 (modify-syntax-entry ?\「 "(」") ; open-paren class, matching character 」 | |
220 (modify-syntax-entry ?\」 ")「") ; close-paren class, matching character 「 | |
221 | |
222 ;; Korean character set (KSC5601) | |
223 | |
224 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h) | |
225 | |
226 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E) | |
227 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E) | |
228 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E) | |
229 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E) | |
230 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339) | |
231 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A) | |
232 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A) | |
233 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E) | |
234 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E) | |
235 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E) | |
236 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E) | |
237 | |
238 ;; These are in more than one charset. | |
239 (let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛" | |
240 "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄" | |
241 "()[]{}")) | |
209 open close) | 242 open close) |
210 (dotimes (i (/ (length parens) 2)) | 243 (dotimes (i (/ (length parens) 2)) |
211 (setq open (aref parens (* i 2)) | 244 (setq open (aref parens (* i 2)) |
212 close (aref parens (1+ (* i 2)))) | 245 close (aref parens (1+ (* i 2)))) |
213 (modify-syntax-entry open (format "(%c" close)) | 246 (modify-syntax-entry open (format "(%c" close)) |
214 (modify-syntax-entry close (format ")%c" open)))) | 247 (modify-syntax-entry close (format ")%c" open)))) |
215 | 248 |
216 (let ((generic-big5-1-char (make-char 'chinese-big5-1)) | 249 ;; Arabic character set |
217 (generic-big5-2-char (make-char 'chinese-big5-2))) | 250 |
218 ;; (modify-syntax-entry generic-big5-1-char "w") | 251 (let ((charsets '(arabic-iso8859-6 |
219 ;; (modify-syntax-entry generic-big5-2-char "w") | 252 arabic-digit |
220 | 253 arabic-1-column |
221 (modify-category-entry generic-big5-1-char ?c) | 254 arabic-2-column))) |
222 (modify-category-entry generic-big5-2-char ?c) | 255 (while charsets |
223 | 256 (map-charset-chars #'modify-category-entry (car charsets) ?b) |
224 (modify-category-entry generic-big5-1-char ?C) | 257 (setq charsets (cdr charsets)))) |
225 (modify-category-entry generic-big5-2-char ?C) | 258 (modify-category-entry '(#x600 . #x6ff) ?b) |
226 | 259 (modify-category-entry '(#xfb50 . #xfdff) ?b) |
227 (modify-category-entry generic-big5-1-char ?\|) | 260 (modify-category-entry '(#xfe70 . #xfefe) ?b) |
228 (modify-category-entry generic-big5-2-char ?\|)) | |
229 | |
230 | |
231 ;; Chinese character set (CNS11643) | |
232 | |
233 (let ((cns-list '(chinese-cns11643-1 | |
234 chinese-cns11643-2 | |
235 chinese-cns11643-3 | |
236 chinese-cns11643-4 | |
237 chinese-cns11643-5 | |
238 chinese-cns11643-6 | |
239 chinese-cns11643-7)) | |
240 generic-char) | |
241 (while cns-list | |
242 (setq generic-char (make-char (car cns-list))) | |
243 ;; (modify-syntax-entry generic-char "w") | |
244 (modify-category-entry generic-char ?c) | |
245 (modify-category-entry generic-char ?C) | |
246 (modify-category-entry generic-char ?|) | |
247 (setq cns-list (cdr cns-list)))) | |
248 | 261 |
249 ;; Cyrillic character set (ISO-8859-5) | 262 ;; Cyrillic character set (ISO-8859-5) |
250 | 263 |
251 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y) | 264 (modify-syntax-entry ?№ ".") |
252 | |
253 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ") | |
254 (modify-syntax-entry ?,L-(B ".") | |
255 (modify-syntax-entry ?,Lp(B ".") | |
256 (modify-syntax-entry ?,L}(B ".") | |
257 (let ((tbl (standard-case-table))) | |
258 (set-case-syntax-pair ?,L!(B ?,Lq(B tbl) | |
259 (set-case-syntax-pair ?,L"(B ?,Lr(B tbl) | |
260 (set-case-syntax-pair ?,L#(B ?,Ls(B tbl) | |
261 (set-case-syntax-pair ?,L$(B ?,Lt(B tbl) | |
262 (set-case-syntax-pair ?,L%(B ?,Lu(B tbl) | |
263 (set-case-syntax-pair ?,L&(B ?,Lv(B tbl) | |
264 (set-case-syntax-pair ?,L'(B ?,Lw(B tbl) | |
265 (set-case-syntax-pair ?,L((B ?,Lx(B tbl) | |
266 (set-case-syntax-pair ?,L)(B ?,Ly(B tbl) | |
267 (set-case-syntax-pair ?,L*(B ?,Lz(B tbl) | |
268 (set-case-syntax-pair ?,L+(B ?,L{(B tbl) | |
269 (set-case-syntax-pair ?,L,(B ?,L|(B tbl) | |
270 (set-case-syntax-pair ?,L.(B ?,L~(B tbl) | |
271 (set-case-syntax-pair ?,L/(B ?,L(B tbl) | |
272 (set-case-syntax-pair ?,L0(B ?,LP(B tbl) | |
273 (set-case-syntax-pair ?,L1(B ?,LQ(B tbl) | |
274 (set-case-syntax-pair ?,L2(B ?,LR(B tbl) | |
275 (set-case-syntax-pair ?,L3(B ?,LS(B tbl) | |
276 (set-case-syntax-pair ?,L4(B ?,LT(B tbl) | |
277 (set-case-syntax-pair ?,L5(B ?,LU(B tbl) | |
278 (set-case-syntax-pair ?,L6(B ?,LV(B tbl) | |
279 (set-case-syntax-pair ?,L7(B ?,LW(B tbl) | |
280 (set-case-syntax-pair ?,L8(B ?,LX(B tbl) | |
281 (set-case-syntax-pair ?,L9(B ?,LY(B tbl) | |
282 (set-case-syntax-pair ?,L:(B ?,LZ(B tbl) | |
283 (set-case-syntax-pair ?,L;(B ?,L[(B tbl) | |
284 (set-case-syntax-pair ?,L<(B ?,L\(B tbl) | |
285 (set-case-syntax-pair ?,L=(B ?,L](B tbl) | |
286 (set-case-syntax-pair ?,L>(B ?,L^(B tbl) | |
287 (set-case-syntax-pair ?,L?(B ?,L_(B tbl) | |
288 (set-case-syntax-pair ?,L@(B ?,L`(B tbl) | |
289 (set-case-syntax-pair ?,LA(B ?,La(B tbl) | |
290 (set-case-syntax-pair ?,LB(B ?,Lb(B tbl) | |
291 (set-case-syntax-pair ?,LC(B ?,Lc(B tbl) | |
292 (set-case-syntax-pair ?,LD(B ?,Ld(B tbl) | |
293 (set-case-syntax-pair ?,LE(B ?,Le(B tbl) | |
294 (set-case-syntax-pair ?,LF(B ?,Lf(B tbl) | |
295 (set-case-syntax-pair ?,LG(B ?,Lg(B tbl) | |
296 (set-case-syntax-pair ?,LH(B ?,Lh(B tbl) | |
297 (set-case-syntax-pair ?,LI(B ?,Li(B tbl) | |
298 (set-case-syntax-pair ?,LJ(B ?,Lj(B tbl) | |
299 (set-case-syntax-pair ?,LK(B ?,Lk(B tbl) | |
300 (set-case-syntax-pair ?,LL(B ?,Ll(B tbl) | |
301 (set-case-syntax-pair ?,LM(B ?,Lm(B tbl) | |
302 (set-case-syntax-pair ?,LN(B ?,Ln(B tbl) | |
303 (set-case-syntax-pair ?,LO(B ?,Lo(B tbl) | |
304 (set-case-syntax-pair ?$,1(!(B ?$,1(q(B tbl) | |
305 (set-case-syntax-pair ?$,1("(B ?$,1(r(B tbl) | |
306 (set-case-syntax-pair ?$,1(#(B ?$,1(s(B tbl) | |
307 (set-case-syntax-pair ?$,1($(B ?$,1(t(B tbl) | |
308 (set-case-syntax-pair ?$,1(%(B ?$,1(u(B tbl) | |
309 (set-case-syntax-pair ?$,1(&(B ?$,1(v(B tbl) | |
310 (set-case-syntax-pair ?$,1('(B ?$,1(w(B tbl) | |
311 (set-case-syntax-pair ?$,1(((B ?$,1(x(B tbl) | |
312 (set-case-syntax-pair ?$,1()(B ?$,1(y(B tbl) | |
313 (set-case-syntax-pair ?$,1(*(B ?$,1(z(B tbl) | |
314 (set-case-syntax-pair ?$,1(+(B ?$,1({(B tbl) | |
315 (set-case-syntax-pair ?$,1(,(B ?$,1(|(B tbl) | |
316 (set-case-syntax-pair ?$,1(.(B ?$,1(~(B tbl) | |
317 (set-case-syntax-pair ?$,1(/(B ?$,1((B tbl) | |
318 (set-case-syntax-pair ?$,1(0(B ?$,1(P(B tbl) | |
319 (set-case-syntax-pair ?$,1(1(B ?$,1(Q(B tbl) | |
320 (set-case-syntax-pair ?$,1(2(B ?$,1(R(B tbl) | |
321 (set-case-syntax-pair ?$,1(3(B ?$,1(S(B tbl) | |
322 (set-case-syntax-pair ?$,1(4(B ?$,1(T(B tbl) | |
323 (set-case-syntax-pair ?$,1(5(B ?$,1(U(B tbl) | |
324 (set-case-syntax-pair ?$,1(6(B ?$,1(V(B tbl) | |
325 (set-case-syntax-pair ?$,1(7(B ?$,1(W(B tbl) | |
326 (set-case-syntax-pair ?$,1(8(B ?$,1(X(B tbl) | |
327 (set-case-syntax-pair ?$,1(9(B ?$,1(Y(B tbl) | |
328 (set-case-syntax-pair ?$,1(:(B ?$,1(Z(B tbl) | |
329 (set-case-syntax-pair ?$,1(;(B ?$,1([(B tbl) | |
330 (set-case-syntax-pair ?$,1(<(B ?$,1(\(B tbl) | |
331 (set-case-syntax-pair ?$,1(=(B ?$,1(](B tbl) | |
332 (set-case-syntax-pair ?$,1(>(B ?$,1(^(B tbl) | |
333 (set-case-syntax-pair ?$,1(?(B ?$,1(_(B tbl) | |
334 (set-case-syntax-pair ?$,1(@(B ?$,1(`(B tbl) | |
335 (set-case-syntax-pair ?$,1(A(B ?$,1(a(B tbl) | |
336 (set-case-syntax-pair ?$,1(B(B ?$,1(b(B tbl) | |
337 (set-case-syntax-pair ?$,1(C(B ?$,1(c(B tbl) | |
338 (set-case-syntax-pair ?$,1(D(B ?$,1(d(B tbl) | |
339 (set-case-syntax-pair ?$,1(E(B ?$,1(e(B tbl) | |
340 (set-case-syntax-pair ?$,1(F(B ?$,1(f(B tbl) | |
341 (set-case-syntax-pair ?$,1(G(B ?$,1(g(B tbl) | |
342 (set-case-syntax-pair ?$,1(H(B ?$,1(h(B tbl) | |
343 (set-case-syntax-pair ?$,1(I(B ?$,1(i(B tbl) | |
344 (set-case-syntax-pair ?$,1(J(B ?$,1(j(B tbl) | |
345 (set-case-syntax-pair ?$,1(K(B ?$,1(k(B tbl) | |
346 (set-case-syntax-pair ?$,1(L(B ?$,1(l(B tbl) | |
347 (set-case-syntax-pair ?$,1(M(B ?$,1(m(B tbl) | |
348 (set-case-syntax-pair ?$,1(N(B ?$,1(n(B tbl) | |
349 (set-case-syntax-pair ?$,1(O(B ?$,1(o(B tbl)) | |
350 | |
351 ;; Devanagari character set | |
352 | |
353 ;;; Commented out since the categories appear not to be used anywhere | |
354 ;;; and word syntax is the default. | |
355 ;; (let ((deflist '(;; chars syntax category | |
356 ;; ("$(5!!!"!#(B" "w" ?7) ; vowel-modifying diacritical mark | |
357 ;; ; chandrabindu, anuswar, visarga | |
358 ;; ("$(5!$(B-$(5!2(B" "w" ?1) ; independent vowel | |
359 ;; ("$(5!3(B-$(5!X(B" "w" ?0) ; consonant | |
360 ;; ("$(5!Z(B-$(5!g(B" "w" ?8) ; matra | |
361 ;; ("$(5!q(B-$(5!z(B" "w" ?6) ; digit | |
362 ;; ;; Unicode equivalents | |
363 ;; ("$,15A5B5C(B" "w" ?7) ; vowel-modifying diacritical mark | |
364 ;; ; chandrabindu, anuswar, visarga | |
365 ;; ("$,15E(B-$,15M(B" "w" ?1) ; independent vowel | |
366 ;; ("$,15U(B-$,15y(B" "w" ?0) ; consonant | |
367 ;; ("$,15~(B-$,16)(B" "w" ?8) ; matra | |
368 ;; ("$,16F(B-$,16O(B" "w" ?6) ; digit | |
369 ;; )) | |
370 ;; elm chars len syntax category to ch i) | |
371 ;; (while deflist | |
372 ;; (setq elm (car deflist)) | |
373 ;; (setq chars (car elm) | |
374 ;; len (length chars) | |
375 ;; syntax (nth 1 elm) | |
376 ;; category (nth 2 elm) | |
377 ;; i 0) | |
378 ;; (while (< i len) | |
379 ;; (if (= (aref chars i) ?-) | |
380 ;; (setq i (1+ i) | |
381 ;; to (aref chars i)) | |
382 ;; (setq ch (aref chars i) | |
383 ;; to ch)) | |
384 ;; (while (<= ch to) | |
385 ;; (modify-syntax-entry ch syntax) | |
386 ;; (modify-category-entry ch category) | |
387 ;; (setq ch (1+ ch))) | |
388 ;; (setq i (1+ i))) | |
389 ;; (setq deflist (cdr deflist)))) | |
390 | 265 |
391 ;; Ethiopic character set | 266 ;; Ethiopic character set |
392 | 267 |
393 (modify-category-entry (make-char 'ethiopic) ?e) | 268 (modify-category-entry '(#x1200 . #x137b) ?e) |
394 ;; (modify-syntax-entry (make-char 'ethiopic) "w") | 269 (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ? ?))) |
395 (dotimes (i (1+ (- #x137c #x1200))) | |
396 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e)) | |
397 (let ((chars '(?$(3$h(B ?$(3$i(B ?$(3$j(B ?$(3$k(B ?$(3$l(B ?$(3$m(B ?$(3$n(B ?$(3$o(B ?$(3%i(B ?$(3%t(B ?$(3%u(B ?$(3%v(B ?$(3%w(B ?$(3%x(B | |
398 ;; Unicode equivalents of the above: | |
399 ?$,1Q!(B ?$,1Q"(B ?$,1Q#(B ?$,1Q$(B ?$,1Q%(B ?$,1Q&(B ?$,1Q'(B ?$,1Q((B ?$,3op(B ?$,3o{(B ?$,3o|(B ?$,3o}(B ?$,3o~(B ?$,3o(B))) | |
400 (while chars | 270 (while chars |
401 (modify-syntax-entry (car chars) ".") | 271 (modify-syntax-entry (car chars) ".") |
402 (setq chars (cdr chars)))) | 272 (setq chars (cdr chars)))) |
403 | 273 (map-charset-chars #'modify-category-entry 'ethiopic ?e) |
404 ;; Greek character set (ISO-8859-7) | |
405 | |
406 (modify-category-entry (make-char 'greek-iso8859-7) ?g) | |
407 (let ((c #x370)) | |
408 (while (<= c #x3ff) | |
409 (modify-category-entry (decode-char 'ucs c) ?g) | |
410 (setq c (1+ c)))) | |
411 | |
412 ;; (let ((c 182)) | |
413 ;; (while (< c 255) | |
414 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w") | |
415 ;; (setq c (1+ c)))) | |
416 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP | |
417 (modify-syntax-entry ?,F7(B ".") | |
418 (modify-syntax-entry ?,F;(B ".") | |
419 (modify-syntax-entry ?,F=(B ".") | |
420 (let ((tbl (standard-case-table))) | |
421 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious | |
422 ;; in several cases. | |
423 (set-case-syntax ?,F!(B "." tbl) | |
424 (set-case-syntax ?,F"(B "." tbl) | |
425 (set-case-syntax ?,F&(B "." tbl) | |
426 (set-case-syntax ?,F&(B "_" tbl) | |
427 (set-case-syntax ?,F'(B "." tbl) | |
428 (set-case-syntax ?,F)(B "_" tbl) | |
429 (set-case-syntax ?,F+(B "." tbl) | |
430 (set-case-syntax ?,F,(B "_" tbl) | |
431 (set-case-syntax ?,F-(B "_" tbl) | |
432 (set-case-syntax ?,F/(B "." tbl) | |
433 (set-case-syntax ?,F0(B "_" tbl) | |
434 (set-case-syntax ?,F1(B "_" tbl) | |
435 ;; (set-case-syntax ?,F7(B "_" tbl) | |
436 ;; (set-case-syntax ?,F=(B "_" tbl) | |
437 (set-case-syntax-pair ?,FA(B ?,Fa(B tbl) | |
438 (set-case-syntax-pair ?,FB(B ?,Fb(B tbl) | |
439 (set-case-syntax-pair ?,FC(B ?,Fc(B tbl) | |
440 (set-case-syntax-pair ?,FD(B ?,Fd(B tbl) | |
441 (set-case-syntax-pair ?,FE(B ?,Fe(B tbl) | |
442 (set-case-syntax-pair ?,FF(B ?,Ff(B tbl) | |
443 (set-case-syntax-pair ?,FG(B ?,Fg(B tbl) | |
444 (set-case-syntax-pair ?,FH(B ?,Fh(B tbl) | |
445 (set-case-syntax-pair ?,FI(B ?,Fi(B tbl) | |
446 (set-case-syntax-pair ?,FJ(B ?,Fj(B tbl) | |
447 (set-case-syntax-pair ?,FK(B ?,Fk(B tbl) | |
448 (set-case-syntax-pair ?,FL(B ?,Fl(B tbl) | |
449 (set-case-syntax-pair ?,FM(B ?,Fm(B tbl) | |
450 (set-case-syntax-pair ?,FN(B ?,Fn(B tbl) | |
451 (set-case-syntax-pair ?,FO(B ?,Fo(B tbl) | |
452 (set-case-syntax-pair ?,FP(B ?,Fp(B tbl) | |
453 (set-case-syntax-pair ?,FQ(B ?,Fq(B tbl) | |
454 (set-case-syntax-pair ?,FS(B ?,Fs(B tbl) | |
455 (set-case-syntax-pair ?,FT(B ?,Ft(B tbl) | |
456 (set-case-syntax-pair ?,FU(B ?,Fu(B tbl) | |
457 (set-case-syntax-pair ?,FV(B ?,Fv(B tbl) | |
458 (set-case-syntax-pair ?,FW(B ?,Fw(B tbl) | |
459 (set-case-syntax-pair ?,FX(B ?,Fx(B tbl) | |
460 (set-case-syntax-pair ?,FY(B ?,Fy(B tbl) | |
461 (set-case-syntax-pair ?,FZ(B ?,Fz(B tbl) | |
462 (set-case-syntax-pair ?,F[(B ?,F{(B tbl) | |
463 (set-case-syntax-pair ?,F?(B ?,F~(B tbl) | |
464 (set-case-syntax-pair ?,F>(B ?,F}(B tbl) | |
465 (set-case-syntax-pair ?,F<(B ?,F|(B tbl) | |
466 (set-case-syntax-pair ?,F6(B ?,F\(B tbl) | |
467 (set-case-syntax-pair ?,F8(B ?,F](B tbl) | |
468 (set-case-syntax-pair ?,F9(B ?,F^(B tbl) | |
469 (set-case-syntax-pair ?,F:(B ?,F_(B tbl) | |
470 ;; Unicode equivalents | |
471 (set-case-syntax-pair ?$,1&q(B ?$,1'1(B tbl) | |
472 (set-case-syntax-pair ?$,1&r(B ?$,1'2(B tbl) | |
473 (set-case-syntax-pair ?$,1&s(B ?$,1'3(B tbl) | |
474 (set-case-syntax-pair ?$,1&t(B ?$,1'4(B tbl) | |
475 (set-case-syntax-pair ?$,1&u(B ?$,1'5(B tbl) | |
476 (set-case-syntax-pair ?$,1&v(B ?$,1'6(B tbl) | |
477 (set-case-syntax-pair ?$,1&w(B ?$,1'7(B tbl) | |
478 (set-case-syntax-pair ?$,1&x(B ?$,1'8(B tbl) | |
479 (set-case-syntax-pair ?$,1&y(B ?$,1'9(B tbl) | |
480 (set-case-syntax-pair ?$,1&z(B ?$,1':(B tbl) | |
481 (set-case-syntax-pair ?$,1&{(B ?$,1';(B tbl) | |
482 (set-case-syntax-pair ?$,1&|(B ?$,1'<(B tbl) | |
483 (set-case-syntax-pair ?$,1&}(B ?$,1'=(B tbl) | |
484 (set-case-syntax-pair ?$,1&~(B ?$,1'>(B tbl) | |
485 (set-case-syntax-pair ?$,1&(B ?$,1'?(B tbl) | |
486 (set-case-syntax-pair ?$,1' (B ?$,1'@(B tbl) | |
487 (set-case-syntax-pair ?$,1'!(B ?$,1'A(B tbl) | |
488 (set-case-syntax-pair ?$,1'#(B ?$,1'C(B tbl) | |
489 (set-case-syntax-pair ?$,1'$(B ?$,1'D(B tbl) | |
490 (set-case-syntax-pair ?$,1'%(B ?$,1'E(B tbl) | |
491 (set-case-syntax-pair ?$,1'&(B ?$,1'F(B tbl) | |
492 (set-case-syntax-pair ?$,1''(B ?$,1'G(B tbl) | |
493 (set-case-syntax-pair ?$,1'((B ?$,1'H(B tbl) | |
494 (set-case-syntax-pair ?$,1')(B ?$,1'I(B tbl) | |
495 (set-case-syntax-pair ?$,1'*(B ?$,1'J(B tbl) | |
496 (set-case-syntax-pair ?$,1'+(B ?$,1'K(B tbl) | |
497 (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl) | |
498 (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl) | |
499 (set-case-syntax-pair ?$,1&l(B ?$,1'L(B tbl) | |
500 (set-case-syntax-pair ?$,1&f(B ?$,1',(B tbl) | |
501 (set-case-syntax-pair ?$,1&h(B ?$,1'-(B tbl) | |
502 (set-case-syntax-pair ?$,1&i(B ?$,1'.(B tbl) | |
503 (set-case-syntax-pair ?$,1&j(B ?$,1'/(B tbl)) | |
504 | 274 |
505 ;; Hebrew character set (ISO-8859-8) | 275 ;; Hebrew character set (ISO-8859-8) |
506 | 276 |
507 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) | 277 (modify-syntax-entry #x5be ".") ; MAQAF |
508 (let ((c #x591)) | 278 (modify-syntax-entry #x5c0 ".") ; PASEQ |
509 (while (<= c #x5f4) | 279 (modify-syntax-entry #x5c3 ".") ; SOF PASUQ |
510 (modify-category-entry (decode-char 'ucs c) ?w) | 280 (modify-syntax-entry #x5f3 ".") ; GERESH |
511 (setq c (1+ c)))) | 281 (modify-syntax-entry #x5f4 ".") ; GERSHAYIM |
512 | |
513 (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ | |
514 (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ | |
515 (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF | |
516 (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ | |
517 (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ | |
518 (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH | |
519 (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM | |
520 | |
521 ;; (let ((c 224)) | |
522 ;; (while (< c 251) | |
523 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w") | |
524 ;; (setq c (1+ c)))) | |
525 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP | |
526 | 282 |
527 ;; Indian character set (IS 13194 and other Emacs original Indian charsets) | 283 ;; Indian character set (IS 13194 and other Emacs original Indian charsets) |
528 | 284 |
529 (modify-category-entry (make-char 'indian-is13194) ?i) | 285 (modify-category-entry '(#x901 . #x970) ?i) |
530 (modify-category-entry (make-char 'indian-2-column) ?I) | 286 (map-charset-chars #'modify-category-entry 'indian-is13194 ?i) |
531 (modify-category-entry (make-char 'indian-glyph) ?I) | 287 (map-charset-chars #'modify-category-entry 'indian-2-column ?i) |
532 ;; Unicode Devanagari block | |
533 (let ((c #x901)) | |
534 (while (<= c #x970) | |
535 (modify-category-entry (decode-char 'ucs c) ?i) | |
536 (setq c (1+ c)))) | |
537 | |
538 (let ((l '(;; RANGE CATEGORY MEANINGS | |
539 (#x01 #x03 ?7) ; vowel modifier | |
540 (#x05 #x14 ?1) ; base vowel | |
541 (#x15 #x39 ?0) ; consonants | |
542 (#x3e #x4d ?8) ; vowel modifier | |
543 (#x51 #x54 ?4) ; stress/tone mark | |
544 (#x58 #x5f ?0) ; consonants | |
545 (#x60 #x61 ?1) ; base vowel | |
546 (#x62 #x63 ?8) ; vowel modifier | |
547 (#x66 #x6f ?6) ; digits | |
548 ))) | |
549 (dolist (elt1 '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00)) | |
550 (dolist (elt2 l) | |
551 (let* ((from (car elt2)) | |
552 (counts (1+ (- (nth 1 elt2) from))) | |
553 (category (nth 2 elt2))) | |
554 (dotimes (i counts) | |
555 (modify-category-entry (decode-char 'ucs (+ elt1 from i)) | |
556 category)))))) | |
557 | |
558 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212) | |
559 | |
560 (modify-category-entry (make-char 'katakana-jisx0201) ?k) | |
561 (modify-category-entry (make-char 'katakana-jisx0201) ?j) | |
562 (modify-category-entry (make-char 'latin-jisx0201) ?r) | |
563 (modify-category-entry (make-char 'japanese-jisx0208) ?j) | |
564 (modify-category-entry (make-char 'japanese-jisx0212) ?j) | |
565 (modify-category-entry (make-char 'katakana-jisx0201) ?\|) | |
566 (modify-category-entry (make-char 'japanese-jisx0208) ?\|) | |
567 (modify-category-entry (make-char 'japanese-jisx0212) ?\|) | |
568 | |
569 ;; Unicode equivalents of JISX0201-kana | |
570 (let ((c #xff61)) | |
571 (while (<= c #xff9f) | |
572 (modify-category-entry (decode-char 'ucs c) ?k) | |
573 (modify-category-entry (decode-char 'ucs c) ?j) | |
574 (modify-category-entry (decode-char 'ucs c) ?\|) | |
575 (setq c (1+ c)))) | |
576 | |
577 ;; Katakana block | |
578 (let ((c #x30a0)) | |
579 (while (<= c #x30ff) | |
580 ;; ?K is double width, ?k isn't specified | |
581 (modify-category-entry (decode-char 'ucs c) ?k) | |
582 (modify-category-entry (decode-char 'ucs c) ?j) | |
583 (modify-category-entry (decode-char 'ucs c) ?\|) | |
584 (setq c (1+ c)))) | |
585 | |
586 ;; Hiragana block | |
587 (let ((c #x3040)) | |
588 (while (<= c #x309f) | |
589 ;; ?H is actually defined to be double width | |
590 (modify-category-entry (decode-char 'ucs c) ?H) | |
591 ;;(modify-category-entry (decode-char 'ucs c) ?j) | |
592 (modify-category-entry (decode-char 'ucs c) ?\|) | |
593 (setq c (1+ c)))) | |
594 | |
595 ;; JISX0208 | |
596 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w") | |
597 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_") | |
598 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_") | |
599 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_") | |
600 (let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B))) | |
601 (while chars | |
602 (modify-syntax-entry (car chars) "w") | |
603 (setq chars (cdr chars)))) | |
604 (modify-syntax-entry ?\$B!J(B "($B!K(B") | |
605 (modify-syntax-entry ?\$B!N(B "($B!O(B") | |
606 (modify-syntax-entry ?\$B!P(B "($B!Q(B") | |
607 (modify-syntax-entry ?\$B!V(B "($B!W(B") | |
608 (modify-syntax-entry ?\$B!X(B "($B!Y(B") | |
609 (modify-syntax-entry ?\$B!K(B ")$B!J(B") | |
610 (modify-syntax-entry ?\$B!O(B ")$B!N(B") | |
611 (modify-syntax-entry ?\$B!Q(B ")$B!P(B") | |
612 (modify-syntax-entry ?\$B!W(B ")$B!V(B") | |
613 (modify-syntax-entry ?\$B!Y(B ")$B!X(B") | |
614 | |
615 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A) | |
616 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H) | |
617 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K) | |
618 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G) | |
619 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y) | |
620 (let ((row 48)) | |
621 (while (< row 127) | |
622 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C) | |
623 (setq row (1+ row)))) | |
624 (modify-category-entry ?$B!<(B ?K) | |
625 (let ((chars '(?$B!+(B ?$B!,(B))) | |
626 (while chars | |
627 (modify-category-entry (car chars) ?K) | |
628 (modify-category-entry (car chars) ?H) | |
629 (setq chars (cdr chars)))) | |
630 (let ((chars '(?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B))) | |
631 (while chars | |
632 (modify-category-entry (car chars) ?C) | |
633 (setq chars (cdr chars)))) | |
634 | |
635 ;; JISX0212 | |
636 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w") | |
637 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_") | |
638 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_") | |
639 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_") | |
640 | |
641 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C) | |
642 | |
643 ;; JISX0201-Kana | |
644 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w") | |
645 (let ((chars '(?(I!(B ?(I$(B ?(I%(B | |
646 ;; Unicode: | |
647 ?$,3sa(B ?$,3sd(B ?$,3se(B))) | |
648 (while chars | |
649 (modify-syntax-entry (car chars) ".") | |
650 (setq chars (cdr chars)))) | |
651 | |
652 (modify-syntax-entry ?\(I"(B "((I#(B") | |
653 (modify-syntax-entry ?\(I#(B "((I"(B") | |
654 | |
655 ;; Korean character set (KSC5601) | |
656 | |
657 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w") | |
658 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") | |
659 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") | |
660 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_") | |
661 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_") | |
662 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_") | |
663 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_") | |
664 | |
665 (modify-category-entry (make-char 'korean-ksc5601) ?h) | |
666 (modify-category-entry (make-char 'korean-ksc5601 35) ?A) | |
667 (modify-category-entry (make-char 'korean-ksc5601 37) ?G) | |
668 (modify-category-entry (make-char 'korean-ksc5601 42) ?H) | |
669 (modify-category-entry (make-char 'korean-ksc5601 43) ?K) | |
670 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) | |
671 | |
672 ;; Latin character set (latin-1,2,3,4,5,8,9) | |
673 | |
674 (modify-category-entry (make-char 'latin-iso8859-1) ?l) | |
675 (modify-category-entry (make-char 'latin-iso8859-2) ?l) | |
676 (modify-category-entry (make-char 'latin-iso8859-3) ?l) | |
677 (modify-category-entry (make-char 'latin-iso8859-4) ?l) | |
678 (modify-category-entry (make-char 'latin-iso8859-9) ?l) | |
679 (modify-category-entry (make-char 'latin-iso8859-14) ?l) | |
680 (modify-category-entry (make-char 'latin-iso8859-15) ?l) | |
681 | |
682 (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ ) | |
683 (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ ) | |
684 (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ ) | |
685 (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ ) | |
686 (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ ) | |
687 (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ ) | |
688 (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ ) | |
689 | 288 |
690 ;; Lao character set | 289 ;; Lao character set |
691 | 290 |
692 (modify-category-entry (make-char 'lao) ?o) | 291 (modify-category-entry '(#xe80 . #xeff) ?o) |
693 (dotimes (i (1+ (- #xeff #xe80))) | 292 (map-charset-chars #'modify-category-entry 'lao ?o) |
694 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o)) | 293 |
695 | 294 (let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant |
696 (let ((deflist '(;; chars syntax category | 295 ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base |
697 ("(1!(B-(1N(B" "w" ?0) ; consonant | 296 ("ັິ-ືົໍ" "w" ?2) ; vowel upper |
698 ("(1PRS]`(B-(1d(B" "w" ?1) ; vowel base | 297 ("ຸູ" "w" ?3) ; vowel lower |
699 ("(1QT(B-(1W[m(B" "w" ?2) ; vowel upper | 298 ("່-໋" "w" ?4) ; tone mark |
700 ("(1XY(B" "w" ?3) ; vowel lower | 299 ("ຼຽ" "w" ?9) ; semivowel lower |
701 ("(1h(B-(1l(B" "w" ?4) ; tone mark | 300 ("໐-໙" "w" ?6) ; digit |
702 ("(1\(B" "w" ?9) ; semivowel lower | 301 ("ຯໆ" "_" ?5) ; symbol |
703 ("(1p(B-(1y(B" "w" ?6) ; digit | |
704 ("(1Of(B" "_" ?5) ; symbol | |
705 ;; Unicode equivalents | |
706 ("$,1D!(B-$,1DN(B" "w" ?0) ; consonant | |
707 ("$,1DPDRDSD]D`(B-$,1Dd(B" "w" ?1) ; vowel base | |
708 ("$,1DQDT(B-$,1DWD[Dm(B" "w" ?2) ; vowel upper | |
709 ("$,1DXDY(B" "w" ?3) ; vowel lower | |
710 ("$,1Dh(B-$,1Dk(B" "w" ?4) ; tone mark | |
711 ("$,1D\D](B" "w" ?9) ; semivowel lower | |
712 ("$,1Dp(B-$,1Dy(B" "w" ?6) ; digit | |
713 ("$,1DODf(B" "_" ?5) ; symbol | |
714 )) | 302 )) |
715 elm chars len syntax category to ch i) | 303 elm chars len syntax category to ch i) |
716 (while deflist | 304 (while deflist |
717 (setq elm (car deflist)) | 305 (setq elm (car deflist)) |
718 (setq chars (car elm) | 306 (setq chars (car elm) |
734 (setq i (1+ i))) | 322 (setq i (1+ i))) |
735 (setq deflist (cdr deflist)))) | 323 (setq deflist (cdr deflist)))) |
736 | 324 |
737 ;; Thai character set (TIS620) | 325 ;; Thai character set (TIS620) |
738 | 326 |
739 (modify-category-entry (make-char 'thai-tis620) ?t) | 327 (modify-category-entry '(#xe00 . #xe7f) ?t) |
740 (dotimes (i (1+ (- #xe7f #xe00))) | 328 (map-charset-chars #'modify-category-entry 'thai-tis620 ?t) |
741 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t)) | |
742 | 329 |
743 (let ((deflist '(;; chars syntax category | 330 (let ((deflist '(;; chars syntax category |
744 (",T!(B-,TCEG(B-,TN(B" "w" ?0) ; consonant | 331 ("ก-รลว-ฮ" "w" ?0) ; consonant |
745 (",TDFPRS`(B-,Te(B" "w" ?1) ; vowel base | 332 ("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base |
746 (",TQT(B-,TWgn(B" "w" ?2) ; vowel upper | 333 ("ัิ-ื็๎" "w" ?2) ; vowel upper |
747 (",TX(B-,TZ(B" "w" ?3) ; vowel lower | 334 ("ุ-ฺ" "w" ?3) ; vowel lower |
748 (",Th(B-,Tm(B" "w" ?4) ; tone mark | 335 ("่-ํ" "w" ?4) ; tone mark |
749 (",Tp(B-,Ty(B" "w" ?6) ; digit | 336 ("๐-๙" "w" ?6) ; digit |
750 (",TOf_oz{(B" "_" ?5) ; symbol | 337 ("ฯๆ฿๏๚๛" "_" ?5) ; symbol |
751 ;; Unicode equivalents | |
752 ("$,1Ba(B-$,1C#C%C'(B-$,1C.(B" "w" ?0) ; consonant | |
753 ("$,1C$C&C0C2C3C@(B-$,1CE(B" "w" ?1) ; vowel base | |
754 ("$,1C1C4(B-$,1C7CGCN(B" "w" ?2) ; vowel upper | |
755 ("$,1C8(B-$,1C:(B" "w" ?3) ; vowel lower | |
756 ("$,1CH(B-$,1CM(B" "w" ?4) ; tone mark | |
757 ("$,1CP(B-$,1CY(B" "w" ?6) ; digit | |
758 ("$,1C/CFC?COCZC[(B" "_" ?5) ; symbol | |
759 )) | 338 )) |
760 elm chars len syntax category to ch i) | 339 elm chars len syntax category to ch i) |
761 (while deflist | 340 (while deflist |
762 (setq elm (car deflist)) | 341 (setq elm (car deflist)) |
763 (setq chars (car elm) | 342 (setq chars (car elm) |
779 (setq i (1+ i))) | 358 (setq i (1+ i))) |
780 (setq deflist (cdr deflist)))) | 359 (setq deflist (cdr deflist)))) |
781 | 360 |
782 ;; Tibetan character set | 361 ;; Tibetan character set |
783 | 362 |
784 (modify-category-entry (make-char 'tibetan) ?q) | 363 (modify-category-entry '(#xf00 . #xfff) ?q) |
785 (modify-category-entry (make-char 'tibetan-1-column) ?q) | 364 (map-charset-chars #'modify-category-entry 'tibetan ?q) |
786 (dotimes (i (1+ (- #xfff #xf00))) | 365 (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q) |
787 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q)) | |
788 | 366 |
789 (let ((deflist '(;; chars syntax category | 367 (let ((deflist '(;; chars syntax category |
790 ("4$(7"!0"!1(B-4$(7"J0"J14"K0"K1(B" "w" ?0) ; consonant | 368 ("ཀ-ཀྵཪ" "w" ?0) ; consonant |
791 ("$(7#!(B-$(7#J#K#L#M!"!#(B" "w" ?0) ; | 369 ("ྐ-ྐྵྺྻྼ" "w" ?0) ; |
792 ("$(7$!(B-$(7$e(B" "w" ?0) ; | 370 ("-" "w" ?0) ; |
793 ("$(7%!(B-$(7%u(B" "w" ?0) ; | 371 ("-" "w" ?0) ; |
794 ("$(7"S"["\"]"^"a(B" "w" ?2) ; upper vowel | 372 ("ིེཻོཽྀ" "w" ?2) ; upper vowel |
795 ("$(7"_"c"d"g"h"i"j"k"l(B" "w" ?2) ; upper modifier | 373 ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier |
796 ("$(7!I"Q"R"U"e!e!g(B" "w" ?3) ; lowel vowel/modifier | 374 ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier |
797 ("$(7!P(B-$(7!Y!Z(B-$(7!c(B" "w" ?6) ; digit | 375 ("" "w" ?3) ; invisible vowel a |
798 ("$(7!;!=(B-$(7!B!D"`(B" "." ?|) ; line-break char | 376 ("༠-༩༪-༳" "w" ?6) ; digit |
799 ("$(8!;!=!?!@!A!D"`(B" "." ?|) ; | 377 ("་།-༒༔ཿ" "." ?|) ; line-break char |
800 ("$(7!8!;!=(B-$(7!B!D"`!m!d(B" "." ?>) ; prohibition | 378 ("་།༏༐༑༔ཿ" "." ?|) ; |
801 ("$(8!;!=!?!@!A!D"`(B" "." ?>) ; | 379 ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition |
802 ("$(7!0(B-$(7!:!l#R#S"f(B" "." ?<) ; prohibition | 380 ("་།༏༐༑༔ཿ" "." ?>) ; |
803 ("$(7!C!E(B-$(7!H!J(B-$(7!O!f!h(B-$(7!k!n!o#O#P(B-$(7#`(B" "." ?q) ; others | 381 ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition |
804 | 382 ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others |
805 ;; Unicode version (not complete) | |
806 ("$,1F (B-$,1FIFJ(B" "w" ?0) ; consonant | |
807 ("$,1Fp(B-$,1G9G:G;G<(B" "w" ?0) ; | |
808 ("$,1FRFZF[F\F]F`(B" "w" ?2) ; upper vowel | |
809 ("$,1F^FbFcFfFgFhFiFjFk(B" "w" ?2) ; upper modifier | |
810 ("$,1EYFPFQFTFdEuEw(B" "w" ?3) ; lowel vowel/modifier | |
811 ("$,1E`(B-$,1EiEj(B-$,1Es(B" "w" ?6) ; digit | |
812 ("$,1EKEM(B-$,1ERETF_(B" "." ?|) ; line-break char | |
813 ("$,1EHEKEM(B-$,1ERETF_E}Et(B" "." ?>) ; prohibition | |
814 ("$,1E@(B-$,1EJE|GAGBFe(B" "." ?<) ; prohibition | |
815 ("$,1ESEU(B-$,1EXEZ(B-$,1E_EvEx(B-$,1E{E~EG>G?(B-$,1GO(B" "." ?q) ; others | |
816 )) | 383 )) |
817 elm chars len syntax category to ch i) | 384 elm chars len syntax category to ch i) |
818 (while deflist | 385 (while deflist |
819 (setq elm (car deflist)) | 386 (setq elm (car deflist)) |
820 (setq chars (car elm) | 387 (setq chars (car elm) |
836 (setq i (1+ i))) | 403 (setq i (1+ i))) |
837 (setq deflist (cdr deflist)))) | 404 (setq deflist (cdr deflist)))) |
838 | 405 |
839 ;; Vietnamese character set | 406 ;; Vietnamese character set |
840 | 407 |
841 (let ((lower (make-char 'vietnamese-viscii-lower)) | 408 ;; To make a word with Latin characters |
842 (upper (make-char 'vietnamese-viscii-upper))) | 409 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l) |
843 ;; (modify-syntax-entry lower "w") | 410 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v) |
844 ;; (modify-syntax-entry upper "w") | 411 |
845 (modify-category-entry lower ?v) | 412 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l) |
846 (modify-category-entry upper ?v) | 413 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v) |
847 (modify-category-entry lower ?l) ; To make a word with | |
848 (modify-category-entry upper ?l) ; latin characters. | |
849 ) | |
850 | 414 |
851 (let ((tbl (standard-case-table)) | 415 (let ((tbl (standard-case-table)) |
852 (i 32)) | 416 (i 32)) |
853 (while (< i 128) | 417 (while (< i 128) |
854 (set-case-syntax-pair (make-char 'vietnamese-viscii-upper i) | 418 (let* ((char (decode-char 'vietnamese-viscii-upper i)) |
855 (make-char 'vietnamese-viscii-lower i) | 419 (charl (decode-char 'vietnamese-viscii-lower i)) |
856 tbl) | 420 (uc (encode-char char 'ucs)) |
421 (lc (encode-char charl 'ucs))) | |
422 (set-case-syntax-pair char (decode-char 'vietnamese-viscii-lower i) | |
423 tbl) | |
424 (if uc (modify-category-entry uc ?v)) | |
425 (if lc (modify-category-entry lc ?v))) | |
857 (setq i (1+ i)))) | 426 (setq i (1+ i)))) |
858 | 427 |
859 ;; Unicode (mule-unicode-0100-24ff) | 428 |
429 ;; Latin | |
430 | |
431 (modify-category-entry '(#x80 . #x024F) ?l) | |
860 | 432 |
861 (let ((tbl (standard-case-table)) c) | 433 (let ((tbl (standard-case-table)) c) |
862 | 434 |
863 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN | 435 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN |
864 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN | 436 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN |
865 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I. | 437 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I. |
866 ;; Thus we have to check language-environment to handle casing | 438 ;; See the Turkish language environment. |
867 ;; correctly. Currently only I<->i is available. | 439 |
440 ;; Latin-1 | |
441 | |
442 ;; Fixme: Some of the non-word syntaxes here perhaps should be | |
443 ;; reviewed. (Note that the following all implicitly have word | |
444 ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.) There should be a well-defined way of | |
445 ;; relating Unicode categories to Emacs syntax codes. | |
446 (set-case-syntax ? " " tbl) ; dubious | |
447 (set-case-syntax ?¡ "." tbl) | |
448 (set-case-syntax ?¦ "_" tbl) | |
449 (set-case-syntax ?§ "." tbl) | |
450 (set-case-syntax ?© "_" tbl) | |
451 (set-case-syntax-delims 171 187 tbl) ; « » | |
452 (set-case-syntax ?¬ "_" tbl) | |
453 (set-case-syntax ? "_" tbl) | |
454 (set-case-syntax ?® "_" tbl) | |
455 (set-case-syntax ?° "_" tbl) | |
456 (set-case-syntax ?± "_" tbl) | |
457 (set-case-syntax ?µ "_" tbl) | |
458 (set-case-syntax ?· "_" tbl) | |
459 (set-case-syntax ?¼ "_" tbl) | |
460 (set-case-syntax ?½ "_" tbl) | |
461 (set-case-syntax ?¾ "_" tbl) | |
462 (set-case-syntax ?¿ "." tbl) | |
463 (let ((c 192)) | |
464 (while (<= c 222) | |
465 (set-case-syntax-pair c (+ c 32) tbl) | |
466 (setq c (1+ c)))) | |
467 (set-case-syntax ?× "_" tbl) | |
468 (set-case-syntax ?ß "w" tbl) | |
469 (set-case-syntax ?÷ "_" tbl) | |
470 ;; See below for ÿ. | |
868 | 471 |
869 ;; Latin Extended-A, Latin Extended-B | 472 ;; Latin Extended-A, Latin Extended-B |
870 (setq c #x0100) | 473 (setq c #x0100) |
871 (while (<= c #x0233) | 474 (while (<= c #x0233) |
872 (modify-category-entry (decode-char 'ucs c) ?l) | |
873 (and (or (<= c #x012e) | 475 (and (or (<= c #x012e) |
874 (and (>= c #x014a) (<= c #x0177))) | 476 (and (>= c #x014a) (<= c #x0177))) |
875 (zerop (% c 2)) | 477 (zerop (% c 2)) |
876 (set-case-syntax-pair | 478 (set-case-syntax-pair c (1+ c) tbl)) |
877 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | |
878 (and (>= c #x013a) | 479 (and (>= c #x013a) |
879 (<= c #x0148) | 480 (<= c #x0148) |
880 (zerop (% c 2)) | 481 (zerop (% c 2)) |
881 (set-case-syntax-pair | 482 (set-case-syntax-pair (1- c) c tbl)) |
882 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl)) | 483 (setq c (1+ c))) |
883 (setq c (1+ c))) | 484 (set-case-syntax-pair ?IJ ?ij tbl) |
884 (set-case-syntax-pair ?$,1 R(B ?$,1 S(B tbl) | 485 (set-case-syntax-pair ?Ĵ ?ĵ tbl) |
885 (set-case-syntax-pair ?$,1 T(B ?$,1 U(B tbl) | 486 (set-case-syntax-pair ?Ķ ?ķ tbl) |
886 (set-case-syntax-pair ?$,1 V(B ?$,1 W(B tbl) | 487 (set-case-syntax-pair ?Ÿ ?ÿ tbl) |
887 ;;; (set-case-syntax-pair ?$,1!8(B ?,A(B tbl) ; these two have different length! | 488 (set-case-syntax-pair ?Ź ?ź tbl) |
888 (set-case-syntax-pair ?$,1!9(B ?$,1!:(B tbl) | 489 (set-case-syntax-pair ?Ż ?ż tbl) |
889 (set-case-syntax-pair ?$,1!;(B ?$,1!<(B tbl) | 490 (set-case-syntax-pair ?Ž ?ž tbl) |
890 (set-case-syntax-pair ?$,1!=(B ?$,1!>(B tbl) | |
891 | 491 |
892 ;; Latin Extended-B | 492 ;; Latin Extended-B |
893 (set-case-syntax-pair ?$,1!A(B ?$,1#S(B tbl) | 493 (set-case-syntax-pair ?Ɓ ?ɓ tbl) |
894 (set-case-syntax-pair ?$,1!B(B ?$,1!C(B tbl) | 494 (set-case-syntax-pair ?Ƃ ?ƃ tbl) |
895 (set-case-syntax-pair ?$,1!D(B ?$,1!E(B tbl) | 495 (set-case-syntax-pair ?Ƅ ?ƅ tbl) |
896 (set-case-syntax-pair ?$,1!F(B ?$,1#T(B tbl) | 496 (set-case-syntax-pair ?Ɔ ?ɔ tbl) |
897 (set-case-syntax-pair ?$,1!G(B ?$,1!H(B tbl) | 497 (set-case-syntax-pair ?Ƈ ?ƈ tbl) |
898 (set-case-syntax-pair ?$,1!I(B ?$,1#V(B tbl) | 498 (set-case-syntax-pair ?Ɖ ?ɖ tbl) |
899 (set-case-syntax-pair ?$,1!J(B ?$,1#W(B tbl) | 499 (set-case-syntax-pair ?Ɗ ?ɗ tbl) |
900 (set-case-syntax-pair ?$,1!K(B ?$,1!L(B tbl) | 500 (set-case-syntax-pair ?Ƌ ?ƌ tbl) |
901 (set-case-syntax-pair ?$,1!N(B ?$,1"=(B tbl) | 501 (set-case-syntax-pair ?Ǝ ?ǝ tbl) |
902 (set-case-syntax-pair ?$,1!O(B ?$,1#Y(B tbl) | 502 (set-case-syntax-pair ?Ə ?ə tbl) |
903 (set-case-syntax-pair ?$,1!P(B ?$,1#[(B tbl) | 503 (set-case-syntax-pair ?Ɛ ?ɛ tbl) |
904 (set-case-syntax-pair ?$,1!Q(B ?$,1!R(B tbl) | 504 (set-case-syntax-pair ?Ƒ ?ƒ tbl) |
905 (set-case-syntax-pair ?$,1!S(B ?$,1#`(B tbl) | 505 (set-case-syntax-pair ?Ɠ ?ɠ tbl) |
906 (set-case-syntax-pair ?$,1!T(B ?$,1#c(B tbl) | 506 (set-case-syntax-pair ?Ɣ ?ɣ tbl) |
907 (set-case-syntax-pair ?$,1!V(B ?$,1#i(B tbl) | 507 (set-case-syntax-pair ?Ɩ ?ɩ tbl) |
908 (set-case-syntax-pair ?$,1!W(B ?$,1#h(B tbl) | 508 (set-case-syntax-pair ?Ɨ ?ɨ tbl) |
909 (set-case-syntax-pair ?$,1!X(B ?$,1!Y(B tbl) | 509 (set-case-syntax-pair ?Ƙ ?ƙ tbl) |
910 (set-case-syntax-pair ?$,1!\(B ?$,1#o(B tbl) | 510 (set-case-syntax-pair ?Ɯ ?ɯ tbl) |
911 (set-case-syntax-pair ?$,1!](B ?$,1#r(B tbl) | 511 (set-case-syntax-pair ?Ɲ ?ɲ tbl) |
912 (set-case-syntax-pair ?$,1!_(B ?$,1#u(B tbl) | 512 (set-case-syntax-pair ?Ɵ ?ɵ tbl) |
913 (set-case-syntax-pair ?$,1!`(B ?$,1!a(B tbl) | 513 (set-case-syntax-pair ?Ơ ?ơ tbl) |
914 (set-case-syntax-pair ?$,1!b(B ?$,1!c(B tbl) | 514 (set-case-syntax-pair ?Ƣ ?ƣ tbl) |
915 (set-case-syntax-pair ?$,1!d(B ?$,1!e(B tbl) | 515 (set-case-syntax-pair ?Ƥ ?ƥ tbl) |
916 (set-case-syntax-pair ?$,1!f(B ?$,1$ (B tbl) | 516 (set-case-syntax-pair ?Ʀ ?ʀ tbl) |
917 (set-case-syntax-pair ?$,1!g(B ?$,1!h(B tbl) | 517 (set-case-syntax-pair ?Ƨ ?ƨ tbl) |
918 (set-case-syntax-pair ?$,1!i(B ?$,1$#(B tbl) | 518 (set-case-syntax-pair ?Ʃ ?ʃ tbl) |
919 (set-case-syntax-pair ?$,1!l(B ?$,1!m(B tbl) | 519 (set-case-syntax-pair ?Ƭ ?ƭ tbl) |
920 (set-case-syntax-pair ?$,1!n(B ?$,1$((B tbl) | 520 (set-case-syntax-pair ?Ʈ ?ʈ tbl) |
921 (set-case-syntax-pair ?$,1!o(B ?$,1!p(B tbl) | 521 (set-case-syntax-pair ?Ư ?ư tbl) |
922 (set-case-syntax-pair ?$,1!q(B ?$,1$*(B tbl) | 522 (set-case-syntax-pair ?Ʊ ?ʊ tbl) |
923 (set-case-syntax-pair ?$,1!r(B ?$,1$+(B tbl) | 523 (set-case-syntax-pair ?Ʋ ?ʋ tbl) |
924 (set-case-syntax-pair ?$,1!s(B ?$,1!t(B tbl) | 524 (set-case-syntax-pair ?Ƴ ?ƴ tbl) |
925 (set-case-syntax-pair ?$,1!u(B ?$,1!v(B tbl) | 525 (set-case-syntax-pair ?Ƶ ?ƶ tbl) |
926 (set-case-syntax-pair ?$,1!w(B ?$,1$2(B tbl) | 526 (set-case-syntax-pair ?Ʒ ?ʒ tbl) |
927 (set-case-syntax-pair ?$,1!x(B ?$,1!y(B tbl) | 527 (set-case-syntax-pair ?Ƹ ?ƹ tbl) |
928 (set-case-syntax-pair ?$,1!|(B ?$,1!}(B tbl) | 528 (set-case-syntax-pair ?Ƽ ?ƽ tbl) |
929 (set-case-syntax-pair ?$,1"$(B ?$,1"&(B tbl) | 529 (set-case-syntax-pair ?DŽ ?dž tbl) |
930 (set-case-syntax-pair ?$,1"%(B ?$,1"&(B tbl) | 530 (set-case-syntax-pair ?Dž ?dž tbl) |
931 (set-case-syntax-pair ?$,1"'(B ?$,1")(B tbl) | 531 (set-case-syntax-pair ?LJ ?lj tbl) |
932 (set-case-syntax-pair ?$,1"((B ?$,1")(B tbl) | 532 (set-case-syntax-pair ?Lj ?lj tbl) |
933 (set-case-syntax-pair ?$,1"*(B ?$,1",(B tbl) | 533 (set-case-syntax-pair ?NJ ?nj tbl) |
934 (set-case-syntax-pair ?$,1"+(B ?$,1",(B tbl) | 534 (set-case-syntax-pair ?Nj ?nj tbl) |
935 (set-case-syntax-pair ?$,1"-(B ?$,1".(B tbl) | 535 (set-case-syntax-pair ?Ǎ ?ǎ tbl) |
936 (set-case-syntax-pair ?$,1"/(B ?$,1"0(B tbl) | 536 (set-case-syntax-pair ?Ǐ ?ǐ tbl) |
937 (set-case-syntax-pair ?$,1"1(B ?$,1"2(B tbl) | 537 (set-case-syntax-pair ?Ǒ ?ǒ tbl) |
938 (set-case-syntax-pair ?$,1"3(B ?$,1"4(B tbl) | 538 (set-case-syntax-pair ?Ǔ ?ǔ tbl) |
939 (set-case-syntax-pair ?$,1"5(B ?$,1"6(B tbl) | 539 (set-case-syntax-pair ?Ǖ ?ǖ tbl) |
940 (set-case-syntax-pair ?$,1"7(B ?$,1"8(B tbl) | 540 (set-case-syntax-pair ?Ǘ ?ǘ tbl) |
941 (set-case-syntax-pair ?$,1"9(B ?$,1":(B tbl) | 541 (set-case-syntax-pair ?Ǚ ?ǚ tbl) |
942 (set-case-syntax-pair ?$,1";(B ?$,1"<(B tbl) | 542 (set-case-syntax-pair ?Ǜ ?ǜ tbl) |
943 (set-case-syntax-pair ?$,1">(B ?$,1"?(B tbl) | 543 (set-case-syntax-pair ?Ǟ ?ǟ tbl) |
944 (set-case-syntax-pair ?$,1"@(B ?$,1"A(B tbl) | 544 (set-case-syntax-pair ?Ǡ ?ǡ tbl) |
945 (set-case-syntax-pair ?$,1"B(B ?$,1"C(B tbl) | 545 (set-case-syntax-pair ?Ǣ ?ǣ tbl) |
946 (set-case-syntax-pair ?$,1"D(B ?$,1"E(B tbl) | 546 (set-case-syntax-pair ?Ǥ ?ǥ tbl) |
947 (set-case-syntax-pair ?$,1"F(B ?$,1"G(B tbl) | 547 (set-case-syntax-pair ?Ǧ ?ǧ tbl) |
948 (set-case-syntax-pair ?$,1"H(B ?$,1"I(B tbl) | 548 (set-case-syntax-pair ?Ǩ ?ǩ tbl) |
949 (set-case-syntax-pair ?$,1"J(B ?$,1"K(B tbl) | 549 (set-case-syntax-pair ?Ǫ ?ǫ tbl) |
950 (set-case-syntax-pair ?$,1"L(B ?$,1"M(B tbl) | 550 (set-case-syntax-pair ?Ǭ ?ǭ tbl) |
951 (set-case-syntax-pair ?$,1"N(B ?$,1"O(B tbl) | 551 (set-case-syntax-pair ?Ǯ ?ǯ tbl) |
952 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON | 552 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON |
953 (set-case-syntax-pair ?$,1"Q(B ?$,1"S(B tbl) | 553 (set-case-syntax-pair ?DZ ?dz tbl) |
954 (set-case-syntax-pair ?$,1"R(B ?$,1"S(B tbl) | 554 (set-case-syntax-pair ?Dz ?dz tbl) |
955 (set-case-syntax-pair ?$,1"T(B ?$,1"U(B tbl) | 555 (set-case-syntax-pair ?Ǵ ?ǵ tbl) |
956 (set-case-syntax-pair ?$,1"V(B ?$,1!U(B tbl) | 556 (set-case-syntax-pair ?Ƕ ?ƕ tbl) |
957 (set-case-syntax-pair ?$,1"W(B ?$,1!(B tbl) | 557 (set-case-syntax-pair ?Ƿ ?ƿ tbl) |
958 (set-case-syntax-pair ?$,1"X(B ?$,1"Y(B tbl) | 558 (set-case-syntax-pair ?Ǹ ?ǹ tbl) |
959 (set-case-syntax-pair ?$,1"Z(B ?$,1"[(B tbl) | 559 (set-case-syntax-pair ?Ǻ ?ǻ tbl) |
960 (set-case-syntax-pair ?$,1"\(B ?$,1"](B tbl) | 560 (set-case-syntax-pair ?Ǽ ?ǽ tbl) |
961 (set-case-syntax-pair ?$,1"^(B ?$,1"_(B tbl) | 561 (set-case-syntax-pair ?Ǿ ?ǿ tbl) |
962 (set-case-syntax-pair ?$,1"`(B ?$,1"a(B tbl) | 562 (set-case-syntax-pair ?Ȁ ?ȁ tbl) |
963 (set-case-syntax-pair ?$,1"b(B ?$,1"c(B tbl) | 563 (set-case-syntax-pair ?Ȃ ?ȃ tbl) |
964 (set-case-syntax-pair ?$,1"d(B ?$,1"e(B tbl) | 564 (set-case-syntax-pair ?Ȅ ?ȅ tbl) |
965 (set-case-syntax-pair ?$,1"f(B ?$,1"g(B tbl) | 565 (set-case-syntax-pair ?Ȇ ?ȇ tbl) |
966 (set-case-syntax-pair ?$,1"h(B ?$,1"i(B tbl) | 566 (set-case-syntax-pair ?Ȉ ?ȉ tbl) |
967 (set-case-syntax-pair ?$,1"j(B ?$,1"k(B tbl) | 567 (set-case-syntax-pair ?Ȋ ?ȋ tbl) |
968 (set-case-syntax-pair ?$,1"l(B ?$,1"m(B tbl) | 568 (set-case-syntax-pair ?Ȍ ?ȍ tbl) |
969 (set-case-syntax-pair ?$,1"n(B ?$,1"o(B tbl) | 569 (set-case-syntax-pair ?Ȏ ?ȏ tbl) |
970 (set-case-syntax-pair ?$,1"p(B ?$,1"q(B tbl) | 570 (set-case-syntax-pair ?Ȑ ?ȑ tbl) |
971 (set-case-syntax-pair ?$,1"r(B ?$,1"s(B tbl) | 571 (set-case-syntax-pair ?Ȓ ?ȓ tbl) |
972 (set-case-syntax-pair ?$,1"t(B ?$,1"u(B tbl) | 572 (set-case-syntax-pair ?Ȕ ?ȕ tbl) |
973 (set-case-syntax-pair ?$,1"v(B ?$,1"w(B tbl) | 573 (set-case-syntax-pair ?Ȗ ?ȗ tbl) |
974 (set-case-syntax-pair ?$,1"x(B ?$,1"y(B tbl) | 574 (set-case-syntax-pair ?Ș ?ș tbl) |
975 (set-case-syntax-pair ?$,1"z(B ?$,1"{(B tbl) | 575 (set-case-syntax-pair ?Ț ?ț tbl) |
976 (set-case-syntax-pair ?$,1"|(B ?$,1"}(B tbl) | 576 (set-case-syntax-pair ?Ȝ ?ȝ tbl) |
977 (set-case-syntax-pair ?$,1"~(B ?$,1"(B tbl) | 577 (set-case-syntax-pair ?Ȟ ?ȟ tbl) |
978 (set-case-syntax-pair ?$,1#"(B ?$,1##(B tbl) | 578 (set-case-syntax-pair ?Ȣ ?ȣ tbl) |
979 (set-case-syntax-pair ?$,1#$(B ?$,1#%(B tbl) | 579 (set-case-syntax-pair ?Ȥ ?ȥ tbl) |
980 (set-case-syntax-pair ?$,1#&(B ?$,1#'(B tbl) | 580 (set-case-syntax-pair ?Ȧ ?ȧ tbl) |
981 (set-case-syntax-pair ?$,1#((B ?$,1#)(B tbl) | 581 (set-case-syntax-pair ?Ȩ ?ȩ tbl) |
982 (set-case-syntax-pair ?$,1#*(B ?$,1#+(B tbl) | 582 (set-case-syntax-pair ?Ȫ ?ȫ tbl) |
983 (set-case-syntax-pair ?$,1#,(B ?$,1#-(B tbl) | 583 (set-case-syntax-pair ?Ȭ ?ȭ tbl) |
984 (set-case-syntax-pair ?$,1#.(B ?$,1#/(B tbl) | 584 (set-case-syntax-pair ?Ȯ ?ȯ tbl) |
985 (set-case-syntax-pair ?$,1#0(B ?$,1#1(B tbl) | 585 (set-case-syntax-pair ?Ȱ ?ȱ tbl) |
986 (set-case-syntax-pair ?$,1#2(B ?$,1#3(B tbl) | 586 (set-case-syntax-pair ?Ȳ ?ȳ tbl) |
987 | 587 |
988 ;; Latin Extended Additional | 588 ;; Latin Extended Additional |
589 (modify-category-entry '(#x1e00 . #x1ef9) ?l) | |
989 (setq c #x1e00) | 590 (setq c #x1e00) |
990 (while (<= c #x1ef9) | 591 (while (<= c #x1ef9) |
991 (modify-category-entry (decode-char 'ucs c) ?l) | |
992 (and (zerop (% c 2)) | 592 (and (zerop (% c 2)) |
993 (or (<= c #x1e94) (>= c #x1ea0)) | 593 (or (<= c #x1e94) (>= c #x1ea0)) |
994 (set-case-syntax-pair | 594 (set-case-syntax-pair c (1+ c) tbl)) |
995 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | |
996 (setq c (1+ c))) | 595 (setq c (1+ c))) |
997 | 596 |
998 ;; Greek | 597 ;; Greek |
598 (modify-category-entry '(#x0370 . #x03ff) ?g) | |
999 (setq c #x0370) | 599 (setq c #x0370) |
1000 (while (<= c #x03ff) | 600 (while (<= c #x03ff) |
1001 (modify-category-entry (decode-char 'ucs c) ?g) | |
1002 (if (or (and (>= c #x0391) (<= c #x03a1)) | 601 (if (or (and (>= c #x0391) (<= c #x03a1)) |
1003 (and (>= c #x03a3) (<= c #x03ab))) | 602 (and (>= c #x03a3) (<= c #x03ab))) |
1004 (set-case-syntax-pair | 603 (set-case-syntax-pair c (+ c 32) tbl)) |
1005 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl)) | |
1006 (and (>= c #x03da) | 604 (and (>= c #x03da) |
1007 (<= c #x03ee) | 605 (<= c #x03ee) |
1008 (zerop (% c 2)) | 606 (zerop (% c 2)) |
1009 (set-case-syntax-pair | 607 (set-case-syntax-pair c (1+ c) tbl)) |
1010 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | 608 (setq c (1+ c))) |
1011 (setq c (1+ c))) | 609 (set-case-syntax-pair ?Ά ?ά tbl) |
1012 (set-case-syntax-pair ?$,1&f(B ?$,1',(B tbl) | 610 (set-case-syntax-pair ?Έ ?έ tbl) |
1013 (set-case-syntax-pair ?$,1&h(B ?$,1'-(B tbl) | 611 (set-case-syntax-pair ?Ή ?ή tbl) |
1014 (set-case-syntax-pair ?$,1&i(B ?$,1'.(B tbl) | 612 (set-case-syntax-pair ?Ί ?ί tbl) |
1015 (set-case-syntax-pair ?$,1&j(B ?$,1'/(B tbl) | 613 (set-case-syntax-pair ?Ό ?ό tbl) |
1016 (set-case-syntax-pair ?$,1&l(B ?$,1'L(B tbl) | 614 (set-case-syntax-pair ?Ύ ?ύ tbl) |
1017 (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl) | 615 (set-case-syntax-pair ?Ώ ?ώ tbl) |
1018 (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl) | |
1019 | 616 |
1020 ;; Armenian | 617 ;; Armenian |
1021 (setq c #x531) | 618 (setq c #x531) |
1022 (while (<= c #x556) | 619 (while (<= c #x556) |
1023 (set-case-syntax-pair (decode-char 'ucs c) | 620 (set-case-syntax-pair c (+ c #x30) tbl) |
1024 (decode-char 'ucs (+ c #x30)) tbl) | |
1025 (setq c (1+ c))) | 621 (setq c (1+ c))) |
1026 | 622 |
1027 ;; Greek Extended | 623 ;; Greek Extended |
624 (modify-category-entry '(#x1f00 . #x1fff) ?g) | |
1028 (setq c #x1f00) | 625 (setq c #x1f00) |
1029 (while (<= c #x1fff) | 626 (while (<= c #x1fff) |
1030 (modify-category-entry (decode-char 'ucs c) ?g) | |
1031 (and (<= (logand c #x000f) 7) | 627 (and (<= (logand c #x000f) 7) |
1032 (<= c #x1fa7) | 628 (<= c #x1fa7) |
1033 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56))) | 629 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56))) |
1034 (/= (logand c #x00f0) 7) | 630 (/= (logand c #x00f0) 7) |
1035 (set-case-syntax-pair | 631 (set-case-syntax-pair (+ c 8) c tbl)) |
1036 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl)) | 632 (setq c (1+ c))) |
1037 (setq c (1+ c))) | 633 (set-case-syntax-pair ?Ᾰ ?ᾰ tbl) |
1038 (set-case-syntax-pair ?$,1qx(B ?$,1qp(B tbl) | 634 (set-case-syntax-pair ?Ᾱ ?ᾱ tbl) |
1039 (set-case-syntax-pair ?$,1qy(B ?$,1qq(B tbl) | 635 (set-case-syntax-pair ?Ὰ ?ὰ tbl) |
1040 (set-case-syntax-pair ?$,1qz(B ?$,1q0(B tbl) | 636 (set-case-syntax-pair ?Ά ?ά tbl) |
1041 (set-case-syntax-pair ?$,1q{(B ?$,1q1(B tbl) | 637 (set-case-syntax-pair ?ᾼ ?ᾳ tbl) |
1042 (set-case-syntax-pair ?$,1q|(B ?$,1qs(B tbl) | 638 (set-case-syntax-pair ?Ὲ ?ὲ tbl) |
1043 (set-case-syntax-pair ?$,1r((B ?$,1q2(B tbl) | 639 (set-case-syntax-pair ?Έ ?έ tbl) |
1044 (set-case-syntax-pair ?$,1r)(B ?$,1q3(B tbl) | 640 (set-case-syntax-pair ?Ὴ ?ὴ tbl) |
1045 (set-case-syntax-pair ?$,1r*(B ?$,1q4(B tbl) | 641 (set-case-syntax-pair ?Ή ?ή tbl) |
1046 (set-case-syntax-pair ?$,1r+(B ?$,1q5(B tbl) | 642 (set-case-syntax-pair ?ῌ ?ῃ tbl) |
1047 (set-case-syntax-pair ?$,1r,(B ?$,1r#(B tbl) | 643 (set-case-syntax-pair ?Ῐ ?ῐ tbl) |
1048 (set-case-syntax-pair ?$,1r8(B ?$,1r0(B tbl) | 644 (set-case-syntax-pair ?Ῑ ?ῑ tbl) |
1049 (set-case-syntax-pair ?$,1r9(B ?$,1r1(B tbl) | 645 (set-case-syntax-pair ?Ὶ ?ὶ tbl) |
1050 (set-case-syntax-pair ?$,1r:(B ?$,1q6(B tbl) | 646 (set-case-syntax-pair ?Ί ?ί tbl) |
1051 (set-case-syntax-pair ?$,1r;(B ?$,1q7(B tbl) | 647 (set-case-syntax-pair ?Ῠ ?ῠ tbl) |
1052 (set-case-syntax-pair ?$,1rH(B ?$,1r@(B tbl) | 648 (set-case-syntax-pair ?Ῡ ?ῡ tbl) |
1053 (set-case-syntax-pair ?$,1rI(B ?$,1rA(B tbl) | 649 (set-case-syntax-pair ?Ὺ ?ὺ tbl) |
1054 (set-case-syntax-pair ?$,1rJ(B ?$,1q:(B tbl) | 650 (set-case-syntax-pair ?Ύ ?ύ tbl) |
1055 (set-case-syntax-pair ?$,1rK(B ?$,1q;(B tbl) | 651 (set-case-syntax-pair ?Ῥ ?ῥ tbl) |
1056 (set-case-syntax-pair ?$,1rL(B ?$,1rE(B tbl) | 652 (set-case-syntax-pair ?Ὸ ?ὸ tbl) |
1057 (set-case-syntax-pair ?$,1rX(B ?$,1q8(B tbl) | 653 (set-case-syntax-pair ?Ό ?ό tbl) |
1058 (set-case-syntax-pair ?$,1rY(B ?$,1q9(B tbl) | 654 (set-case-syntax-pair ?Ὼ ?ὼ tbl) |
1059 (set-case-syntax-pair ?$,1rZ(B ?$,1q<(B tbl) | 655 (set-case-syntax-pair ?Ώ ?ώ tbl) |
1060 (set-case-syntax-pair ?$,1r[(B ?$,1q=(B tbl) | 656 (set-case-syntax-pair ?ῼ ?ῳ tbl) |
1061 (set-case-syntax-pair ?$,1r\(B ?$,1rS(B tbl) | |
1062 | 657 |
1063 ;; cyrillic | 658 ;; cyrillic |
659 (modify-category-entry '(#x0400 . #x04FF) ?y) | |
1064 (setq c #x0400) | 660 (setq c #x0400) |
1065 (while (<= c #x04ff) | 661 (while (<= c #x04ff) |
1066 (modify-category-entry (decode-char 'ucs c) ?y) | |
1067 (and (>= c #x0400) | 662 (and (>= c #x0400) |
1068 (<= c #x040f) | 663 (<= c #x040f) |
1069 (set-case-syntax-pair | 664 (set-case-syntax-pair c (+ c 80) tbl)) |
1070 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl)) | |
1071 (and (>= c #x0410) | 665 (and (>= c #x0410) |
1072 (<= c #x042f) | 666 (<= c #x042f) |
1073 (set-case-syntax-pair | 667 (set-case-syntax-pair c (+ c 32) tbl)) |
1074 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl)) | |
1075 (and (zerop (% c 2)) | 668 (and (zerop (% c 2)) |
1076 (or (and (>= c #x0460) (<= c #x0480)) | 669 (or (and (>= c #x0460) (<= c #x0480)) |
1077 (and (>= c #x048c) (<= c #x04be)) | 670 (and (>= c #x048c) (<= c #x04be)) |
1078 (and (>= c #x04d0) (<= c #x04f4))) | 671 (and (>= c #x04d0) (<= c #x04f4))) |
1079 (set-case-syntax-pair | 672 (set-case-syntax-pair c (1+ c) tbl)) |
1080 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | 673 (setq c (1+ c))) |
1081 (setq c (1+ c))) | 674 (set-case-syntax-pair ?Ӂ ?ӂ tbl) |
1082 (set-case-syntax-pair ?$,1*!(B ?$,1*"(B tbl) | 675 (set-case-syntax-pair ?Ӄ ?ӄ tbl) |
1083 (set-case-syntax-pair ?$,1*#(B ?$,1*$(B tbl) | 676 (set-case-syntax-pair ?Ӈ ?ӈ tbl) |
1084 (set-case-syntax-pair ?$,1*'(B ?$,1*((B tbl) | 677 (set-case-syntax-pair ?Ӌ ?ӌ tbl) |
1085 (set-case-syntax-pair ?$,1*+(B ?$,1*,(B tbl) | 678 (set-case-syntax-pair ?Ӹ ?ӹ tbl) |
1086 (set-case-syntax-pair ?$,1*X(B ?$,1*Y(B tbl) | |
1087 | 679 |
1088 ;; general punctuation | 680 ;; general punctuation |
1089 (setq c #x2000) | 681 (setq c #x2000) |
1090 (while (<= c #x200b) | 682 (while (<= c #x200b) |
1091 (set-case-syntax (decode-char 'ucs c) " " tbl) | 683 (set-case-syntax c " " tbl) |
1092 (setq c (decode-char 'ucs (1+ c)))) | 684 (setq c (1+ c))) |
1093 (setq c #x2010) | 685 (while (<= c #x200F) |
686 (set-case-syntax c "." tbl) | |
687 (setq c (1+ c))) | |
688 ;; Fixme: These aren't all right: | |
1094 (while (<= c #x2027) | 689 (while (<= c #x2027) |
1095 (set-case-syntax (decode-char 'ucs c) "_" tbl) | 690 (set-case-syntax c "_" tbl) |
1096 (setq c (decode-char 'ucs (1+ c)))) | 691 (setq c (1+ c))) |
692 (while (<= c #x206F) | |
693 (set-case-syntax c "." tbl) | |
694 (setq c (1+ c))) | |
1097 | 695 |
1098 ;; Roman numerals | 696 ;; Roman numerals |
1099 (setq c #x2160) | 697 (setq c #x2160) |
1100 (while (<= c #x216f) | 698 (while (<= c #x216f) |
1101 (set-case-syntax-pair (decode-char 'ucs c) | 699 (set-case-syntax-pair c (+ c #x10) tbl) |
1102 (decode-char 'ucs (+ c #x10)) tbl) | 700 (setq c (1+ c))) |
701 | |
702 ;; Fixme: The following blocks might be better as symbol rather than | |
703 ;; punctuation. | |
704 ;; Arrows | |
705 (setq c #x2190) | |
706 (while (<= c #x21FF) | |
707 (set-case-syntax c "." tbl) | |
708 (setq c (1+ c))) | |
709 ;; Mathematical Operators | |
710 (while (<= c #x22FF) | |
711 (set-case-syntax c "." tbl) | |
712 (setq c (1+ c))) | |
713 ;; Miscellaneous Technical | |
714 (while (<= c #x23FF) | |
715 (set-case-syntax c "." tbl) | |
716 (setq c (1+ c))) | |
717 ;; Control Pictures | |
718 (while (<= c #x243F) | |
719 (set-case-syntax c "_" tbl) | |
1103 (setq c (1+ c))) | 720 (setq c (1+ c))) |
1104 | 721 |
1105 ;; Circled Latin | 722 ;; Circled Latin |
1106 (setq c #x24b6) | 723 (setq c #x24b6) |
1107 (while (<= c #x24cf) | 724 (while (<= c #x24cf) |
1108 (set-case-syntax-pair (decode-char 'ucs c) | 725 (set-case-syntax-pair c (+ c 26) tbl) |
1109 (decode-char 'ucs (+ c 26)) tbl) | 726 (modify-category-entry c ?l) |
1110 (modify-category-entry (decode-char 'ucs c) ?l) | 727 (modify-category-entry (+ c 26) ?l) |
1111 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l) | |
1112 (setq c (1+ c))) | 728 (setq c (1+ c))) |
1113 | 729 |
1114 ;; Fullwidth Latin | 730 ;; Fullwidth Latin |
1115 (setq c #xff21) | 731 (setq c #xff21) |
1116 (while (<= c #xff3a) | 732 (while (<= c #xff3a) |
1117 (set-case-syntax-pair (decode-char 'ucs c) | 733 (set-case-syntax-pair c (+ c #x20) tbl) |
1118 (decode-char 'ucs (+ c #x20)) tbl) | 734 (modify-category-entry c ?l) |
1119 (modify-category-entry (decode-char 'ucs c) ?l) | 735 (modify-category-entry (+ c #x20) ?l) |
1120 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l) | |
1121 (setq c (1+ c))) | 736 (setq c (1+ c))) |
1122 | 737 |
1123 ;; Combining diacritics | 738 ;; Combining diacritics |
1124 (setq c #x300) | 739 (modify-category-entry '(#x300 . #x362) ?^) |
1125 (while (<= c #x362) | |
1126 (modify-category-entry (decode-char 'ucs c) ?^) | |
1127 (setq c (1+ c))) | |
1128 | |
1129 ;; Combining marks | 740 ;; Combining marks |
1130 (setq c #x20d0) | 741 (modify-category-entry '(#x20d0 . #x20e3) ?^) |
1131 (while (<= c #x20e3) | |
1132 (modify-category-entry (decode-char 'ucs c) ?^) | |
1133 (setq c (1+ c))) | |
1134 | 742 |
1135 ;; Fixme: syntax for symbols &c | 743 ;; Fixme: syntax for symbols &c |
1136 ) | 744 ) |
1137 | 745 |
1138 ;;; Setting word boundary. | |
1139 | |
1140 (setq word-combining-categories | |
1141 '((?l . ?l))) | |
1142 | |
1143 (setq word-separating-categories ; (2-byte character sets) | |
1144 '((?A . ?K) ; Alpha numeric - Katakana | |
1145 (?A . ?C) ; Alpha numeric - Chinese | |
1146 (?H . ?A) ; Hiragana - Alpha numeric | |
1147 (?H . ?K) ; Hiragana - Katakana | |
1148 (?H . ?C) ; Hiragana - Chinese | |
1149 (?K . ?A) ; Katakana - Alpha numeric | |
1150 (?K . ?C) ; Katakana - Chinese | |
1151 (?C . ?A) ; Chinese - Alpha numeric | |
1152 (?C . ?K) ; Chinese - Katakana | |
1153 )) | |
1154 | |
1155 | |
1156 ;; For each character set, put the information of the most proper | 746 ;; For each character set, put the information of the most proper |
1157 ;; coding system to encode it by `preferred-coding-system' property. | 747 ;; coding system to encode it by `preferred-coding-system' property. |
1158 | 748 |
749 ;; Fixme: should this be junked? | |
1159 (let ((l '((latin-iso8859-1 . iso-latin-1) | 750 (let ((l '((latin-iso8859-1 . iso-latin-1) |
1160 (latin-iso8859-2 . iso-latin-2) | 751 (latin-iso8859-2 . iso-latin-2) |
1161 (latin-iso8859-3 . iso-latin-3) | 752 (latin-iso8859-3 . iso-latin-3) |
1162 (latin-iso8859-4 . iso-latin-4) | 753 (latin-iso8859-4 . iso-latin-4) |
1163 (thai-tis620 . thai-tis620) | 754 (thai-tis620 . thai-tis620) |
1181 (ipa . iso-2022-7bit) | 772 (ipa . iso-2022-7bit) |
1182 (vietnamese-viscii-lower . vietnamese-viscii) | 773 (vietnamese-viscii-lower . vietnamese-viscii) |
1183 (vietnamese-viscii-upper . vietnamese-viscii) | 774 (vietnamese-viscii-upper . vietnamese-viscii) |
1184 (arabic-digit . iso-2022-7bit) | 775 (arabic-digit . iso-2022-7bit) |
1185 (arabic-1-column . iso-2022-7bit) | 776 (arabic-1-column . iso-2022-7bit) |
1186 (ascii-right-to-left . iso-2022-7bit) | |
1187 (lao . lao) | 777 (lao . lao) |
1188 (arabic-2-column . iso-2022-7bit) | 778 (arabic-2-column . iso-2022-7bit) |
1189 (indian-is13194 . devanagari) | 779 (indian-is13194 . devanagari) |
1190 (indian-glyph . devanagari) | 780 (indian-glyph . devanagari) |
1191 (tibetan-1-column . tibetan) | 781 (tibetan-1-column . tibetan) |
1209 ;; property on the charsets. | 799 ;; property on the charsets. |
;; Walk the charset list L and set the `nospace-between-words' charset
;; property to t on each element.  The two columns differ only in the
;; `auto-fill-chars' line: the left column performs the `aset', the
;; right column has it commented out.
1210 (let ((l '(katakana-jisx0201 | 800 (let ((l '(katakana-jisx0201 |
1211 japanese-jisx0208 japanese-jisx0212 | 801 japanese-jisx0208 japanese-jisx0212 |
1212 chinese-gb2312 chinese-big5-1 chinese-big5-2))) | 802 chinese-gb2312 chinese-big5-1 chinese-big5-2))) |
1213 (while l | 803 (while l |
1214 (aset auto-fill-chars (make-char (car l)) t) | 804 ;;(aset auto-fill-chars (make-char (car l)) t) |
1215 (put-charset-property (car l) 'nospace-between-words t) | 805 (put-charset-property (car l) 'nospace-between-words t) |
1216 (setq l (cdr l)))) | 806 (setq l (cdr l)))) |
1217 | 807 |
808 | |
;; CJK double width characters: store a width of 2 in
;; `char-width-table' for every code point in the ranges below.
(dolist (range '((#x1100 . #x11FF)
                 (#x2E80 . #x9FAF)
                 (#xAC00 . #xD7AF)
                 (#xF900 . #xFAFF)
                 (#xFE30 . #xFE4F)
                 (#xFF00 . #xFF5F)
                 (#xFFE0 . #xFFEF)
                 (#x20000 . #x2AFFF)
                 (#x2F800 . #x2FFFF)))
  ;; Hand the table a fresh cons rather than the quoted constant cell.
  (set-char-table-range char-width-table
                        (cons (car range) (cdr range))
                        2))
823 ;; Fixme: Doing this affects non-CJK characters through unification, | |
824 ;; but presumably CJK users expect those characters to be | |
825 ;; double-width when using these charsets. | |
826 ;; (map-charset-chars | |
827 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
828 ;; 'japanese-jisx0208) | |
829 ;; (map-charset-chars | |
830 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
831 ;; 'japanese-jisx0212) | |
832 ;; (map-charset-chars | |
833 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
834 ;; 'japanese-jisx0213-1) | |
835 ;; (map-charset-chars | |
836 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
837 ;; 'japanese-jisx0213-2) | |
838 ;; (map-charset-chars | |
839 ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) | |
840 ;; 'korean-ksc5601) | |
841 | |
;; Other double width: every character range reported for these
;; charsets also gets a width of 2 in `char-width-table'.
(dolist (charset '(ethiopic tibetan indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (range _arg)
     (set-char-table-range char-width-table range 2))
   charset))
855 | |
;; Run `optimize-char-table' over the standard case table, the width
;; table, the standard category table and the standard syntax table,
;; in that order.
(dolist (table (list (standard-case-table)
                     char-width-table
                     (standard-category-table)
                     (standard-syntax-table)))
  (optimize-char-table table))
860 | |
;; Fill `char-script-table'.  Each entry below is (FROM TO SCRIPT).
;; The Unicode blocks actually extend past some of these ranges with
;; undefined codepoints.
(let (scripts)
  (dolist (entry
           '((#x0000 #x007F latin)
             (#x00A0 #x036F latin)
             (#x0370 #x03E1 greek)
             (#x03E2 #x03EF coptic)
             (#x03F0 #x03F3 greek)
             (#x0400 #x04FF cyrillic)
             (#x0530 #x058F armenian)
             (#x0590 #x05FF hebrew)
             (#x0600 #x06FF arabic)
             (#x0700 #x074F syriac)
             (#x0780 #x07BF thaana)
             (#x0900 #x097F devanagari)
             (#x0980 #x09FF bengali)
             (#x0A00 #x0A7F gurmukhi)
             (#x0A80 #x0AFF gujarati)
             (#x0B00 #x0B7F oriya)
             (#x0B80 #x0BFF tamil)
             (#x0C00 #x0C7F telugu)
             (#x0C80 #x0CFF kannada)
             (#x0D00 #x0D7F malayalam)
             (#x0D80 #x0DFF sinhala)
             (#x0E00 #x0E5F thai)
             (#x0E80 #x0EDF lao)
             (#x0F00 #x0FFF tibetan)
             (#x1000 #x105F myanmar)
             (#x10A0 #x10FF georgian)
             (#x1100 #x11FF hangul)
             (#x1200 #x137F ethiopic)
             (#x13A0 #x13FF cherokee)
             (#x1400 #x167F canadian-aboriginal)
             (#x1680 #x169F ogham)
             (#x16A0 #x16FF runic)
             (#x1780 #x17FF khmer)
             (#x1800 #x18AF mongolian)
             (#x1E00 #x1EFF latin)
             (#x1F00 #x1FFF greek)
             (#x20A0 #x20AF currency)
             (#x2800 #x28FF braille)
             (#x2E80 #x2FDF han)
             (#x2FF0 #x2FFF ideographic-description)
             (#x3000 #x303F cjk-misc)
             (#x3040 #x30FF kana)
             (#x3100 #x312F bopomofo)
             (#x3130 #x318F hangul)
             (#x3190 #x319F kanbun)
             (#x31A0 #x31BF bopomofo)
             (#x3400 #x9FAF han)
             (#xA000 #xA4CF yi)
             (#xAC00 #xD7AF hangul)
             (#xF900 #xFA5F han)
             (#xFB1D #xFB4F hebrew)
             (#xFB50 #xFDFF arabic)
             (#xFE70 #xFEFC arabic)
             (#xFF00 #xFF5F cjk-misc)
             (#xFF61 #xFF9F kana)
             (#xFFE0 #xFFE6 cjk-misc)
             (#x20000 #x2AFFF han)
             (#x2F800 #x2FFFF han)))
    (let ((script (nth 2 entry)))
      (set-char-table-range char-script-table
                            (cons (car entry) (nth 1 entry))
                            script)
      ;; Record each script symbol once, newest first.
      (unless (memq script scripts)
        (push script scripts))))
  ;; Extra slot 0 receives the distinct script symbols in the order
  ;; they first appear in the list above.
  (set-char-table-extra-slot char-script-table 0 (nreverse scripts)))

;; Every character range reported for the `tibetan' charset is marked
;; as tibetan script as well.
(map-charset-chars
 (lambda (range _arg)
   (set-char-table-range char-script-table range 'tibetan))
 'tibetan)
934 | |
935 | |
936 ;;; Setting word boundary. | |
937 | |
(defun next-word-boundary-han (pos limit)
  "Return the word boundary reached from POS when scanning toward LIMIT.
If POS is not greater than LIMIT, scan forward: skip a run of
characters in category `C' starting at POS, then a directly following
run of characters in category `H', and return the position reached.
The forward scan is not clamped to LIMIT.
Otherwise scan backward: decrement POS while it is greater than LIMIT
and the character before it has the `char-script-table' entry `han',
then return POS."
  (if (<= pos limit)
      (save-excursion
        (goto-char pos)
        ;; Move only when the regexp matched.  After a failed
        ;; `looking-at' the match data still describes some earlier,
        ;; unrelated match, so `match-end' must not be consulted.
        (if (looking-at "\\cC+")
            (goto-char (match-end 0)))
        (if (looking-at "\\cH+")
            (goto-char (match-end 0)))
        (point))
    (while (and (> pos limit)
                (eq (aref char-script-table (char-after (1- pos))) 'han))
      (setq pos (1- pos)))
    pos))
951 | |
(defun next-word-boundary-kana (pos limit)
  "Return the word boundary reached from POS when scanning toward LIMIT.
If POS is not greater than LIMIT, scan forward past an optional run of
category `K' characters and then an optional run of category `H'
characters, and return the position reached.
Otherwise scan backward.  When the character at POS is in category
`K', move back over the preceding `K' characters.  If it is not, move
back over the preceding `H' characters; then, if the last character
examined is in category `C', `K' or `A' (tested in that order), step
over it and over the characters of that same category before it.
Each backward loop stops once POS is no longer greater than LIMIT."
  (cond
   ((<= pos limit)
    (save-excursion
      (goto-char pos)
      (dolist (run '("\\cK+" "\\cH+"))
        (when (looking-at run)
          (goto-char (match-end 0))))
      (point)))
   (t
    (let ((cats (char-category-set (char-after pos)))
          lead)
      (cond
       ((aref cats ?K)
        (while (and (> pos limit)
                    (aref (char-category-set (char-after (1- pos))) ?K))
          (setq pos (1- pos))))
       (t
        ;; CATS is reassigned inside the loop test on purpose: when the
        ;; loop stops it holds the categories of the last character
        ;; examined, which the `cond' below inspects.
        (while (and (> pos limit)
                    (aref (setq cats
                                (char-category-set (char-after (1- pos))))
                          ?H))
          (setq pos (1- pos)))
        (setq lead (cond ((aref cats ?C) ?C)
                         ((aref cats ?K) ?K)
                         ((aref cats ?A) ?A)))
        (when lead
          (setq pos (1- pos))
          (while (and (> pos limit)
                      (aref (char-category-set (char-after (1- pos)))
                            lead))
            (setq pos (1- pos))))))
      pos))))
981 | |
;; For every `char-script-table' entry whose script is `han' or
;; `kana', store the matching boundary function in
;; `find-word-boundary-function-table' under the same key.  Entries
;; with any other script are left alone.
(map-char-table
 (lambda (char script)
   (let ((finder (cdr (assq script
                            '((han . next-word-boundary-han)
                              (kana . next-word-boundary-kana))))))
     (when finder
       (set-char-table-range find-word-boundary-function-table
                             char finder))))
 char-script-table)
991 | |
;; Category pairs assigned to `word-combining-categories' and
;; `word-separating-categories'.
(setq word-combining-categories
      '((?l . ?l))
      word-separating-categories        ; (2-byte character sets)
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
1006 | |
1218 ;;; Local Variables: | 1007 ;;; Local Variables: |
1219 ;;; coding: iso-2022-7bit | 1008 ;;; coding: utf-8-emacs |
1220 ;;; End: | 1009 ;;; End: |
1221 | 1010 |
1222 ;;; characters.el ends here | 1011 ;;; characters.el ends here |