comparison lisp/international/characters.el @ 89483:2f877ed80fa6

*** empty log message ***
author Kenichi Handa <handa@m17n.org>
date Mon, 08 Sep 2003 12:53:41 +0000
parents 375f2633d815 f040012c16bb
children 1ad3832f1d1d
comparison
equal deleted inserted replaced
88123:375f2633d815 89483:2f877ed80fa6
1 ;;; characters.el --- set syntax and category for multibyte characters 1 ;;; characters.el --- set syntax and category for multibyte characters
2 2
3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. 3 ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation. 4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. 5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 ;; Copyright (C) 2003
7 ;; National Institute of Advanced Industrial Science and Technology (AIST)
8 ;; Registration Number H13PRO009
6 9
7 ;; Keywords: multibyte character, character set, syntax, category 10 ;; Keywords: multibyte character, character set, syntax, category
8 11
9 ;; This file is part of GNU Emacs. 12 ;; This file is part of GNU Emacs.
10 13
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the 25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, 26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA. 27 ;; Boston, MA 02111-1307, USA.
25 28
26 ;;; Commentary: 29 ;;; Commentary:
27
28 ;; This file contains multibyte characters. Save this file always in
29 ;; the coding system `iso-2022-7bit'.
30
31 ;; This file does not define the syntax for Latin-N character sets;
32 ;; those are defined by the files latin-N.el.
33 30
34 ;;; Code: 31 ;;; Code:
35 32
36 ;;; Predefined categories. 33 ;;; Predefined categories.
37 34
96 93
97 ;;; Setting syntax and category. 94 ;;; Setting syntax and category.
98 95
99 ;; ASCII 96 ;; ASCII
100 97
101 (let ((ch 32)) 98 ;; All ASCII characters have the category `a' (ASCII) and `l' (Latin).
102 (while (< ch 127) ; All ASCII characters have 99 (modify-category-entry '(32 . 127) ?a)
103 (modify-category-entry ch ?a) ; the category `a' (ASCII) 100 (modify-category-entry '(32 . 127) ?l)
104 (modify-category-entry ch ?l) ; and `l' (Latin). 101
105 (setq ch (1+ ch)))) 102 ;; Deal with the CJK charsets first. Since the syntax of blocks is
106 103 ;; defined per charset, and the charsets may contain e.g. Latin
107 ;; Arabic character set 104 ;; characters, we end up with the wrong syntax definitions if we're
108 105 ;; not careful.
109 (let ((charsets '(arabic-iso8859-6 106
110 arabic-digit 107 ;; Chinese characters (Unicode)
111 arabic-1-column 108 (modify-category-entry '(#x3400 . #x9FAF) ?C)
112 arabic-2-column))) 109 (modify-category-entry '(#x3400 . #x9FAF) ?c)
113 (while charsets 110 (modify-category-entry '(#x3400 . #x9FAF) ?|)
114 ;; (modify-syntax-entry (make-char (car charsets)) "w") 111 (modify-category-entry '(#xF900 . #xFAFF) ?C)
115 (modify-category-entry (make-char (car charsets)) ?b) 112 (modify-category-entry '(#xF900 . #xFAFF) ?c)
116 (setq charsets (cdr charsets)))) 113 (modify-category-entry '(#xF900 . #xFAFF) ?|)
117 (let ((ch #x600))
118 (while (<= ch #x6ff)
119 (modify-category-entry (decode-char 'ucs ch) ?b)
120 (setq ch (1+ ch)))
121 (setq ch #xfb50)
122 (while (<= ch #xfdff)
123 (modify-category-entry (decode-char 'ucs ch) ?b)
124 (setq ch (1+ ch)))
125 (setq ch #xfe70)
126 (while (<= ch #xfefe)
127 (modify-category-entry (decode-char 'ucs ch) ?b)
128 (setq ch (1+ ch))))
129 114
130 ;; Chinese character set (GB2312) 115 ;; Chinese character set (GB2312)
131 116
132 ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w") 117 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2121 #x217E)
133 (modify-syntax-entry (make-char 'chinese-gb2312 33) "_") 118 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2221 #x227E)
134 (modify-syntax-entry (make-char 'chinese-gb2312 34) "_") 119 (map-charset-chars #'modify-syntax-entry 'chinese-gb2312 "_" #x2921 #x297E)
135 (modify-syntax-entry (make-char 'chinese-gb2312 41) "_") 120
136 (modify-syntax-entry ?\$A!2(B "($A!3(B") 121 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
137 (modify-syntax-entry ?\$A!4(B "($A!5(B") 122 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?|)
138 (modify-syntax-entry ?\$A!6(B "($A!7(B") 123 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2330 #x2339)
139 (modify-syntax-entry ?\$A!8(B "($A!9(B") 124 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2341 #x235A)
140 (modify-syntax-entry ?\$A!:(B "($A!;(B") 125 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?A #x2361 #x237A)
141 (modify-syntax-entry ?\$A!<(B "($A!=(B") 126 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?H #x2421 #x247E)
142 (modify-syntax-entry ?\$A!>(B "($A!?(B") 127 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?K #x2521 #x257E)
143 (modify-syntax-entry ?\$A#((B "($A#)(B") 128 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?G #x2621 #x267E)
144 (modify-syntax-entry ?\$A#{(B "($A#}(B") 129 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?Y #x2721 #x277E)
145 (modify-syntax-entry ?\$A#[(B "($A#](B") 130 (map-charset-chars #'modify-category-entry 'chinese-gb2312 ?C #x3021 #x7E7E)
146 (modify-syntax-entry ?\$A!3(B ")$A!2(B")
147 (modify-syntax-entry ?\$A!5(B ")$A!4(B")
148 (modify-syntax-entry ?\$A!7(B ")$A!6(B")
149 (modify-syntax-entry ?\$A!9(B ")$A!8(B")
150 (modify-syntax-entry ?\$A!;(B ")$A!:(B")
151 (modify-syntax-entry ?\$A!=(B ")$A!<(B")
152 (modify-syntax-entry ?\$A!?(B ")$A!>(B")
153 (modify-syntax-entry ?\$A#)(B ")$A#((B")
154 (modify-syntax-entry ?\$A#}(B ")$A#{(B")
155 (modify-syntax-entry ?\$A#](B ")$A#[(B")
156 ;; Unicode equivalents of above
157 (modify-syntax-entry ?\$,2=T(B "($,2=U(B")
158 (modify-syntax-entry ?\$,2=H(B "($,2=I(B")
159 (modify-syntax-entry ?\$,2=J(B "($,2=K(B")
160 (modify-syntax-entry ?\$,2=L(B "($,2=M(B")
161 (modify-syntax-entry ?\$,2=N(B "($,2=O(B")
162 (modify-syntax-entry ?\$,2=V(B "($,2=W(B")
163 (modify-syntax-entry ?\$,2=P(B "($,2=Q(B")
164 (modify-syntax-entry ?\$,2=U(B ")$,2=T(B")
165 (modify-syntax-entry ?\$,2=I(B ")$,2=H(B")
166 (modify-syntax-entry ?\$,2=K(B ")$,2=J(B")
167 (modify-syntax-entry ?\$,2=M(B ")$,2=L(B")
168 (modify-syntax-entry ?\$,2=O(B ")$,2=N(B")
169 (modify-syntax-entry ?\$,2=W(B ")$,2=V(B")
170 (modify-syntax-entry ?\$,2=Q(B ")$,2=P(B")
171
172 (let ((chars "$A#,!"!##.!$#;#:#?#!!C!-!'#|#_!.!/!0!1#"!e#`!d(B"))
173 (dotimes (i (length chars))
174 (modify-syntax-entry (aref chars i) ".")))
175
176 (modify-category-entry (make-char 'chinese-gb2312) ?c)
177 (modify-category-entry (make-char 'chinese-gb2312) ?\|)
178 (modify-category-entry (make-char 'chinese-gb2312 35) ?A)
179 (modify-category-entry (make-char 'chinese-gb2312 36) ?H)
180 (modify-category-entry (make-char 'chinese-gb2312 37) ?K)
181 (modify-category-entry (make-char 'chinese-gb2312 38) ?G)
182 (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
183 (let ((row 48))
184 (while (< row 127)
185 (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
186 (setq row (1+ row))))
187 131
188 ;; Chinese character set (BIG5) 132 ;; Chinese character set (BIG5)
189 133
190 134 (map-charset-chars #'modify-category-entry 'big5 ?c)
191 135 (map-charset-chars #'modify-category-entry 'big5 ?C #xA259 #xA25F)
192 (let ((from (decode-big5-char #xA141)) 136 (map-charset-chars #'modify-category-entry 'big5 ?C #xA440 #xC67E)
193 (to (decode-big5-char #xA15D))) 137 (map-charset-chars #'modify-category-entry 'big5 ?C #xC940 #xF9DF)
194 (while (< from to) 138 (map-charset-chars #'modify-category-entry 'big5 ?|)
195 (modify-syntax-entry from ".") 139
196 (setq from (1+ from)))) 140
197 (let ((from (decode-big5-char #xA1A5)) 141 ;; Chinese character set (CNS11643)
198 (to (decode-big5-char #xA1AD))) 142
199 (while (< from to) 143 (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
200 (modify-syntax-entry from ".") 144 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
201 (setq from (1+ from)))) 145 chinese-cns11643-7))
202 (let ((from (decode-big5-char #xA1AD)) 146 (map-charset-chars #'modify-category-entry c ?c)
203 (to (decode-big5-char #xA2AF))) 147 (if (eq c 'chinese-cns11643-1)
204 (while (< from to) 148 (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E)
205 (modify-syntax-entry from "_") 149 (map-charset-chars #'modify-category-entry c ?C))
206 (setq from (1+ from)))) 150 (map-charset-chars #'modify-category-entry c ?|))
207 151
208 (let ((parens "$(0!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c(B") 152 ;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213)
153
154 (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k)
155
156 (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r)
157
158 (dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212
159 japanese-jisx0213-1 japanese-jisx0213-2))
160 (map-charset-chars #'modify-category-entry l ?j)
161 (if (eq l 'japanese-jisx0213-1)
162 (map-charset-chars #'modify-category-entry l ?\| #x2E21 #x7E7F)
163 (map-charset-chars #'modify-category-entry l ?\|)))
164
165 ;; Unicode equivalents of JISX0201-kana
166 (let ((range '(#xff61 . #xff9f)))
167 (modify-category-entry range ?k)
168 (modify-category-entry range ?j)
169 (modify-category-entry range ?\|))
170
171 ;; Katakana block
172 (let ((range '(#x30a0 . #x30ff)))
173 ;; ?K is double width, ?k isn't specified
174 (modify-category-entry range ?K)
175 (modify-category-entry range ?\|))
176
177 ;; Hiragana block
178 (let ((range '(#x3040 . #x309d)))
179 ;; ?H is actually defined to be double width
180 ;;(modify-category-entry range ?H)
181 ;;(modify-category-entry range ?\|)
182 )
183
184 ;; JISX0208
185 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
186 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E)
187 (let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))) ; chars to be given word ("w") syntax
188 (dolist (elt chars) ; ELT is bound to each character of CHARS in turn
189 (modify-syntax-entry elt "w"))) ; use ELT, not (car chars), so every listed char is set, not just the first
190
191 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E)
192 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E)
193 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E)
194 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E)
195 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E)
196 (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E)
197 (modify-category-entry ?ー ?K)
198 (let ((chars '(?゛ ?゜)))
199 (while chars
200 (modify-category-entry (car chars) ?K)
201 (modify-category-entry (car chars) ?H)
202 (setq chars (cdr chars))))
203 (let ((chars '(?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇)))
204 (while chars
205 (modify-category-entry (car chars) ?C)
206 (setq chars (cdr chars))))
207
208 ;; JISX0212
209
210 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0212 "_" #x2121 #x237E)
211
212 ;; JISX0201-Kana
213
214 (let ((chars '(?。 ?、 ?・)))
215 (while chars
216 (modify-syntax-entry (car chars) ".")
217 (setq chars (cdr chars))))
218
219 (modify-syntax-entry ?\「 "(」") ; open-paren class, matching character is 」
220 (modify-syntax-entry ?\」 ")「") ; close-paren class (was wrongly "("), matching character is 「
221
222 ;; Korean character set (KSC5601)
223
224 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)
225
226 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E)
227 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E)
228 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E)
229 (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E)
230 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339)
231 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A)
232 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A)
233 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E)
234 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E)
235 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E)
236 (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E)
237
238 ;; These are in more than one charset.
239 (let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛"
240 "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"
241 "()[]{}"))
209 open close) 242 open close)
210 (dotimes (i (/ (length parens) 2)) 243 (dotimes (i (/ (length parens) 2))
211 (setq open (aref parens (* i 2)) 244 (setq open (aref parens (* i 2))
212 close (aref parens (1+ (* i 2)))) 245 close (aref parens (1+ (* i 2))))
213 (modify-syntax-entry open (format "(%c" close)) 246 (modify-syntax-entry open (format "(%c" close))
214 (modify-syntax-entry close (format ")%c" open)))) 247 (modify-syntax-entry close (format ")%c" open))))
215 248
216 (let ((generic-big5-1-char (make-char 'chinese-big5-1)) 249 ;; Arabic character set
217 (generic-big5-2-char (make-char 'chinese-big5-2))) 250
218 ;; (modify-syntax-entry generic-big5-1-char "w") 251 (let ((charsets '(arabic-iso8859-6
219 ;; (modify-syntax-entry generic-big5-2-char "w") 252 arabic-digit
220 253 arabic-1-column
221 (modify-category-entry generic-big5-1-char ?c) 254 arabic-2-column)))
222 (modify-category-entry generic-big5-2-char ?c) 255 (while charsets
223 256 (map-charset-chars #'modify-category-entry (car charsets) ?b)
224 (modify-category-entry generic-big5-1-char ?C) 257 (setq charsets (cdr charsets))))
225 (modify-category-entry generic-big5-2-char ?C) 258 (modify-category-entry '(#x600 . #x6ff) ?b)
226 259 (modify-category-entry '(#xfb50 . #xfdff) ?b)
227 (modify-category-entry generic-big5-1-char ?\|) 260 (modify-category-entry '(#xfe70 . #xfefe) ?b)
228 (modify-category-entry generic-big5-2-char ?\|))
229
230
231 ;; Chinese character set (CNS11643)
232
233 (let ((cns-list '(chinese-cns11643-1
234 chinese-cns11643-2
235 chinese-cns11643-3
236 chinese-cns11643-4
237 chinese-cns11643-5
238 chinese-cns11643-6
239 chinese-cns11643-7))
240 generic-char)
241 (while cns-list
242 (setq generic-char (make-char (car cns-list)))
243 ;; (modify-syntax-entry generic-char "w")
244 (modify-category-entry generic-char ?c)
245 (modify-category-entry generic-char ?C)
246 (modify-category-entry generic-char ?|)
247 (setq cns-list (cdr cns-list))))
248 261
249 ;; Cyrillic character set (ISO-8859-5) 262 ;; Cyrillic character set (ISO-8859-5)
250 263
251 (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y) 264 (modify-syntax-entry ?№ ".")
252
253 (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ")
254 (modify-syntax-entry ?,L-(B ".")
255 (modify-syntax-entry ?,Lp(B ".")
256 (modify-syntax-entry ?,L}(B ".")
257 (let ((tbl (standard-case-table)))
258 (set-case-syntax-pair ?,L!(B ?,Lq(B tbl)
259 (set-case-syntax-pair ?,L"(B ?,Lr(B tbl)
260 (set-case-syntax-pair ?,L#(B ?,Ls(B tbl)
261 (set-case-syntax-pair ?,L$(B ?,Lt(B tbl)
262 (set-case-syntax-pair ?,L%(B ?,Lu(B tbl)
263 (set-case-syntax-pair ?,L&(B ?,Lv(B tbl)
264 (set-case-syntax-pair ?,L'(B ?,Lw(B tbl)
265 (set-case-syntax-pair ?,L((B ?,Lx(B tbl)
266 (set-case-syntax-pair ?,L)(B ?,Ly(B tbl)
267 (set-case-syntax-pair ?,L*(B ?,Lz(B tbl)
268 (set-case-syntax-pair ?,L+(B ?,L{(B tbl)
269 (set-case-syntax-pair ?,L,(B ?,L|(B tbl)
270 (set-case-syntax-pair ?,L.(B ?,L~(B tbl)
271 (set-case-syntax-pair ?,L/(B ?,L(B tbl)
272 (set-case-syntax-pair ?,L0(B ?,LP(B tbl)
273 (set-case-syntax-pair ?,L1(B ?,LQ(B tbl)
274 (set-case-syntax-pair ?,L2(B ?,LR(B tbl)
275 (set-case-syntax-pair ?,L3(B ?,LS(B tbl)
276 (set-case-syntax-pair ?,L4(B ?,LT(B tbl)
277 (set-case-syntax-pair ?,L5(B ?,LU(B tbl)
278 (set-case-syntax-pair ?,L6(B ?,LV(B tbl)
279 (set-case-syntax-pair ?,L7(B ?,LW(B tbl)
280 (set-case-syntax-pair ?,L8(B ?,LX(B tbl)
281 (set-case-syntax-pair ?,L9(B ?,LY(B tbl)
282 (set-case-syntax-pair ?,L:(B ?,LZ(B tbl)
283 (set-case-syntax-pair ?,L;(B ?,L[(B tbl)
284 (set-case-syntax-pair ?,L<(B ?,L\(B tbl)
285 (set-case-syntax-pair ?,L=(B ?,L](B tbl)
286 (set-case-syntax-pair ?,L>(B ?,L^(B tbl)
287 (set-case-syntax-pair ?,L?(B ?,L_(B tbl)
288 (set-case-syntax-pair ?,L@(B ?,L`(B tbl)
289 (set-case-syntax-pair ?,LA(B ?,La(B tbl)
290 (set-case-syntax-pair ?,LB(B ?,Lb(B tbl)
291 (set-case-syntax-pair ?,LC(B ?,Lc(B tbl)
292 (set-case-syntax-pair ?,LD(B ?,Ld(B tbl)
293 (set-case-syntax-pair ?,LE(B ?,Le(B tbl)
294 (set-case-syntax-pair ?,LF(B ?,Lf(B tbl)
295 (set-case-syntax-pair ?,LG(B ?,Lg(B tbl)
296 (set-case-syntax-pair ?,LH(B ?,Lh(B tbl)
297 (set-case-syntax-pair ?,LI(B ?,Li(B tbl)
298 (set-case-syntax-pair ?,LJ(B ?,Lj(B tbl)
299 (set-case-syntax-pair ?,LK(B ?,Lk(B tbl)
300 (set-case-syntax-pair ?,LL(B ?,Ll(B tbl)
301 (set-case-syntax-pair ?,LM(B ?,Lm(B tbl)
302 (set-case-syntax-pair ?,LN(B ?,Ln(B tbl)
303 (set-case-syntax-pair ?,LO(B ?,Lo(B tbl)
304 (set-case-syntax-pair ?$,1(!(B ?$,1(q(B tbl)
305 (set-case-syntax-pair ?$,1("(B ?$,1(r(B tbl)
306 (set-case-syntax-pair ?$,1(#(B ?$,1(s(B tbl)
307 (set-case-syntax-pair ?$,1($(B ?$,1(t(B tbl)
308 (set-case-syntax-pair ?$,1(%(B ?$,1(u(B tbl)
309 (set-case-syntax-pair ?$,1(&(B ?$,1(v(B tbl)
310 (set-case-syntax-pair ?$,1('(B ?$,1(w(B tbl)
311 (set-case-syntax-pair ?$,1(((B ?$,1(x(B tbl)
312 (set-case-syntax-pair ?$,1()(B ?$,1(y(B tbl)
313 (set-case-syntax-pair ?$,1(*(B ?$,1(z(B tbl)
314 (set-case-syntax-pair ?$,1(+(B ?$,1({(B tbl)
315 (set-case-syntax-pair ?$,1(,(B ?$,1(|(B tbl)
316 (set-case-syntax-pair ?$,1(.(B ?$,1(~(B tbl)
317 (set-case-syntax-pair ?$,1(/(B ?$,1((B tbl)
318 (set-case-syntax-pair ?$,1(0(B ?$,1(P(B tbl)
319 (set-case-syntax-pair ?$,1(1(B ?$,1(Q(B tbl)
320 (set-case-syntax-pair ?$,1(2(B ?$,1(R(B tbl)
321 (set-case-syntax-pair ?$,1(3(B ?$,1(S(B tbl)
322 (set-case-syntax-pair ?$,1(4(B ?$,1(T(B tbl)
323 (set-case-syntax-pair ?$,1(5(B ?$,1(U(B tbl)
324 (set-case-syntax-pair ?$,1(6(B ?$,1(V(B tbl)
325 (set-case-syntax-pair ?$,1(7(B ?$,1(W(B tbl)
326 (set-case-syntax-pair ?$,1(8(B ?$,1(X(B tbl)
327 (set-case-syntax-pair ?$,1(9(B ?$,1(Y(B tbl)
328 (set-case-syntax-pair ?$,1(:(B ?$,1(Z(B tbl)
329 (set-case-syntax-pair ?$,1(;(B ?$,1([(B tbl)
330 (set-case-syntax-pair ?$,1(<(B ?$,1(\(B tbl)
331 (set-case-syntax-pair ?$,1(=(B ?$,1(](B tbl)
332 (set-case-syntax-pair ?$,1(>(B ?$,1(^(B tbl)
333 (set-case-syntax-pair ?$,1(?(B ?$,1(_(B tbl)
334 (set-case-syntax-pair ?$,1(@(B ?$,1(`(B tbl)
335 (set-case-syntax-pair ?$,1(A(B ?$,1(a(B tbl)
336 (set-case-syntax-pair ?$,1(B(B ?$,1(b(B tbl)
337 (set-case-syntax-pair ?$,1(C(B ?$,1(c(B tbl)
338 (set-case-syntax-pair ?$,1(D(B ?$,1(d(B tbl)
339 (set-case-syntax-pair ?$,1(E(B ?$,1(e(B tbl)
340 (set-case-syntax-pair ?$,1(F(B ?$,1(f(B tbl)
341 (set-case-syntax-pair ?$,1(G(B ?$,1(g(B tbl)
342 (set-case-syntax-pair ?$,1(H(B ?$,1(h(B tbl)
343 (set-case-syntax-pair ?$,1(I(B ?$,1(i(B tbl)
344 (set-case-syntax-pair ?$,1(J(B ?$,1(j(B tbl)
345 (set-case-syntax-pair ?$,1(K(B ?$,1(k(B tbl)
346 (set-case-syntax-pair ?$,1(L(B ?$,1(l(B tbl)
347 (set-case-syntax-pair ?$,1(M(B ?$,1(m(B tbl)
348 (set-case-syntax-pair ?$,1(N(B ?$,1(n(B tbl)
349 (set-case-syntax-pair ?$,1(O(B ?$,1(o(B tbl))
350
351 ;; Devanagari character set
352
353 ;;; Commented out since the categories appear not to be used anywhere
354 ;;; and word syntax is the default.
355 ;; (let ((deflist '(;; chars syntax category
356 ;; ("$(5!!!"!#(B" "w" ?7) ; vowel-modifying diacritical mark
357 ;; ; chandrabindu, anuswar, visarga
358 ;; ("$(5!$(B-$(5!2(B" "w" ?1) ; independent vowel
359 ;; ("$(5!3(B-$(5!X(B" "w" ?0) ; consonant
360 ;; ("$(5!Z(B-$(5!g(B" "w" ?8) ; matra
361 ;; ("$(5!q(B-$(5!z(B" "w" ?6) ; digit
362 ;; ;; Unicode equivalents
363 ;; ("$,15A5B5C(B" "w" ?7) ; vowel-modifying diacritical mark
364 ;; ; chandrabindu, anuswar, visarga
365 ;; ("$,15E(B-$,15M(B" "w" ?1) ; independent vowel
366 ;; ("$,15U(B-$,15y(B" "w" ?0) ; consonant
367 ;; ("$,15~(B-$,16)(B" "w" ?8) ; matra
368 ;; ("$,16F(B-$,16O(B" "w" ?6) ; digit
369 ;; ))
370 ;; elm chars len syntax category to ch i)
371 ;; (while deflist
372 ;; (setq elm (car deflist))
373 ;; (setq chars (car elm)
374 ;; len (length chars)
375 ;; syntax (nth 1 elm)
376 ;; category (nth 2 elm)
377 ;; i 0)
378 ;; (while (< i len)
379 ;; (if (= (aref chars i) ?-)
380 ;; (setq i (1+ i)
381 ;; to (aref chars i))
382 ;; (setq ch (aref chars i)
383 ;; to ch))
384 ;; (while (<= ch to)
385 ;; (modify-syntax-entry ch syntax)
386 ;; (modify-category-entry ch category)
387 ;; (setq ch (1+ ch)))
388 ;; (setq i (1+ i)))
389 ;; (setq deflist (cdr deflist))))
390 265
391 ;; Ethiopic character set 266 ;; Ethiopic character set
392 267
393 (modify-category-entry (make-char 'ethiopic) ?e) 268 (modify-category-entry '(#x1200 . #x137b) ?e)
394 ;; (modify-syntax-entry (make-char 'ethiopic) "w") 269 (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ? ?)))
395 (dotimes (i (1+ (- #x137c #x1200)))
396 (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e))
397 (let ((chars '(?$(3$h(B ?$(3$i(B ?$(3$j(B ?$(3$k(B ?$(3$l(B ?$(3$m(B ?$(3$n(B ?$(3$o(B ?$(3%i(B ?$(3%t(B ?$(3%u(B ?$(3%v(B ?$(3%w(B ?$(3%x(B
398 ;; Unicode equivalents of the above:
399 ?$,1Q!(B ?$,1Q"(B ?$,1Q#(B ?$,1Q$(B ?$,1Q%(B ?$,1Q&(B ?$,1Q'(B ?$,1Q((B ?$,3op(B ?$,3o{(B ?$,3o|(B ?$,3o}(B ?$,3o~(B ?$,3o(B)))
400 (while chars 270 (while chars
401 (modify-syntax-entry (car chars) ".") 271 (modify-syntax-entry (car chars) ".")
402 (setq chars (cdr chars)))) 272 (setq chars (cdr chars))))
403 273 (map-charset-chars #'modify-category-entry 'ethiopic ?e)
404 ;; Greek character set (ISO-8859-7)
405
406 (modify-category-entry (make-char 'greek-iso8859-7) ?g)
407 (let ((c #x370))
408 (while (<= c #x3ff)
409 (modify-category-entry (decode-char 'ucs c) ?g)
410 (setq c (1+ c))))
411
412 ;; (let ((c 182))
413 ;; (while (< c 255)
414 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
415 ;; (setq c (1+ c))))
416 ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
417 (modify-syntax-entry ?,F7(B ".")
418 (modify-syntax-entry ?,F;(B ".")
419 (modify-syntax-entry ?,F=(B ".")
420 (let ((tbl (standard-case-table)))
421 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
422 ;; in several cases.
423 (set-case-syntax ?,F!(B "." tbl)
424 (set-case-syntax ?,F"(B "." tbl)
425 (set-case-syntax ?,F&(B "." tbl)
426 (set-case-syntax ?,F&(B "_" tbl)
427 (set-case-syntax ?,F'(B "." tbl)
428 (set-case-syntax ?,F)(B "_" tbl)
429 (set-case-syntax ?,F+(B "." tbl)
430 (set-case-syntax ?,F,(B "_" tbl)
431 (set-case-syntax ?,F-(B "_" tbl)
432 (set-case-syntax ?,F/(B "." tbl)
433 (set-case-syntax ?,F0(B "_" tbl)
434 (set-case-syntax ?,F1(B "_" tbl)
435 ;; (set-case-syntax ?,F7(B "_" tbl)
436 ;; (set-case-syntax ?,F=(B "_" tbl)
437 (set-case-syntax-pair ?,FA(B ?,Fa(B tbl)
438 (set-case-syntax-pair ?,FB(B ?,Fb(B tbl)
439 (set-case-syntax-pair ?,FC(B ?,Fc(B tbl)
440 (set-case-syntax-pair ?,FD(B ?,Fd(B tbl)
441 (set-case-syntax-pair ?,FE(B ?,Fe(B tbl)
442 (set-case-syntax-pair ?,FF(B ?,Ff(B tbl)
443 (set-case-syntax-pair ?,FG(B ?,Fg(B tbl)
444 (set-case-syntax-pair ?,FH(B ?,Fh(B tbl)
445 (set-case-syntax-pair ?,FI(B ?,Fi(B tbl)
446 (set-case-syntax-pair ?,FJ(B ?,Fj(B tbl)
447 (set-case-syntax-pair ?,FK(B ?,Fk(B tbl)
448 (set-case-syntax-pair ?,FL(B ?,Fl(B tbl)
449 (set-case-syntax-pair ?,FM(B ?,Fm(B tbl)
450 (set-case-syntax-pair ?,FN(B ?,Fn(B tbl)
451 (set-case-syntax-pair ?,FO(B ?,Fo(B tbl)
452 (set-case-syntax-pair ?,FP(B ?,Fp(B tbl)
453 (set-case-syntax-pair ?,FQ(B ?,Fq(B tbl)
454 (set-case-syntax-pair ?,FS(B ?,Fs(B tbl)
455 (set-case-syntax-pair ?,FT(B ?,Ft(B tbl)
456 (set-case-syntax-pair ?,FU(B ?,Fu(B tbl)
457 (set-case-syntax-pair ?,FV(B ?,Fv(B tbl)
458 (set-case-syntax-pair ?,FW(B ?,Fw(B tbl)
459 (set-case-syntax-pair ?,FX(B ?,Fx(B tbl)
460 (set-case-syntax-pair ?,FY(B ?,Fy(B tbl)
461 (set-case-syntax-pair ?,FZ(B ?,Fz(B tbl)
462 (set-case-syntax-pair ?,F[(B ?,F{(B tbl)
463 (set-case-syntax-pair ?,F?(B ?,F~(B tbl)
464 (set-case-syntax-pair ?,F>(B ?,F}(B tbl)
465 (set-case-syntax-pair ?,F<(B ?,F|(B tbl)
466 (set-case-syntax-pair ?,F6(B ?,F\(B tbl)
467 (set-case-syntax-pair ?,F8(B ?,F](B tbl)
468 (set-case-syntax-pair ?,F9(B ?,F^(B tbl)
469 (set-case-syntax-pair ?,F:(B ?,F_(B tbl)
470 ;; Unicode equivalents
471 (set-case-syntax-pair ?$,1&q(B ?$,1'1(B tbl)
472 (set-case-syntax-pair ?$,1&r(B ?$,1'2(B tbl)
473 (set-case-syntax-pair ?$,1&s(B ?$,1'3(B tbl)
474 (set-case-syntax-pair ?$,1&t(B ?$,1'4(B tbl)
475 (set-case-syntax-pair ?$,1&u(B ?$,1'5(B tbl)
476 (set-case-syntax-pair ?$,1&v(B ?$,1'6(B tbl)
477 (set-case-syntax-pair ?$,1&w(B ?$,1'7(B tbl)
478 (set-case-syntax-pair ?$,1&x(B ?$,1'8(B tbl)
479 (set-case-syntax-pair ?$,1&y(B ?$,1'9(B tbl)
480 (set-case-syntax-pair ?$,1&z(B ?$,1':(B tbl)
481 (set-case-syntax-pair ?$,1&{(B ?$,1';(B tbl)
482 (set-case-syntax-pair ?$,1&|(B ?$,1'<(B tbl)
483 (set-case-syntax-pair ?$,1&}(B ?$,1'=(B tbl)
484 (set-case-syntax-pair ?$,1&~(B ?$,1'>(B tbl)
485 (set-case-syntax-pair ?$,1&(B ?$,1'?(B tbl)
486 (set-case-syntax-pair ?$,1' (B ?$,1'@(B tbl)
487 (set-case-syntax-pair ?$,1'!(B ?$,1'A(B tbl)
488 (set-case-syntax-pair ?$,1'#(B ?$,1'C(B tbl)
489 (set-case-syntax-pair ?$,1'$(B ?$,1'D(B tbl)
490 (set-case-syntax-pair ?$,1'%(B ?$,1'E(B tbl)
491 (set-case-syntax-pair ?$,1'&(B ?$,1'F(B tbl)
492 (set-case-syntax-pair ?$,1''(B ?$,1'G(B tbl)
493 (set-case-syntax-pair ?$,1'((B ?$,1'H(B tbl)
494 (set-case-syntax-pair ?$,1')(B ?$,1'I(B tbl)
495 (set-case-syntax-pair ?$,1'*(B ?$,1'J(B tbl)
496 (set-case-syntax-pair ?$,1'+(B ?$,1'K(B tbl)
497 (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl)
498 (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl)
499 (set-case-syntax-pair ?$,1&l(B ?$,1'L(B tbl)
500 (set-case-syntax-pair ?$,1&f(B ?$,1',(B tbl)
501 (set-case-syntax-pair ?$,1&h(B ?$,1'-(B tbl)
502 (set-case-syntax-pair ?$,1&i(B ?$,1'.(B tbl)
503 (set-case-syntax-pair ?$,1&j(B ?$,1'/(B tbl))
504 274
505 ;; Hebrew character set (ISO-8859-8) 275 ;; Hebrew character set (ISO-8859-8)
506 276
507 (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) 277 (modify-syntax-entry #x5be ".") ; MAQAF
508 (let ((c #x591)) 278 (modify-syntax-entry #x5c0 ".") ; PASEQ
509 (while (<= c #x5f4) 279 (modify-syntax-entry #x5c3 ".") ; SOF PASUQ
510 (modify-category-entry (decode-char 'ucs c) ?w) 280 (modify-syntax-entry #x5f3 ".") ; GERESH
511 (setq c (1+ c)))) 281 (modify-syntax-entry #x5f4 ".") ; GERSHAYIM
512
513 (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
514 (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
515 (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF
516 (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ
517 (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ
518 (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH
519 (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM
520
521 ;; (let ((c 224))
522 ;; (while (< c 251)
523 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
524 ;; (setq c (1+ c))))
525 ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
526 282
527 ;; Indian character set (IS 13194 and other Emacs original Indian charsets) 283 ;; Indian character set (IS 13194 and other Emacs original Indian charsets)
528 284
529 (modify-category-entry (make-char 'indian-is13194) ?i) 285 (modify-category-entry '(#x901 . #x970) ?i)
530 (modify-category-entry (make-char 'indian-2-column) ?I) 286 (map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
531 (modify-category-entry (make-char 'indian-glyph) ?I) 287 (map-charset-chars #'modify-category-entry 'indian-2-column ?i)
532 ;; Unicode Devanagari block
533 (let ((c #x901))
534 (while (<= c #x970)
535 (modify-category-entry (decode-char 'ucs c) ?i)
536 (setq c (1+ c))))
537
538 (let ((l '(;; RANGE CATEGORY MEANINGS
539 (#x01 #x03 ?7) ; vowel modifier
540 (#x05 #x14 ?1) ; base vowel
541 (#x15 #x39 ?0) ; consonants
542 (#x3e #x4d ?8) ; vowel modifier
543 (#x51 #x54 ?4) ; stress/tone mark
544 (#x58 #x5f ?0) ; consonants
545 (#x60 #x61 ?1) ; base vowel
546 (#x62 #x63 ?8) ; vowel modifier
547 (#x66 #x6f ?6) ; digits
548 )))
549 (dolist (elt1 '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00))
550 (dolist (elt2 l)
551 (let* ((from (car elt2))
552 (counts (1+ (- (nth 1 elt2) from)))
553 (category (nth 2 elt2)))
554 (dotimes (i counts)
555 (modify-category-entry (decode-char 'ucs (+ elt1 from i))
556 category))))))
557
558 ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
559
560 (modify-category-entry (make-char 'katakana-jisx0201) ?k)
561 (modify-category-entry (make-char 'katakana-jisx0201) ?j)
562 (modify-category-entry (make-char 'latin-jisx0201) ?r)
563 (modify-category-entry (make-char 'japanese-jisx0208) ?j)
564 (modify-category-entry (make-char 'japanese-jisx0212) ?j)
565 (modify-category-entry (make-char 'katakana-jisx0201) ?\|)
566 (modify-category-entry (make-char 'japanese-jisx0208) ?\|)
567 (modify-category-entry (make-char 'japanese-jisx0212) ?\|)
568
569 ;; Unicode equivalents of JISX0201-kana
570 (let ((c #xff61))
571 (while (<= c #xff9f)
572 (modify-category-entry (decode-char 'ucs c) ?k)
573 (modify-category-entry (decode-char 'ucs c) ?j)
574 (modify-category-entry (decode-char 'ucs c) ?\|)
575 (setq c (1+ c))))
576
577 ;; Katakana block
578 (let ((c #x30a0))
579 (while (<= c #x30ff)
580 ;; ?K is double width, ?k isn't specified
581 (modify-category-entry (decode-char 'ucs c) ?k)
582 (modify-category-entry (decode-char 'ucs c) ?j)
583 (modify-category-entry (decode-char 'ucs c) ?\|)
584 (setq c (1+ c))))
585
586 ;; Hiragana block
587 (let ((c #x3040))
588 (while (<= c #x309f)
589 ;; ?H is actually defined to be double width
590 (modify-category-entry (decode-char 'ucs c) ?H)
591 ;;(modify-category-entry (decode-char 'ucs c) ?j)
592 (modify-category-entry (decode-char 'ucs c) ?\|)
593 (setq c (1+ c))))
594
595 ;; JISX0208
596 ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w")
597 (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_")
598 (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_")
599 (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_")
600 (let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B)))
601 (while chars
602 (modify-syntax-entry (car chars) "w")
603 (setq chars (cdr chars))))
604 (modify-syntax-entry ?\$B!J(B "($B!K(B")
605 (modify-syntax-entry ?\$B!N(B "($B!O(B")
606 (modify-syntax-entry ?\$B!P(B "($B!Q(B")
607 (modify-syntax-entry ?\$B!V(B "($B!W(B")
608 (modify-syntax-entry ?\$B!X(B "($B!Y(B")
609 (modify-syntax-entry ?\$B!K(B ")$B!J(B")
610 (modify-syntax-entry ?\$B!O(B ")$B!N(B")
611 (modify-syntax-entry ?\$B!Q(B ")$B!P(B")
612 (modify-syntax-entry ?\$B!W(B ")$B!V(B")
613 (modify-syntax-entry ?\$B!Y(B ")$B!X(B")
614
615 (modify-category-entry (make-char 'japanese-jisx0208 35) ?A)
616 (modify-category-entry (make-char 'japanese-jisx0208 36) ?H)
617 (modify-category-entry (make-char 'japanese-jisx0208 37) ?K)
618 (modify-category-entry (make-char 'japanese-jisx0208 38) ?G)
619 (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y)
620 (let ((row 48))
621 (while (< row 127)
622 (modify-category-entry (make-char 'japanese-jisx0208 row) ?C)
623 (setq row (1+ row))))
624 (modify-category-entry ?$B!<(B ?K)
625 (let ((chars '(?$B!+(B ?$B!,(B)))
626 (while chars
627 (modify-category-entry (car chars) ?K)
628 (modify-category-entry (car chars) ?H)
629 (setq chars (cdr chars))))
630 (let ((chars '(?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B)))
631 (while chars
632 (modify-category-entry (car chars) ?C)
633 (setq chars (cdr chars))))
634
635 ;; JISX0212
636 ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w")
637 (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_")
638 (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_")
639 (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_")
640
641 (modify-category-entry (make-char 'japanese-jisx0212 ) ?C)
642
643 ;; JISX0201-Kana
644 ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
645 (let ((chars '(?(I!(B ?(I$(B ?(I%(B
646 ;; Unicode:
647 ?$,3sa(B ?$,3sd(B ?$,3se(B)))
648 (while chars
649 (modify-syntax-entry (car chars) ".")
650 (setq chars (cdr chars))))
651
652 (modify-syntax-entry ?\(I"(B "((I#(B")
653 (modify-syntax-entry ?\(I#(B "((I"(B")
654
655 ;; Korean character set (KSC5601)
656
657 ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
658 (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
659 (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
660 (modify-syntax-entry (make-char 'korean-ksc5601 38) "_")
661 (modify-syntax-entry (make-char 'korean-ksc5601 39) "_")
662 (modify-syntax-entry (make-char 'korean-ksc5601 40) "_")
663 (modify-syntax-entry (make-char 'korean-ksc5601 41) "_")
664
665 (modify-category-entry (make-char 'korean-ksc5601) ?h)
666 (modify-category-entry (make-char 'korean-ksc5601 35) ?A)
667 (modify-category-entry (make-char 'korean-ksc5601 37) ?G)
668 (modify-category-entry (make-char 'korean-ksc5601 42) ?H)
669 (modify-category-entry (make-char 'korean-ksc5601 43) ?K)
670 (modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
671
672 ;; Latin character set (latin-1,2,3,4,5,8,9)
673
674 (modify-category-entry (make-char 'latin-iso8859-1) ?l)
675 (modify-category-entry (make-char 'latin-iso8859-2) ?l)
676 (modify-category-entry (make-char 'latin-iso8859-3) ?l)
677 (modify-category-entry (make-char 'latin-iso8859-4) ?l)
678 (modify-category-entry (make-char 'latin-iso8859-9) ?l)
679 (modify-category-entry (make-char 'latin-iso8859-14) ?l)
680 (modify-category-entry (make-char 'latin-iso8859-15) ?l)
681
682 (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
683 (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
684 (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
685 (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
686 (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
687 (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
688 (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
689 288
690 ;; Lao character set 289 ;; Lao character set
691 290
692 (modify-category-entry (make-char 'lao) ?o) 291 (modify-category-entry '(#xe80 . #xeff) ?o)
693 (dotimes (i (1+ (- #xeff #xe80))) 292 (map-charset-chars #'modify-category-entry 'lao ?o)
694 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o)) 293
695 294 (let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant
696 (let ((deflist '(;; chars syntax category 295 ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base
697 ("(1!(B-(1N(B" "w" ?0) ; consonant 296 ("ັິ-ືົໍ" "w" ?2) ; vowel upper
698 ("(1PRS]`(B-(1d(B" "w" ?1) ; vowel base 297 ("ຸູ" "w" ?3) ; vowel lower
699 ("(1QT(B-(1W[m(B" "w" ?2) ; vowel upper 298 ("່-໋" "w" ?4) ; tone mark
700 ("(1XY(B" "w" ?3) ; vowel lower 299 ("ຼຽ" "w" ?9) ; semivowel lower
701 ("(1h(B-(1l(B" "w" ?4) ; tone mark 300 ("໐-໙" "w" ?6) ; digit
702 ("(1\(B" "w" ?9) ; semivowel lower 301 ("ຯໆ" "_" ?5) ; symbol
703 ("(1p(B-(1y(B" "w" ?6) ; digit
704 ("(1Of(B" "_" ?5) ; symbol
705 ;; Unicode equivalents
706 ("$,1D!(B-$,1DN(B" "w" ?0) ; consonant
707 ("$,1DPDRDSD]D`(B-$,1Dd(B" "w" ?1) ; vowel base
708 ("$,1DQDT(B-$,1DWD[Dm(B" "w" ?2) ; vowel upper
709 ("$,1DXDY(B" "w" ?3) ; vowel lower
710 ("$,1Dh(B-$,1Dk(B" "w" ?4) ; tone mark
711 ("$,1D\D](B" "w" ?9) ; semivowel lower
712 ("$,1Dp(B-$,1Dy(B" "w" ?6) ; digit
713 ("$,1DODf(B" "_" ?5) ; symbol
714 )) 302 ))
715 elm chars len syntax category to ch i) 303 elm chars len syntax category to ch i)
716 (while deflist 304 (while deflist
717 (setq elm (car deflist)) 305 (setq elm (car deflist))
718 (setq chars (car elm) 306 (setq chars (car elm)
734 (setq i (1+ i))) 322 (setq i (1+ i)))
735 (setq deflist (cdr deflist)))) 323 (setq deflist (cdr deflist))))
736 324
737 ;; Thai character set (TIS620) 325 ;; Thai character set (TIS620)
738 326
739 (modify-category-entry (make-char 'thai-tis620) ?t) 327 (modify-category-entry '(#xe00 . #xe7f) ?t)
740 (dotimes (i (1+ (- #xe7f #xe00))) 328 (map-charset-chars #'modify-category-entry 'thai-tis620 ?t)
741 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
742 329
743 (let ((deflist '(;; chars syntax category 330 (let ((deflist '(;; chars syntax category
744 (",T!(B-,TCEG(B-,TN(B" "w" ?0) ; consonant 331 ("ก-รลว-ฮ" "w" ?0) ; consonant
745 (",TDFPRS`(B-,Te(B" "w" ?1) ; vowel base 332 ("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base
746 (",TQT(B-,TWgn(B" "w" ?2) ; vowel upper 333 ("ัิ-ื็๎" "w" ?2) ; vowel upper
747 (",TX(B-,TZ(B" "w" ?3) ; vowel lower 334 ("ุ-ฺ" "w" ?3) ; vowel lower
748 (",Th(B-,Tm(B" "w" ?4) ; tone mark 335 ("่-ํ" "w" ?4) ; tone mark
749 (",Tp(B-,Ty(B" "w" ?6) ; digit 336 ("๐-๙" "w" ?6) ; digit
750 (",TOf_oz{(B" "_" ?5) ; symbol 337 ("ฯๆ฿๏๚๛" "_" ?5) ; symbol
751 ;; Unicode equivalents
752 ("$,1Ba(B-$,1C#C%C'(B-$,1C.(B" "w" ?0) ; consonant
753 ("$,1C$C&C0C2C3C@(B-$,1CE(B" "w" ?1) ; vowel base
754 ("$,1C1C4(B-$,1C7CGCN(B" "w" ?2) ; vowel upper
755 ("$,1C8(B-$,1C:(B" "w" ?3) ; vowel lower
756 ("$,1CH(B-$,1CM(B" "w" ?4) ; tone mark
757 ("$,1CP(B-$,1CY(B" "w" ?6) ; digit
758 ("$,1C/CFC?COCZC[(B" "_" ?5) ; symbol
759 )) 338 ))
760 elm chars len syntax category to ch i) 339 elm chars len syntax category to ch i)
761 (while deflist 340 (while deflist
762 (setq elm (car deflist)) 341 (setq elm (car deflist))
763 (setq chars (car elm) 342 (setq chars (car elm)
779 (setq i (1+ i))) 358 (setq i (1+ i)))
780 (setq deflist (cdr deflist)))) 359 (setq deflist (cdr deflist))))
781 360
782 ;; Tibetan character set 361 ;; Tibetan character set
783 362
784 (modify-category-entry (make-char 'tibetan) ?q) 363 (modify-category-entry '(#xf00 . #xfff) ?q)
785 (modify-category-entry (make-char 'tibetan-1-column) ?q) 364 (map-charset-chars #'modify-category-entry 'tibetan ?q)
786 (dotimes (i (1+ (- #xfff #xf00))) 365 (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
787 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q))
788 366
789 (let ((deflist '(;; chars syntax category 367 (let ((deflist '(;; chars syntax category
790 ("4$(7"!0"!1(B-4$(7"J0"J14"K0"K1(B" "w" ?0) ; consonant 368 ("ཀ-ཀྵཪ" "w" ?0) ; consonant
791 ("$(7#!(B-$(7#J#K#L#M!"!#(B" "w" ?0) ; 369 ("ྐ-ྐྵྺྻྼ" "w" ?0) ;
792 ("$(7$!(B-$(7$e(B" "w" ?0) ; 370 ("-" "w" ?0) ;
793 ("$(7%!(B-$(7%u(B" "w" ?0) ; 371 ("-" "w" ?0) ;
794 ("$(7"S"["\"]"^"a(B" "w" ?2) ; upper vowel 372 ("ིེཻོཽྀ" "w" ?2) ; upper vowel
795 ("$(7"_"c"d"g"h"i"j"k"l(B" "w" ?2) ; upper modifier 373 ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier
796 ("$(7!I"Q"R"U"e!e!g(B" "w" ?3) ; lowel vowel/modifier 374 ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier
797 ("$(7!P(B-$(7!Y!Z(B-$(7!c(B" "w" ?6) ; digit 375 ("཰" "w" ?3) ; invisible vowel a
798 ("$(7!;!=(B-$(7!B!D"`(B" "." ?|) ; line-break char 376 ("༠-༩༪-༳" "w" ?6) ; digit
799 ("$(8!;!=!?!@!A!D"`(B" "." ?|) ; 377 ("་།-༒༔ཿ" "." ?|) ; line-break char
800 ("$(7!8!;!=(B-$(7!B!D"`!m!d(B" "." ?>) ; prohibition 378 ("་།༏༐༑༔ཿ" "." ?|) ;
801 ("$(8!;!=!?!@!A!D"`(B" "." ?>) ; 379 ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition
802 ("$(7!0(B-$(7!:!l#R#S"f(B" "." ?<) ; prohibition 380 ("་།༏༐༑༔ཿ" "." ?>) ;
803 ("$(7!C!E(B-$(7!H!J(B-$(7!O!f!h(B-$(7!k!n!o#O#P(B-$(7#`(B" "." ?q) ; others 381 ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition
804 382 ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
805 ;; Unicode version (not complete)
806 ("$,1F (B-$,1FIFJ(B" "w" ?0) ; consonant
807 ("$,1Fp(B-$,1G9G:G;G<(B" "w" ?0) ;
808 ("$,1FRFZF[F\F]F`(B" "w" ?2) ; upper vowel
809 ("$,1F^FbFcFfFgFhFiFjFk(B" "w" ?2) ; upper modifier
810 ("$,1EYFPFQFTFdEuEw(B" "w" ?3) ; lowel vowel/modifier
811 ("$,1E`(B-$,1EiEj(B-$,1Es(B" "w" ?6) ; digit
812 ("$,1EKEM(B-$,1ERETF_(B" "." ?|) ; line-break char
813 ("$,1EHEKEM(B-$,1ERETF_E}Et(B" "." ?>) ; prohibition
814 ("$,1E@(B-$,1EJE|GAGBFe(B" "." ?<) ; prohibition
815 ("$,1ESEU(B-$,1EXEZ(B-$,1E_EvEx(B-$,1E{E~EG>G?(B-$,1GO(B" "." ?q) ; others
816 )) 383 ))
817 elm chars len syntax category to ch i) 384 elm chars len syntax category to ch i)
818 (while deflist 385 (while deflist
819 (setq elm (car deflist)) 386 (setq elm (car deflist))
820 (setq chars (car elm) 387 (setq chars (car elm)
836 (setq i (1+ i))) 403 (setq i (1+ i)))
837 (setq deflist (cdr deflist)))) 404 (setq deflist (cdr deflist))))
838 405
839 ;; Vietnamese character set 406 ;; Vietnamese character set
840 407
841 (let ((lower (make-char 'vietnamese-viscii-lower)) 408 ;; To make a word with Latin characters
842 (upper (make-char 'vietnamese-viscii-upper))) 409 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l)
843 ;; (modify-syntax-entry lower "w") 410 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v)
844 ;; (modify-syntax-entry upper "w") 411
845 (modify-category-entry lower ?v) 412 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l)
846 (modify-category-entry upper ?v) 413 (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v)
847 (modify-category-entry lower ?l) ; To make a word with
848 (modify-category-entry upper ?l) ; latin characters.
849 )
850 414
851 (let ((tbl (standard-case-table)) 415 (let ((tbl (standard-case-table))
852 (i 32)) 416 (i 32))
853 (while (< i 128) 417 (while (< i 128)
854 (set-case-syntax-pair (make-char 'vietnamese-viscii-upper i) 418 (let* ((char (decode-char 'vietnamese-viscii-upper i))
855 (make-char 'vietnamese-viscii-lower i) 419 (charl (decode-char 'vietnamese-viscii-lower i))
856 tbl) 420 (uc (encode-char char 'ucs))
421 (lc (encode-char charl 'ucs)))
422 (set-case-syntax-pair char (decode-char 'vietnamese-viscii-lower i)
423 tbl)
424 (if uc (modify-category-entry uc ?v))
425 (if lc (modify-category-entry lc ?v)))
857 (setq i (1+ i)))) 426 (setq i (1+ i))))
858 427
859 ;; Unicode (mule-unicode-0100-24ff) 428
429 ;; Latin
430
431 (modify-category-entry '(#x80 . #x024F) ?l)
860 432
861 (let ((tbl (standard-case-table)) c) 433 (let ((tbl (standard-case-table)) c)
862 434
863 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN 435 ;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
864 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN 436 ;; SMALL LETTER DOTLESS I make a case pair, and so do U+0130 LATIN
865 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I. 437 ;; CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN SMALL LETTER I.
866 ;; Thus we have to check language-environment to handle casing 438 ;; See the Turkish language environment.
867 ;; correctly. Currently only I<->i is available. 439
440 ;; Latin-1
441
442 ;; Fixme: Some of the non-word syntaxes here perhaps should be
443 ;; reviewed. (Note that the following all implicitly have word
444 ;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.) There should be a well-defined way of
445 ;; relating Unicode categories to Emacs syntax codes.
446 (set-case-syntax ?  " " tbl) ; dubious
447 (set-case-syntax ?¡ "." tbl)
448 (set-case-syntax ?¦ "_" tbl)
449 (set-case-syntax ?§ "." tbl)
450 (set-case-syntax ?© "_" tbl)
451 (set-case-syntax-delims 171 187 tbl) ; « »
452 (set-case-syntax ?¬ "_" tbl)
453 (set-case-syntax ?­ "_" tbl)
454 (set-case-syntax ?® "_" tbl)
455 (set-case-syntax ?° "_" tbl)
456 (set-case-syntax ?± "_" tbl)
457 (set-case-syntax ?µ "_" tbl)
458 (set-case-syntax ?· "_" tbl)
459 (set-case-syntax ?¼ "_" tbl)
460 (set-case-syntax ?½ "_" tbl)
461 (set-case-syntax ?¾ "_" tbl)
462 (set-case-syntax ?¿ "." tbl)
463 (let ((c 192))
464 (while (<= c 222)
465 (set-case-syntax-pair c (+ c 32) tbl)
466 (setq c (1+ c))))
467 (set-case-syntax ?× "_" tbl)
468 (set-case-syntax ?ß "w" tbl)
469 (set-case-syntax ?÷ "_" tbl)
470 ;; See below for ÿ.
868 471
869 ;; Latin Extended-A, Latin Extended-B 472 ;; Latin Extended-A, Latin Extended-B
870 (setq c #x0100) 473 (setq c #x0100)
871 (while (<= c #x0233) 474 (while (<= c #x0233)
872 (modify-category-entry (decode-char 'ucs c) ?l)
873 (and (or (<= c #x012e) 475 (and (or (<= c #x012e)
874 (and (>= c #x014a) (<= c #x0177))) 476 (and (>= c #x014a) (<= c #x0177)))
875 (zerop (% c 2)) 477 (zerop (% c 2))
876 (set-case-syntax-pair 478 (set-case-syntax-pair c (1+ c) tbl))
877 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
878 (and (>= c #x013a) 479 (and (>= c #x013a)
879 (<= c #x0148) 480 (<= c #x0148)
880 (zerop (% c 2)) 481 (zerop (% c 2))
881 (set-case-syntax-pair 482 (set-case-syntax-pair (1- c) c tbl))
882 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl)) 483 (setq c (1+ c)))
883 (setq c (1+ c))) 484 (set-case-syntax-pair ?IJ ?ij tbl)
884 (set-case-syntax-pair ?$,1 R(B ?$,1 S(B tbl) 485 (set-case-syntax-pair ?Ĵ ?ĵ tbl)
885 (set-case-syntax-pair ?$,1 T(B ?$,1 U(B tbl) 486 (set-case-syntax-pair ?Ķ ?ķ tbl)
886 (set-case-syntax-pair ?$,1 V(B ?$,1 W(B tbl) 487 (set-case-syntax-pair ?Ÿ ?ÿ tbl)
887 ;;; (set-case-syntax-pair ?$,1!8(B ?,A(B tbl) ; these two have different length! 488 (set-case-syntax-pair ?Ź ?ź tbl)
888 (set-case-syntax-pair ?$,1!9(B ?$,1!:(B tbl) 489 (set-case-syntax-pair ?Ż ?ż tbl)
889 (set-case-syntax-pair ?$,1!;(B ?$,1!<(B tbl) 490 (set-case-syntax-pair ?Ž ?ž tbl)
890 (set-case-syntax-pair ?$,1!=(B ?$,1!>(B tbl)
891 491
892 ;; Latin Extended-B 492 ;; Latin Extended-B
893 (set-case-syntax-pair ?$,1!A(B ?$,1#S(B tbl) 493 (set-case-syntax-pair ?Ɓ ?ɓ tbl)
894 (set-case-syntax-pair ?$,1!B(B ?$,1!C(B tbl) 494 (set-case-syntax-pair ?Ƃ ?ƃ tbl)
895 (set-case-syntax-pair ?$,1!D(B ?$,1!E(B tbl) 495 (set-case-syntax-pair ?Ƅ ?ƅ tbl)
896 (set-case-syntax-pair ?$,1!F(B ?$,1#T(B tbl) 496 (set-case-syntax-pair ?Ɔ ?ɔ tbl)
897 (set-case-syntax-pair ?$,1!G(B ?$,1!H(B tbl) 497 (set-case-syntax-pair ?Ƈ ?ƈ tbl)
898 (set-case-syntax-pair ?$,1!I(B ?$,1#V(B tbl) 498 (set-case-syntax-pair ?Ɖ ?ɖ tbl)
899 (set-case-syntax-pair ?$,1!J(B ?$,1#W(B tbl) 499 (set-case-syntax-pair ?Ɗ ?ɗ tbl)
900 (set-case-syntax-pair ?$,1!K(B ?$,1!L(B tbl) 500 (set-case-syntax-pair ?Ƌ ?ƌ tbl)
901 (set-case-syntax-pair ?$,1!N(B ?$,1"=(B tbl) 501 (set-case-syntax-pair ?Ǝ ?ǝ tbl)
902 (set-case-syntax-pair ?$,1!O(B ?$,1#Y(B tbl) 502 (set-case-syntax-pair ?Ə ?ə tbl)
903 (set-case-syntax-pair ?$,1!P(B ?$,1#[(B tbl) 503 (set-case-syntax-pair ?Ɛ ?ɛ tbl)
904 (set-case-syntax-pair ?$,1!Q(B ?$,1!R(B tbl) 504 (set-case-syntax-pair ?Ƒ ?ƒ tbl)
905 (set-case-syntax-pair ?$,1!S(B ?$,1#`(B tbl) 505 (set-case-syntax-pair ?Ɠ ?ɠ tbl)
906 (set-case-syntax-pair ?$,1!T(B ?$,1#c(B tbl) 506 (set-case-syntax-pair ?Ɣ ?ɣ tbl)
907 (set-case-syntax-pair ?$,1!V(B ?$,1#i(B tbl) 507 (set-case-syntax-pair ?Ɩ ?ɩ tbl)
908 (set-case-syntax-pair ?$,1!W(B ?$,1#h(B tbl) 508 (set-case-syntax-pair ?Ɨ ?ɨ tbl)
909 (set-case-syntax-pair ?$,1!X(B ?$,1!Y(B tbl) 509 (set-case-syntax-pair ?Ƙ ?ƙ tbl)
910 (set-case-syntax-pair ?$,1!\(B ?$,1#o(B tbl) 510 (set-case-syntax-pair ?Ɯ ?ɯ tbl)
911 (set-case-syntax-pair ?$,1!](B ?$,1#r(B tbl) 511 (set-case-syntax-pair ?Ɲ ?ɲ tbl)
912 (set-case-syntax-pair ?$,1!_(B ?$,1#u(B tbl) 512 (set-case-syntax-pair ?Ɵ ?ɵ tbl)
913 (set-case-syntax-pair ?$,1!`(B ?$,1!a(B tbl) 513 (set-case-syntax-pair ?Ơ ?ơ tbl)
914 (set-case-syntax-pair ?$,1!b(B ?$,1!c(B tbl) 514 (set-case-syntax-pair ?Ƣ ?ƣ tbl)
915 (set-case-syntax-pair ?$,1!d(B ?$,1!e(B tbl) 515 (set-case-syntax-pair ?Ƥ ?ƥ tbl)
916 (set-case-syntax-pair ?$,1!f(B ?$,1$ (B tbl) 516 (set-case-syntax-pair ?Ʀ ?ʀ tbl)
917 (set-case-syntax-pair ?$,1!g(B ?$,1!h(B tbl) 517 (set-case-syntax-pair ?Ƨ ?ƨ tbl)
918 (set-case-syntax-pair ?$,1!i(B ?$,1$#(B tbl) 518 (set-case-syntax-pair ?Ʃ ?ʃ tbl)
919 (set-case-syntax-pair ?$,1!l(B ?$,1!m(B tbl) 519 (set-case-syntax-pair ?Ƭ ?ƭ tbl)
920 (set-case-syntax-pair ?$,1!n(B ?$,1$((B tbl) 520 (set-case-syntax-pair ?Ʈ ?ʈ tbl)
921 (set-case-syntax-pair ?$,1!o(B ?$,1!p(B tbl) 521 (set-case-syntax-pair ?Ư ?ư tbl)
922 (set-case-syntax-pair ?$,1!q(B ?$,1$*(B tbl) 522 (set-case-syntax-pair ?Ʊ ?ʊ tbl)
923 (set-case-syntax-pair ?$,1!r(B ?$,1$+(B tbl) 523 (set-case-syntax-pair ?Ʋ ?ʋ tbl)
924 (set-case-syntax-pair ?$,1!s(B ?$,1!t(B tbl) 524 (set-case-syntax-pair ?Ƴ ?ƴ tbl)
925 (set-case-syntax-pair ?$,1!u(B ?$,1!v(B tbl) 525 (set-case-syntax-pair ?Ƶ ?ƶ tbl)
926 (set-case-syntax-pair ?$,1!w(B ?$,1$2(B tbl) 526 (set-case-syntax-pair ?Ʒ ?ʒ tbl)
927 (set-case-syntax-pair ?$,1!x(B ?$,1!y(B tbl) 527 (set-case-syntax-pair ?Ƹ ?ƹ tbl)
928 (set-case-syntax-pair ?$,1!|(B ?$,1!}(B tbl) 528 (set-case-syntax-pair ?Ƽ ?ƽ tbl)
929 (set-case-syntax-pair ?$,1"$(B ?$,1"&(B tbl) 529 (set-case-syntax-pair ?DŽ ?dž tbl)
930 (set-case-syntax-pair ?$,1"%(B ?$,1"&(B tbl) 530 (set-case-syntax-pair ?Dž ?dž tbl)
931 (set-case-syntax-pair ?$,1"'(B ?$,1")(B tbl) 531 (set-case-syntax-pair ?LJ ?lj tbl)
932 (set-case-syntax-pair ?$,1"((B ?$,1")(B tbl) 532 (set-case-syntax-pair ?Lj ?lj tbl)
933 (set-case-syntax-pair ?$,1"*(B ?$,1",(B tbl) 533 (set-case-syntax-pair ?NJ ?nj tbl)
934 (set-case-syntax-pair ?$,1"+(B ?$,1",(B tbl) 534 (set-case-syntax-pair ?Nj ?nj tbl)
935 (set-case-syntax-pair ?$,1"-(B ?$,1".(B tbl) 535 (set-case-syntax-pair ?Ǎ ?ǎ tbl)
936 (set-case-syntax-pair ?$,1"/(B ?$,1"0(B tbl) 536 (set-case-syntax-pair ?Ǐ ?ǐ tbl)
937 (set-case-syntax-pair ?$,1"1(B ?$,1"2(B tbl) 537 (set-case-syntax-pair ?Ǒ ?ǒ tbl)
938 (set-case-syntax-pair ?$,1"3(B ?$,1"4(B tbl) 538 (set-case-syntax-pair ?Ǔ ?ǔ tbl)
939 (set-case-syntax-pair ?$,1"5(B ?$,1"6(B tbl) 539 (set-case-syntax-pair ?Ǖ ?ǖ tbl)
940 (set-case-syntax-pair ?$,1"7(B ?$,1"8(B tbl) 540 (set-case-syntax-pair ?Ǘ ?ǘ tbl)
941 (set-case-syntax-pair ?$,1"9(B ?$,1":(B tbl) 541 (set-case-syntax-pair ?Ǚ ?ǚ tbl)
942 (set-case-syntax-pair ?$,1";(B ?$,1"<(B tbl) 542 (set-case-syntax-pair ?Ǜ ?ǜ tbl)
943 (set-case-syntax-pair ?$,1">(B ?$,1"?(B tbl) 543 (set-case-syntax-pair ?Ǟ ?ǟ tbl)
944 (set-case-syntax-pair ?$,1"@(B ?$,1"A(B tbl) 544 (set-case-syntax-pair ?Ǡ ?ǡ tbl)
945 (set-case-syntax-pair ?$,1"B(B ?$,1"C(B tbl) 545 (set-case-syntax-pair ?Ǣ ?ǣ tbl)
946 (set-case-syntax-pair ?$,1"D(B ?$,1"E(B tbl) 546 (set-case-syntax-pair ?Ǥ ?ǥ tbl)
947 (set-case-syntax-pair ?$,1"F(B ?$,1"G(B tbl) 547 (set-case-syntax-pair ?Ǧ ?ǧ tbl)
948 (set-case-syntax-pair ?$,1"H(B ?$,1"I(B tbl) 548 (set-case-syntax-pair ?Ǩ ?ǩ tbl)
949 (set-case-syntax-pair ?$,1"J(B ?$,1"K(B tbl) 549 (set-case-syntax-pair ?Ǫ ?ǫ tbl)
950 (set-case-syntax-pair ?$,1"L(B ?$,1"M(B tbl) 550 (set-case-syntax-pair ?Ǭ ?ǭ tbl)
951 (set-case-syntax-pair ?$,1"N(B ?$,1"O(B tbl) 551 (set-case-syntax-pair ?Ǯ ?ǯ tbl)
952 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON 552 ;; 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
953 (set-case-syntax-pair ?$,1"Q(B ?$,1"S(B tbl) 553 (set-case-syntax-pair ?DZ ?dz tbl)
954 (set-case-syntax-pair ?$,1"R(B ?$,1"S(B tbl) 554 (set-case-syntax-pair ?Dz ?dz tbl)
955 (set-case-syntax-pair ?$,1"T(B ?$,1"U(B tbl) 555 (set-case-syntax-pair ?Ǵ ?ǵ tbl)
956 (set-case-syntax-pair ?$,1"V(B ?$,1!U(B tbl) 556 (set-case-syntax-pair ?Ƕ ?ƕ tbl)
957 (set-case-syntax-pair ?$,1"W(B ?$,1!(B tbl) 557 (set-case-syntax-pair ?Ƿ ?ƿ tbl)
958 (set-case-syntax-pair ?$,1"X(B ?$,1"Y(B tbl) 558 (set-case-syntax-pair ?Ǹ ?ǹ tbl)
959 (set-case-syntax-pair ?$,1"Z(B ?$,1"[(B tbl) 559 (set-case-syntax-pair ?Ǻ ?ǻ tbl)
960 (set-case-syntax-pair ?$,1"\(B ?$,1"](B tbl) 560 (set-case-syntax-pair ?Ǽ ?ǽ tbl)
961 (set-case-syntax-pair ?$,1"^(B ?$,1"_(B tbl) 561 (set-case-syntax-pair ?Ǿ ?ǿ tbl)
962 (set-case-syntax-pair ?$,1"`(B ?$,1"a(B tbl) 562 (set-case-syntax-pair ?Ȁ ?ȁ tbl)
963 (set-case-syntax-pair ?$,1"b(B ?$,1"c(B tbl) 563 (set-case-syntax-pair ?Ȃ ?ȃ tbl)
964 (set-case-syntax-pair ?$,1"d(B ?$,1"e(B tbl) 564 (set-case-syntax-pair ?Ȅ ?ȅ tbl)
965 (set-case-syntax-pair ?$,1"f(B ?$,1"g(B tbl) 565 (set-case-syntax-pair ?Ȇ ?ȇ tbl)
966 (set-case-syntax-pair ?$,1"h(B ?$,1"i(B tbl) 566 (set-case-syntax-pair ?Ȉ ?ȉ tbl)
967 (set-case-syntax-pair ?$,1"j(B ?$,1"k(B tbl) 567 (set-case-syntax-pair ?Ȋ ?ȋ tbl)
968 (set-case-syntax-pair ?$,1"l(B ?$,1"m(B tbl) 568 (set-case-syntax-pair ?Ȍ ?ȍ tbl)
969 (set-case-syntax-pair ?$,1"n(B ?$,1"o(B tbl) 569 (set-case-syntax-pair ?Ȏ ?ȏ tbl)
970 (set-case-syntax-pair ?$,1"p(B ?$,1"q(B tbl) 570 (set-case-syntax-pair ?Ȑ ?ȑ tbl)
971 (set-case-syntax-pair ?$,1"r(B ?$,1"s(B tbl) 571 (set-case-syntax-pair ?Ȓ ?ȓ tbl)
972 (set-case-syntax-pair ?$,1"t(B ?$,1"u(B tbl) 572 (set-case-syntax-pair ?Ȕ ?ȕ tbl)
973 (set-case-syntax-pair ?$,1"v(B ?$,1"w(B tbl) 573 (set-case-syntax-pair ?Ȗ ?ȗ tbl)
974 (set-case-syntax-pair ?$,1"x(B ?$,1"y(B tbl) 574 (set-case-syntax-pair ?Ș ?ș tbl)
975 (set-case-syntax-pair ?$,1"z(B ?$,1"{(B tbl) 575 (set-case-syntax-pair ?Ț ?ț tbl)
976 (set-case-syntax-pair ?$,1"|(B ?$,1"}(B tbl) 576 (set-case-syntax-pair ?Ȝ ?ȝ tbl)
977 (set-case-syntax-pair ?$,1"~(B ?$,1"(B tbl) 577 (set-case-syntax-pair ?Ȟ ?ȟ tbl)
978 (set-case-syntax-pair ?$,1#"(B ?$,1##(B tbl) 578 (set-case-syntax-pair ?Ȣ ?ȣ tbl)
979 (set-case-syntax-pair ?$,1#$(B ?$,1#%(B tbl) 579 (set-case-syntax-pair ?Ȥ ?ȥ tbl)
980 (set-case-syntax-pair ?$,1#&(B ?$,1#'(B tbl) 580 (set-case-syntax-pair ?Ȧ ?ȧ tbl)
981 (set-case-syntax-pair ?$,1#((B ?$,1#)(B tbl) 581 (set-case-syntax-pair ?Ȩ ?ȩ tbl)
982 (set-case-syntax-pair ?$,1#*(B ?$,1#+(B tbl) 582 (set-case-syntax-pair ?Ȫ ?ȫ tbl)
983 (set-case-syntax-pair ?$,1#,(B ?$,1#-(B tbl) 583 (set-case-syntax-pair ?Ȭ ?ȭ tbl)
984 (set-case-syntax-pair ?$,1#.(B ?$,1#/(B tbl) 584 (set-case-syntax-pair ?Ȯ ?ȯ tbl)
985 (set-case-syntax-pair ?$,1#0(B ?$,1#1(B tbl) 585 (set-case-syntax-pair ?Ȱ ?ȱ tbl)
986 (set-case-syntax-pair ?$,1#2(B ?$,1#3(B tbl) 586 (set-case-syntax-pair ?Ȳ ?ȳ tbl)
987 587
988 ;; Latin Extended Additional 588 ;; Latin Extended Additional
589 (modify-category-entry '(#x1e00 . #x1ef9) ?l)
989 (setq c #x1e00) 590 (setq c #x1e00)
990 (while (<= c #x1ef9) 591 (while (<= c #x1ef9)
991 (modify-category-entry (decode-char 'ucs c) ?l)
992 (and (zerop (% c 2)) 592 (and (zerop (% c 2))
993 (or (<= c #x1e94) (>= c #x1ea0)) 593 (or (<= c #x1e94) (>= c #x1ea0))
994 (set-case-syntax-pair 594 (set-case-syntax-pair c (1+ c) tbl))
995 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
996 (setq c (1+ c))) 595 (setq c (1+ c)))
997 596
998 ;; Greek 597 ;; Greek
598 (modify-category-entry '(#x0370 . #x03ff) ?g)
999 (setq c #x0370) 599 (setq c #x0370)
1000 (while (<= c #x03ff) 600 (while (<= c #x03ff)
1001 (modify-category-entry (decode-char 'ucs c) ?g)
1002 (if (or (and (>= c #x0391) (<= c #x03a1)) 601 (if (or (and (>= c #x0391) (<= c #x03a1))
1003 (and (>= c #x03a3) (<= c #x03ab))) 602 (and (>= c #x03a3) (<= c #x03ab)))
1004 (set-case-syntax-pair 603 (set-case-syntax-pair c (+ c 32) tbl))
1005 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1006 (and (>= c #x03da) 604 (and (>= c #x03da)
1007 (<= c #x03ee) 605 (<= c #x03ee)
1008 (zerop (% c 2)) 606 (zerop (% c 2))
1009 (set-case-syntax-pair 607 (set-case-syntax-pair c (1+ c) tbl))
1010 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) 608 (setq c (1+ c)))
1011 (setq c (1+ c))) 609 (set-case-syntax-pair ?Ά ?ά tbl)
1012 (set-case-syntax-pair ?$,1&f(B ?$,1',(B tbl) 610 (set-case-syntax-pair ?Έ ?έ tbl)
1013 (set-case-syntax-pair ?$,1&h(B ?$,1'-(B tbl) 611 (set-case-syntax-pair ?Ή ?ή tbl)
1014 (set-case-syntax-pair ?$,1&i(B ?$,1'.(B tbl) 612 (set-case-syntax-pair ?Ί ?ί tbl)
1015 (set-case-syntax-pair ?$,1&j(B ?$,1'/(B tbl) 613 (set-case-syntax-pair ?Ό ?ό tbl)
1016 (set-case-syntax-pair ?$,1&l(B ?$,1'L(B tbl) 614 (set-case-syntax-pair ?Ύ ?ύ tbl)
1017 (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl) 615 (set-case-syntax-pair ?Ώ ?ώ tbl)
1018 (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl)
1019 616
1020 ;; Armenian 617 ;; Armenian
1021 (setq c #x531) 618 (setq c #x531)
1022 (while (<= c #x556) 619 (while (<= c #x556)
1023 (set-case-syntax-pair (decode-char 'ucs c) 620 (set-case-syntax-pair c (+ c #x30) tbl)
1024 (decode-char 'ucs (+ c #x30)) tbl)
1025 (setq c (1+ c))) 621 (setq c (1+ c)))
1026 622
1027 ;; Greek Extended 623 ;; Greek Extended
624 (modify-category-entry '(#x1f00 . #x1fff) ?g)
1028 (setq c #x1f00) 625 (setq c #x1f00)
1029 (while (<= c #x1fff) 626 (while (<= c #x1fff)
1030 (modify-category-entry (decode-char 'ucs c) ?g)
1031 (and (<= (logand c #x000f) 7) 627 (and (<= (logand c #x000f) 7)
1032 (<= c #x1fa7) 628 (<= c #x1fa7)
1033 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56))) 629 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
1034 (/= (logand c #x00f0) 7) 630 (/= (logand c #x00f0) 7)
1035 (set-case-syntax-pair 631 (set-case-syntax-pair (+ c 8) c tbl))
1036 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl)) 632 (setq c (1+ c)))
1037 (setq c (1+ c))) 633 (set-case-syntax-pair ?Ᾰ ?ᾰ tbl)
1038 (set-case-syntax-pair ?$,1qx(B ?$,1qp(B tbl) 634 (set-case-syntax-pair ?Ᾱ ?ᾱ tbl)
1039 (set-case-syntax-pair ?$,1qy(B ?$,1qq(B tbl) 635 (set-case-syntax-pair ?Ὰ ?ὰ tbl)
1040 (set-case-syntax-pair ?$,1qz(B ?$,1q0(B tbl) 636 (set-case-syntax-pair ?Ά ?ά tbl)
1041 (set-case-syntax-pair ?$,1q{(B ?$,1q1(B tbl) 637 (set-case-syntax-pair ?ᾼ ?ᾳ tbl)
1042 (set-case-syntax-pair ?$,1q|(B ?$,1qs(B tbl) 638 (set-case-syntax-pair ?Ὲ ?ὲ tbl)
1043 (set-case-syntax-pair ?$,1r((B ?$,1q2(B tbl) 639 (set-case-syntax-pair ?Έ ?έ tbl)
1044 (set-case-syntax-pair ?$,1r)(B ?$,1q3(B tbl) 640 (set-case-syntax-pair ?Ὴ ?ὴ tbl)
1045 (set-case-syntax-pair ?$,1r*(B ?$,1q4(B tbl) 641 (set-case-syntax-pair ?Ή ?ή tbl)
1046 (set-case-syntax-pair ?$,1r+(B ?$,1q5(B tbl) 642 (set-case-syntax-pair ?ῌ ?ῃ tbl)
1047 (set-case-syntax-pair ?$,1r,(B ?$,1r#(B tbl) 643 (set-case-syntax-pair ?Ῐ ?ῐ tbl)
1048 (set-case-syntax-pair ?$,1r8(B ?$,1r0(B tbl) 644 (set-case-syntax-pair ?Ῑ ?ῑ tbl)
1049 (set-case-syntax-pair ?$,1r9(B ?$,1r1(B tbl) 645 (set-case-syntax-pair ?Ὶ ?ὶ tbl)
1050 (set-case-syntax-pair ?$,1r:(B ?$,1q6(B tbl) 646 (set-case-syntax-pair ?Ί ?ί tbl)
1051 (set-case-syntax-pair ?$,1r;(B ?$,1q7(B tbl) 647 (set-case-syntax-pair ?Ῠ ?ῠ tbl)
1052 (set-case-syntax-pair ?$,1rH(B ?$,1r@(B tbl) 648 (set-case-syntax-pair ?Ῡ ?ῡ tbl)
1053 (set-case-syntax-pair ?$,1rI(B ?$,1rA(B tbl) 649 (set-case-syntax-pair ?Ὺ ?ὺ tbl)
1054 (set-case-syntax-pair ?$,1rJ(B ?$,1q:(B tbl) 650 (set-case-syntax-pair ?Ύ ?ύ tbl)
1055 (set-case-syntax-pair ?$,1rK(B ?$,1q;(B tbl) 651 (set-case-syntax-pair ?Ῥ ?ῥ tbl)
1056 (set-case-syntax-pair ?$,1rL(B ?$,1rE(B tbl) 652 (set-case-syntax-pair ?Ὸ ?ὸ tbl)
1057 (set-case-syntax-pair ?$,1rX(B ?$,1q8(B tbl) 653 (set-case-syntax-pair ?Ό ?ό tbl)
1058 (set-case-syntax-pair ?$,1rY(B ?$,1q9(B tbl) 654 (set-case-syntax-pair ?Ὼ ?ὼ tbl)
1059 (set-case-syntax-pair ?$,1rZ(B ?$,1q<(B tbl) 655 (set-case-syntax-pair ?Ώ ?ώ tbl)
1060 (set-case-syntax-pair ?$,1r[(B ?$,1q=(B tbl) 656 (set-case-syntax-pair ?ῼ ?ῳ tbl)
1061 (set-case-syntax-pair ?$,1r\(B ?$,1rS(B tbl)
1062 657
1063 ;; cyrillic 658 ;; cyrillic
659 (modify-category-entry '(#x0400 . #x04FF) ?y)
1064 (setq c #x0400) 660 (setq c #x0400)
1065 (while (<= c #x04ff) 661 (while (<= c #x04ff)
1066 (modify-category-entry (decode-char 'ucs c) ?y)
1067 (and (>= c #x0400) 662 (and (>= c #x0400)
1068 (<= c #x040f) 663 (<= c #x040f)
1069 (set-case-syntax-pair 664 (set-case-syntax-pair c (+ c 80) tbl))
1070 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
1071 (and (>= c #x0410) 665 (and (>= c #x0410)
1072 (<= c #x042f) 666 (<= c #x042f)
1073 (set-case-syntax-pair 667 (set-case-syntax-pair c (+ c 32) tbl))
1074 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
1075 (and (zerop (% c 2)) 668 (and (zerop (% c 2))
1076 (or (and (>= c #x0460) (<= c #x0480)) 669 (or (and (>= c #x0460) (<= c #x0480))
1077 (and (>= c #x048c) (<= c #x04be)) 670 (and (>= c #x048c) (<= c #x04be))
1078 (and (>= c #x04d0) (<= c #x04f4))) 671 (and (>= c #x04d0) (<= c #x04f4)))
1079 (set-case-syntax-pair 672 (set-case-syntax-pair c (1+ c) tbl))
1080 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) 673 (setq c (1+ c)))
1081 (setq c (1+ c))) 674 (set-case-syntax-pair ?Ӂ ?ӂ tbl)
1082 (set-case-syntax-pair ?$,1*!(B ?$,1*"(B tbl) 675 (set-case-syntax-pair ?Ӄ ?ӄ tbl)
1083 (set-case-syntax-pair ?$,1*#(B ?$,1*$(B tbl) 676 (set-case-syntax-pair ?Ӈ ?ӈ tbl)
1084 (set-case-syntax-pair ?$,1*'(B ?$,1*((B tbl) 677 (set-case-syntax-pair ?Ӌ ?ӌ tbl)
1085 (set-case-syntax-pair ?$,1*+(B ?$,1*,(B tbl) 678 (set-case-syntax-pair ?Ӹ ?ӹ tbl)
1086 (set-case-syntax-pair ?$,1*X(B ?$,1*Y(B tbl)
1087 679
1088 ;; general punctuation 680 ;; general punctuation
1089 (setq c #x2000) 681 (setq c #x2000)
1090 (while (<= c #x200b) 682 (while (<= c #x200b)
1091 (set-case-syntax (decode-char 'ucs c) " " tbl) 683 (set-case-syntax c " " tbl)
1092 (setq c (decode-char 'ucs (1+ c)))) 684 (setq c (1+ c)))
1093 (setq c #x2010) 685 (while (<= c #x200F)
686 (set-case-syntax c "." tbl)
687 (setq c (1+ c)))
688 ;; Fixme: These aren't all right:
1094 (while (<= c #x2027) 689 (while (<= c #x2027)
1095 (set-case-syntax (decode-char 'ucs c) "_" tbl) 690 (set-case-syntax c "_" tbl)
1096 (setq c (decode-char 'ucs (1+ c)))) 691 (setq c (1+ c)))
692 (while (<= c #x206F)
693 (set-case-syntax c "." tbl)
694 (setq c (1+ c)))
1097 695
1098 ;; Roman numerals 696 ;; Roman numerals
1099 (setq c #x2160) 697 (setq c #x2160)
1100 (while (<= c #x216f) 698 (while (<= c #x216f)
1101 (set-case-syntax-pair (decode-char 'ucs c) 699 (set-case-syntax-pair c (+ c #x10) tbl)
1102 (decode-char 'ucs (+ c #x10)) tbl) 700 (setq c (1+ c)))
701
702 ;; Fixme: The following blocks might be better as symbol rather than
703 ;; punctuation.
704 ;; Arrows
705 (setq c #x2190)
706 (while (<= c #x21FF)
707 (set-case-syntax c "." tbl)
708 (setq c (1+ c)))
709 ;; Mathematical Operators
710 (while (<= c #x22FF)
711 (set-case-syntax c "." tbl)
712 (setq c (1+ c)))
713 ;; Miscellaneous Technical
714 (while (<= c #x23FF)
715 (set-case-syntax c "." tbl)
716 (setq c (1+ c)))
717 ;; Control Pictures
718 (while (<= c #x243F)
719 (set-case-syntax c "_" tbl)
1103 (setq c (1+ c))) 720 (setq c (1+ c)))
1104 721
1105 ;; Circled Latin 722 ;; Circled Latin
1106 (setq c #x24b6) 723 (setq c #x24b6)
1107 (while (<= c #x24cf) 724 (while (<= c #x24cf)
1108 (set-case-syntax-pair (decode-char 'ucs c) 725 (set-case-syntax-pair c (+ c 26) tbl)
1109 (decode-char 'ucs (+ c 26)) tbl) 726 (modify-category-entry c ?l)
1110 (modify-category-entry (decode-char 'ucs c) ?l) 727 (modify-category-entry (+ c 26) ?l)
1111 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1112 (setq c (1+ c))) 728 (setq c (1+ c)))
1113 729
1114 ;; Fullwidth Latin 730 ;; Fullwidth Latin
1115 (setq c #xff21) 731 (setq c #xff21)
1116 (while (<= c #xff3a) 732 (while (<= c #xff3a)
1117 (set-case-syntax-pair (decode-char 'ucs c) 733 (set-case-syntax-pair c (+ c #x20) tbl)
1118 (decode-char 'ucs (+ c #x20)) tbl) 734 (modify-category-entry c ?l)
1119 (modify-category-entry (decode-char 'ucs c) ?l) 735 (modify-category-entry (+ c #x20) ?l)
1120 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1121 (setq c (1+ c))) 736 (setq c (1+ c)))
1122 737
1123 ;; Combining diacritics 738 ;; Combining diacritics
1124 (setq c #x300) 739 (modify-category-entry '(#x300 . #x362) ?^)
1125 (while (<= c #x362)
1126 (modify-category-entry (decode-char 'ucs c) ?^)
1127 (setq c (1+ c)))
1128
1129 ;; Combining marks 740 ;; Combining marks
1130 (setq c #x20d0) 741 (modify-category-entry '(#x20d0 . #x20e3) ?^)
1131 (while (<= c #x20e3)
1132 (modify-category-entry (decode-char 'ucs c) ?^)
1133 (setq c (1+ c)))
1134 742
1135 ;; Fixme: syntax for symbols &c 743 ;; Fixme: syntax for symbols &c
1136 ) 744 )
1137 745
1138 ;;; Setting word boundary.
1139
1140 (setq word-combining-categories
1141 '((?l . ?l)))
1142
1143 (setq word-separating-categories ; (2-byte character sets)
1144 '((?A . ?K) ; Alpha numeric - Katakana
1145 (?A . ?C) ; Alpha numeric - Chinese
1146 (?H . ?A) ; Hiragana - Alpha numeric
1147 (?H . ?K) ; Hiragana - Katakana
1148 (?H . ?C) ; Hiragana - Chinese
1149 (?K . ?A) ; Katakana - Alpha numeric
1150 (?K . ?C) ; Katakana - Chinese
1151 (?C . ?A) ; Chinese - Alpha numeric
1152 (?C . ?K) ; Chinese - Katakana
1153 ))
1154
1155
1156 ;; For each character set, put the information of the most proper 746 ;; For each character set, put the information of the most proper
1157 ;; coding system to encode it by `preferred-coding-system' property. 747 ;; coding system to encode it by `preferred-coding-system' property.
1158 748
749 ;; Fixme: should this be junked?
1159 (let ((l '((latin-iso8859-1 . iso-latin-1) 750 (let ((l '((latin-iso8859-1 . iso-latin-1)
1160 (latin-iso8859-2 . iso-latin-2) 751 (latin-iso8859-2 . iso-latin-2)
1161 (latin-iso8859-3 . iso-latin-3) 752 (latin-iso8859-3 . iso-latin-3)
1162 (latin-iso8859-4 . iso-latin-4) 753 (latin-iso8859-4 . iso-latin-4)
1163 (thai-tis620 . thai-tis620) 754 (thai-tis620 . thai-tis620)
1181 (ipa . iso-2022-7bit) 772 (ipa . iso-2022-7bit)
1182 (vietnamese-viscii-lower . vietnamese-viscii) 773 (vietnamese-viscii-lower . vietnamese-viscii)
1183 (vietnamese-viscii-upper . vietnamese-viscii) 774 (vietnamese-viscii-upper . vietnamese-viscii)
1184 (arabic-digit . iso-2022-7bit) 775 (arabic-digit . iso-2022-7bit)
1185 (arabic-1-column . iso-2022-7bit) 776 (arabic-1-column . iso-2022-7bit)
1186 (ascii-right-to-left . iso-2022-7bit)
1187 (lao . lao) 777 (lao . lao)
1188 (arabic-2-column . iso-2022-7bit) 778 (arabic-2-column . iso-2022-7bit)
1189 (indian-is13194 . devanagari) 779 (indian-is13194 . devanagari)
1190 (indian-glyph . devanagari) 780 (indian-glyph . devanagari)
1191 (tibetan-1-column . tibetan) 781 (tibetan-1-column . tibetan)
1209 ;; property on the charsets. 799 ;; property on the charsets.
1210 (let ((l '(katakana-jisx0201 800 (let ((l '(katakana-jisx0201
1211 japanese-jisx0208 japanese-jisx0212 801 japanese-jisx0208 japanese-jisx0212
1212 chinese-gb2312 chinese-big5-1 chinese-big5-2))) 802 chinese-gb2312 chinese-big5-1 chinese-big5-2)))
1213 (while l 803 (while l
1214 (aset auto-fill-chars (make-char (car l)) t) 804 ;;(aset auto-fill-chars (make-char (car l)) t)
1215 (put-charset-property (car l) 'nospace-between-words t) 805 (put-charset-property (car l) 'nospace-between-words t)
1216 (setq l (cdr l)))) 806 (setq l (cdr l))))
1217 807
808
;; CJK double width characters.
;; Each entry is an inclusive (FROM . TO) code-point range; every
;; character in it gets width 2 in `char-width-table'.
(dolist (range '((#x1100 . #x11FF)
                 (#x2E80 . #x9FAF)
                 (#xAC00 . #xD7AF)
                 (#xF900 . #xFAFF)
                 (#xFE30 . #xFE4F)
                 (#xFF00 . #xFF5F)
                 (#xFFE0 . #xFFEF)
                 (#x20000 . #x2AFFF)
                 (#x2F800 . #x2FFFF)))
  (set-char-table-range char-width-table range 2))
823 ;; Fixme: Doing this affects non-CJK characters through unification,
824 ;; but presumably CJK users expect those characters to be
825 ;; double-width when using these charsets.
826 ;; (map-charset-chars
827 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
828 ;; 'japanese-jisx0208)
829 ;; (map-charset-chars
830 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
831 ;; 'japanese-jisx0212)
832 ;; (map-charset-chars
833 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
834 ;; 'japanese-jisx0213-1)
835 ;; (map-charset-chars
836 ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
837 ;; 'japanese-jisx0213-2)
838 ;; (map-charset-chars
839 ;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
840 ;; 'korean-ksc5601)
841
;; Other double width
;; For each charset listed, `map-charset-chars' hands the lambda the
;; charset's character ranges, and each range is given width 2.
(dolist (charset '(ethiopic tibetan indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (range _arg)
     (set-char-table-range char-width-table range 2))
   charset))
855
;; Run `optimize-char-table' on each table filled in above, in the
;; same order as before: case, width, category, syntax.
(mapc #'optimize-char-table
      (list (standard-case-table)
            char-width-table
            (standard-category-table)
            (standard-syntax-table)))
860
;; Fill `char-script-table' from a list of (FROM TO SCRIPT) entries and
;; record each distinct SCRIPT, in order of first appearance, in extra
;; slot 0 of the table.
;; The Unicode blocks actually extend past some of these ranges with
;; undefined codepoints.
(let (scripts)
  (dolist (entry
           '((#x0000 #x007F latin)
             (#x00A0 #x036F latin)
             (#x0370 #x03E1 greek)
             (#x03E2 #x03EF coptic)
             (#x03F0 #x03F3 greek)
             (#x0400 #x04FF cyrillic)
             (#x0530 #x058F armenian)
             (#x0590 #x05FF hebrew)
             (#x0600 #x06FF arabic)
             (#x0700 #x074F syriac)
             (#x0780 #x07BF thaana)
             (#x0900 #x097F devanagari)
             (#x0980 #x09FF bengali)
             (#x0A00 #x0A7F gurmukhi)
             (#x0A80 #x0AFF gujarati)
             (#x0B00 #x0B7F oriya)
             (#x0B80 #x0BFF tamil)
             (#x0C00 #x0C7F telugu)
             (#x0C80 #x0CFF kannada)
             (#x0D00 #x0D7F malayalam)
             (#x0D80 #x0DFF sinhala)
             (#x0E00 #x0E5F thai)
             (#x0E80 #x0EDF lao)
             (#x0F00 #x0FFF tibetan)
             (#x1000 #x105F myanmar)
             (#x10A0 #x10FF georgian)
             (#x1100 #x11FF hangul)
             (#x1200 #x137F ethiopic)
             (#x13A0 #x13FF cherokee)
             (#x1400 #x167F canadian-aboriginal)
             (#x1680 #x169F ogham)
             (#x16A0 #x16FF runic)
             (#x1780 #x17FF khmer)
             (#x1800 #x18AF mongolian)
             (#x1E00 #x1EFF latin)
             (#x1F00 #x1FFF greek)
             (#x20A0 #x20AF currency)
             (#x2800 #x28FF braille)
             (#x2E80 #x2FDF han)
             (#x2FF0 #x2FFF ideographic-description)
             (#x3000 #x303F cjk-misc)
             (#x3040 #x30FF kana)
             (#x3100 #x312F bopomofo)
             (#x3130 #x318F hangul)
             (#x3190 #x319F kanbun)
             (#x31A0 #x31BF bopomofo)
             (#x3400 #x9FAF han)
             (#xA000 #xA4CF yi)
             (#xAC00 #xD7AF hangul)
             (#xF900 #xFA5F han)
             (#xFB1D #xFB4F hebrew)
             (#xFB50 #xFDFF arabic)
             (#xFE70 #xFEFC arabic)
             (#xFF00 #xFF5F cjk-misc)
             (#xFF61 #xFF9F kana)
             (#xFFE0 #xFFE6 cjk-misc)
             (#x20000 #x2AFFF han)
             (#x2F800 #x2FFFF han)))
    (let ((from (car entry))
          (to (nth 1 entry))
          (script (nth 2 entry)))
      (set-char-table-range char-script-table (cons from to) script)
      ;; Remember each script only once.
      (unless (memq script scripts)
        (push script scripts))))
  ;; SCRIPTS was built newest-first; store it oldest-first.
  (set-char-table-extra-slot char-script-table 0 (nreverse scripts)))

;; Every character range of the `tibetan' charset is also tagged as
;; script `tibetan'.
(map-charset-chars
 (lambda (range _arg)
   (set-char-table-range char-script-table range 'tibetan))
 'tibetan)
934
935
936 ;;; Setting word boundary.
937
(defun next-word-boundary-han (pos limit)
  "Return a word boundary near POS for text starting with Han characters.
If POS is not greater than LIMIT, scan forward from POS: skip a run
of characters of category `C', then a run of characters of category
`H', and return the position reached.  A run that is absent is simply
not skipped, so POS itself is returned when neither is present.
Otherwise scan backward: move POS back while it is greater than LIMIT
and the character before it has script `han' in `char-script-table',
and return the position reached."
  (if (<= pos limit)
      (save-excursion
        (goto-char pos)
        ;; Use the match data only when the match succeeded.  After a
        ;; failed `looking-at', `match-end' still describes whatever
        ;; search ran last and would move point somewhere unrelated.
        (if (looking-at "\\cC+")
            (goto-char (match-end 0)))
        (if (looking-at "\\cH+")
            (goto-char (match-end 0)))
        (point))
    (while (and (> pos limit)
                (eq (aref char-script-table (char-after (1- pos))) 'han))
      (setq pos (1- pos)))
    pos))
951
(defun next-word-boundary-kana (pos limit)
  "Return a word boundary near POS for text starting with kana.
If POS is not greater than LIMIT, scan forward from POS: skip a run
of characters of category `K' if present, then a run of characters
of category `H' if present, and return the position reached.

Otherwise scan backward and never return a position below LIMIT.
If the character at POS has category `K', move back over the
preceding run of `K' characters.  If not, move back over the
preceding run of `H' characters; then, if the character before that
run has category `C', `K' or `A' (tested in that order), also move
back over the run of that category."
  (if (<= pos limit)
      (save-excursion
        (goto-char pos)
        (if (looking-at "\\cK+")
            (goto-char (match-end 0)))
        (if (looking-at "\\cH+")
            (goto-char (match-end 0)))
        (point))
    (let ((category-set (char-category-set (char-after pos)))
          category)
      (if (aref category-set ?K)
          (while (and (> pos limit)
                      (aref (char-category-set (char-after (1- pos))) ?K))
            (setq pos (1- pos)))
        ;; When this loop stops on a non-`H' character, CATEGORY-SET
        ;; is the set of that character, i.e. the one just before POS.
        (while (and (> pos limit)
                    (aref (setq category-set
                                (char-category-set (char-after (1- pos)))) ?H))
          (setq pos (1- pos)))
        (setq category (cond ((aref category-set ?C) ?C)
                             ((aref category-set ?K) ?K)
                             ((aref category-set ?A) ?A)))
        ;; If the loop stopped because POS reached LIMIT, CATEGORY-SET
        ;; still describes the character at POS and the one before it
        ;; was never examined.  Stepping back would cross LIMIT, so
        ;; only continue while there is room.
        (when (and category (> pos limit))
          (setq pos (1- pos))
          (while (and (> pos limit)
                      (aref (char-category-set (char-after (1- pos)))
                            category))
            (setq pos (1- pos)))))
      pos)))
981
;; Walk `char-script-table' and, for every entry whose script is `han'
;; or `kana', store the matching boundary function in
;; `find-word-boundary-function-table'.  Other scripts are left alone.
(map-char-table
 (lambda (char script)
   (let ((boundary-fn
          (cdr (assq script
                     '((han . next-word-boundary-han)
                       (kana . next-word-boundary-kana))))))
     (when boundary-fn
       (set-char-table-range find-word-boundary-function-table
                             char boundary-fn))))
 char-script-table)
991
;; Category pairs consulted when deciding word boundaries.  Both
;; variables are assigned in a single `setq'.
(setq word-combining-categories
      '((?l . ?l))

      word-separating-categories        ; (2-byte character sets)
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
1006
1218 ;;; Local Variables: 1007 ;;; Local Variables:
1219 ;;; coding: iso-2022-7bit 1008 ;;; coding: utf-8-emacs
1220 ;;; End: 1009 ;;; End:
1221 1010
1222 ;;; characters.el ends here 1011 ;;; characters.el ends here