comparison lisp/international/characters.el @ 91933:1a3f22533482

Delete occurrences of non-Unicode Tibetan and Ethiopic characters. (cjk-char-width-table): New variable. (use-cjk-char-width-table, use-default-char-width-table): New functions.
author Kenichi Handa <handa@m17n.org>
date Mon, 18 Feb 2008 11:52:16 +0000
parents c8cd29958bb5
children cbc008b3464f
comparison
equal deleted inserted replaced
91932:d939d46e41a8 91933:1a3f22533482
266 266
267 ;; Ethiopic character set 267 ;; Ethiopic character set
268 268
269 (modify-category-entry '(#x1200 . #x1399) ?e) 269 (modify-category-entry '(#x1200 . #x1399) ?e)
270 (modify-category-entry '(#x2d80 . #x2dde) ?e) 270 (modify-category-entry '(#x2d80 . #x2dde) ?e)
271 (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ? ? ? ? ? ?))) 271 (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨)))
272 (while chars 272 (while chars
273 (modify-syntax-entry (car chars) ".") 273 (modify-syntax-entry (car chars) ".")
274 (setq chars (cdr chars)))) 274 (setq chars (cdr chars))))
275 (map-charset-chars #'modify-category-entry 'ethiopic ?e) 275 (map-charset-chars #'modify-category-entry 'ethiopic ?e)
276 276
366 (map-charset-chars #'modify-category-entry 'tibetan ?q) 366 (map-charset-chars #'modify-category-entry 'tibetan ?q)
367 (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q) 367 (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
368 368
369 (let ((deflist '(;; chars syntax category 369 (let ((deflist '(;; chars syntax category
370 ("ཀ-ཀྵཪ" "w" ?0) ; consonant 370 ("ཀ-ཀྵཪ" "w" ?0) ; consonant
371 ("ྐ-ྐྵྺྻྼ" "w" ?0) ; 371 ("ྐ-ྐྵྺྻྼ" "w" ?0) ;
372 ("-" "w" ?0) ;
373 ("-" "w" ?0) ;
374 ("ིེཻོཽྀ" "w" ?2) ; upper vowel 372 ("ིེཻོཽྀ" "w" ?2) ; upper vowel
375 ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier 373 ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier
376 ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier 374 ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier
377 ("཰" "w" ?3) ; invisible vowel a 375 ("཰" "w" ?3) ; invisible vowel a
378 ("༠-༩༪-༳" "w" ?6) ; digit 376 ("༠-༩༪-༳" "w" ?6) ; digit
379 ("་།-༒༔ཿ" "." ?|) ; line-break char 377 ("་།-༒༔ཿ" "." ?|) ; line-break char
380 ("་།༏༐༑༔ཿ" "." ?|) ; 378 ("་།༏༐༑༔ཿ" "." ?|) ;
381 ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition 379 ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition
1005 'indian-2-column) 1003 'indian-2-column)
1006 (map-charset-chars 1004 (map-charset-chars
1007 (lambda (range ignore) (set-char-table-range char-width-table range 2)) 1005 (lambda (range ignore) (set-char-table-range char-width-table range 2))
1008 'arabic-2-column) 1006 'arabic-2-column)
1009 1007
1008 (defvar cjk-char-width-table
1009 (let ((table (make-char-table nil)))
1010 (dolist (charset '(big5 chinese-gb2312 chinese-cns11643-1
1011 japanese-jisx0208 korean-ksc5601))
1012 (map-charset-chars #'(lambda (range arg)
1013 (set-char-table-range table range 2))
1014 charset))
1015 (optimize-char-table table)
1016 (set-char-table-parent table char-width-table)
1017 table)
1018 "Character width table used in CJK language environment.")
1019
1020 (defun use-cjk-char-width-table ()
1021 "Internal use only.
1022 Setup char-width-table appropriate for CJK language environment.")
1023
1024 (defun use-default-char-width-table ()
1025 "Internal use only.
1026 Setup char-width-table appropriate for non-CJK language environment.")
1027
1010 (optimize-char-table (standard-case-table)) 1028 (optimize-char-table (standard-case-table))
1011 (optimize-char-table (standard-category-table)) 1029 (optimize-char-table (standard-category-table))
1012 (optimize-char-table (standard-syntax-table)) 1030 (optimize-char-table (standard-syntax-table))
1031
1032
1033 ;; Setting char-script-table.
1013 1034
1014 ;; The Unicode blocks actually extend past some of these ranges with 1035 ;; The Unicode blocks actually extend past some of these ranges with
1015 ;; undefined codepoints. 1036 ;; undefined codepoints.
1016 (let ((script-list nil)) 1037 (let ((script-list nil))
1017 (dolist 1038 (dolist