changeset 104052:f317b7c46271

Fix setting of category ?C, ?|, ?K, and ?H. Fix setting of case for Latin Extended and Greek Extended. (build-unicode-category-table): Fix range checks.
author Kenichi Handa <handa@m17n.org>
date Fri, 24 Jul 2009 07:56:19 +0000
parents a629d51e1d82
children e8bddd7112e5
files lisp/international/characters.el
diffstat 1 files changed, 23 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/international/characters.el	Fri Jul 24 05:41:17 2009 +0000
+++ b/lisp/international/characters.el	Fri Jul 24 07:56:19 2009 +0000
@@ -139,10 +139,9 @@
 (modify-category-entry '(#xF900 . #xFAFF) ?C)
 (modify-category-entry '(#xF900 . #xFAFF) ?c)
 (modify-category-entry '(#xF900 . #xFAFF) ?|)
-(modify-category-entry '(#x20000 . #x2AFFF) ?|)
-(modify-category-entry '(#x2F800 . #x2FFFF) ?|)
-(modify-category-entry '(#x20000 . #x2AFFF) ?C)
-(modify-category-entry '(#x2F800 . #x2FFFF) ?C)
+(modify-category-entry '(#x20000 . #x2FFFF) ?|)	; one contiguous range, U+20000..U+2FFFF
+(modify-category-entry '(#x20000 . #x2FFFF) ?C)
+(modify-category-entry '(#x20000 . #x2FFFF) ?c)	; same three categories as #xF900..#xFAFF
 
 
 ;; Chinese character set (GB2312)
@@ -195,17 +194,18 @@
   (modify-category-entry range ?\|))
 
 ;; Katakana block
-(let ((range '(#x30a0 . #x30ff)))
-  ;; ?K is double width, ?k isn't specified
-  (modify-category-entry range ?K)
-  (modify-category-entry range ?\|))
+(modify-category-entry '(#x3099 . #x309C) ?K)	; U+3099..U+309C also get ?K
+(modify-category-entry '(#x30A0 . #x30FF) ?K)	; the whole U+30A0..U+30FF range
+(modify-category-entry '(#x30A0 . #x30FA) ?\|)	; ?\| leaves out U+30FB..U+30FE
+(modify-category-entry #x30FF ?\|)		; ... and is set again for U+30FF
 
 ;; Hiragana block
-(let ((range '(#x3040 . #x309d)))
-  ;; ?H is actually defined to be double width
-  ;;(modify-category-entry range ?H)
-  (modify-category-entry range ?\|)
-  )
+(modify-category-entry '(#x3040 . #x309F) ?H)	; the whole U+3040..U+309F range
+(modify-category-entry '(#x3040 . #x3096) ?\|)	; ?\| leaves out U+3097..U+309E
+(modify-category-entry #x309F ?\|)		; ... and is set again for U+309F
+(modify-category-entry #x30A0 ?H)		; two code points outside that range
+(modify-category-entry #x30FC ?H)		;   that are given ?H as well
+
 
 ;; JISX0208
 (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E)
@@ -527,7 +527,8 @@
 		       (#x014a . #x0177)
 		       (#x0179 . #x017E)
 		       (#x0182 . #x0185)
-		       (#x0187 . #x018C)
+		       (#x0187 . #x0188)
+		       (#x018B . #x018C)
 		       (#x0191 . #x0192)
 		       (#x0198 . #x0199)
 		       (#x01A0 . #x01A5)
@@ -550,6 +551,9 @@
 	  (set-case-syntax-pair from (1+ from) tbl)
 	  (setq from (+ from 2))))))
 
+  (set-case-syntax-pair #x189 #x256 tbl)
+  (set-case-syntax-pair #x18A #x257 tbl)
+
   ;; In some languages, such as Turkish, U+0049 LATIN CAPITAL LETTER I
   ;; and U+0131 LATIN SMALL LETTER DOTLESS I make a case pair, and so
   ;; do U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+0069 LATIN
@@ -616,8 +620,9 @@
   (while (<= c #x1fff)
     (and (<= (logand c #x000f) 7)
 	 (<= c #x1fa7)
-	 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
-	 (/= (logand c #x00f0) 7)
+	 (not (memq c '(#x1f16 #x1f17 #x1f46 #x1f47	; unassigned code points
+			       #x1f50 #x1f52 #x1f54 #x1f56)))	; no capital at c+8
+	 (/= (logand c #x00f0) #x70)		; skip c whose second hex digit is 7
 	 (set-case-syntax-pair (+ c 8) c tbl))
     (setq c (1+ c)))
   (set-case-syntax-pair ?Ᾰ ?ᾰ tbl)
@@ -1202,8 +1207,8 @@
   (let ((table (make-char-table 'unicode-category-table nil)))
     (dotimes (i #x110000)
       (if (or (< i #xD800)
-	      (and (> i #xF900) (< i #x30000))
-	      (and (> i #xE0000) (< i #xE0200)))
+	      (and (>= i #xF900) (< i #x30000))
+	      (and (>= i #xE0000) (< i #xE0200)))
 	  (aset table i (get-char-code-property i 'general-category))))
     (set-char-table-range table '(#xE000 . #xF8FF) 'Co)
     (set-char-table-range table '(#xF0000 . #xFFFFD) 'Co)