Mercurial > emacs
comparison lisp/language/indian.el @ 106634:1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
handle ZWNJ and ZWJ. Use it in composition-function-table for
Devanagari.
(malayalam-composable-pattern): Fix previous change.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 25 Dec 2009 02:45:47 +0000 |
parents | 3b9052789379 |
children | 8d23ea9e4ab1 |
comparison
equal
deleted
inserted
replaced
106633:c155113f5bd1 | 106634:1f0768e4ad7e |
---|---|
137 (dolist (elt table) | 137 (dolist (elt table) |
138 (setq regexp (replace-regexp-in-string (car elt) (cdr elt) regexp t t))) | 138 (setq regexp (replace-regexp-in-string (car elt) (cdr elt) regexp t t))) |
139 regexp)) | 139 regexp)) |
140 | 140 |
141 (defconst devanagari-composable-pattern | 141 (defconst devanagari-composable-pattern |
142 (concat | 142 (let ((table |
143 "\\([अ-औॠॡ][ँं]?\\)\\|[ः।]" | 143 '(("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel |
144 "\\|\\(" | 144 ("C" . "[\u0915-\u0939]") ; consonant |
145 "\\(?:\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?\\(?:[क-हक़-य़]्\\)?[क-हक़-य़]्\\)?" | 145 ("R" . "\u0930") ; RA |
146 "[क-हक़-य़]\\(?:्\\|[ा-्ॢॣ]?[ंँ]?\\)?" | 146 ("n" . "\u093C") ; NUKTA |
147 "\\)") | 147 ("H" . "\u094D") ; HALANT |
| | 148 ("m" . "\u093F") ; vowel sign (pre) |
| | 149 ("u" . "[\u0945-\u0948\u0955]") ; vowel sign (above) |
| | 150 ("b" . "[\u0941-\u0944\u0962-\u0963]") ; vowel sign (below) |
| | 151 ("p" . "[\u093E\u0940\u0949-\u094C]") ; vowel sign (post) |
| | 152 ("A" . "[\u0900-\u0902\u0953-\u0954]") ; vowel modifier (above) |
| | 153 ("a" . "\u0903") ; vowel modifier (post) |
| | 154 ("S" . "\u0951") ; stress sign (above) |
| | 155 ("s" . "\u0952") ; stress sign (below) |
| | 156 ("J" . "\u200D") ; ZWJ |
| | 157 ("N" . "\u200C") ; ZWNJ |
| | 158 ("X" . "[\u0900-\u097F]")))) ; all coverage |
| | 159 (indian-compose-regexp |
| | 160 (concat |
| | 161 ;; syllables with an independent vowel, or |
| | 162 "\\(?:RH\\)?Vn?m?b?u?p?n?A?s?S?a?\\|" |
| | 163 ;; consonant-based syllables, or |
| | 164 "\\(?:Cn?J?HJ?\\)*Cn?\\(?:H[NJ]?\\|m?b?u?p?n?A?s?S?a?\\)\\|" |
| | 165 ;; special consonant form, or |
| | 166 "JHR\\|" |
| | 167 ;; any other singleton characters |
| | 168 "X") |
| | 169 table)) |
148 "Regexp matching a composable sequence of Devanagari characters.") | 170 "Regexp matching a composable sequence of Devanagari characters.") |
149 | 171 |
150 (defconst tamil-composable-pattern | 172 (defconst tamil-composable-pattern |
151 (concat | 173 (concat |
152 "\\([அ-ஔ]\\)\\|" | 174 "\\([அ-ஔ]\\)\\|" |
163 "[ಕ-ಹ]\\(?:್\\|[ಾ-್ೕೃ]?\\)?" | 185 "[ಕ-ಹ]\\(?:್\\|[ಾ-್ೕೃ]?\\)?" |
164 "\\)") | 186 "\\)") |
165 "Regexp matching a composable sequence of Kannada characters.") | 187 "Regexp matching a composable sequence of Kannada characters.") |
166 | 188 |
167 (defconst malayalam-composable-pattern | 189 (defconst malayalam-composable-pattern |
168 (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel | 190 (let ((table |
169 ("C" . "[\u0D15-\u0D39]") ; consonant | 191 '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel |
170 ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra | 192 ("C" . "[\u0D15-\u0D39]") ; consonant |
171 ("p" . "[\u0D3E-\u0D44\u0D57]") ; postname matra | 193 ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra |
172 ("b" . "[\u0D62-\u0D63]") ; belowbase matra | 194 ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra |
173 ("a" . "[\u0D02-\u0D03]") ; abovebase sign | 195 ("b" . "[\u0D62-\u0D63]") ; belowbase matra |
174 ("H" . "്") ; virama sign | 196 ("a" . "[\u0D02-\u0D03]") ; abovebase sign |
175 ("N" . "\u200D") ; ZWJ | 197 ("H" . "\u0D4D") ; virama sign |
176 ("J" . "\u200C") ; ZWNJ | 198 ("N" . "\u200D") ; ZWJ |
177 ("X" . "[\u0D00-\u0D7F]")))) ; all coverage | 199 ("J" . "\u200C") ; ZWNJ |
| | 200 ("X" . "[\u0D00-\u0D7F]")))) ; all coverage |
178 (indian-compose-regexp | 201 (indian-compose-regexp |
179 (concat | 202 (concat |
180 ;; consonant-based syllables | 203 ;; syllables with an independent vowel, or |
181 "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|" | 204 "V\\(?:J?HC\\)?m?b?p?a?\\|" |
182 ;; syllables with an independent vowel | 205 ;; consonant-based syllables, or |
183 "V\\(J?HC\\)?m?b?p?a?\\|" | 206 "\\(?:CJ?HJ?\\)\\{0,4\\}C\\(?:H[NJ]?\\|m?b?p?a?\\)\\|" |
184 ;; special consonant form | 207 ;; special consonant form, or |
185 "JHC\\|" | 208 "JHC\\|" |
186 ;; any other singleton characters | 209 ;; any other singleton characters |
187 "X") | 210 "X") |
188 table)) | 211 table)) |
189 "Regexp matching a composable sequence of Malayalam characters.") | 212 "Regexp matching a composable sequence of Malayalam characters.") |
190 | 213 |
191 (let ((script-regexp-alist | 214 (let ((script-regexp-alist |
192 `((devanagari . "[\x900-\x97F\x200C\x200D]+") | 215 `((devanagari . ,devanagari-composable-pattern) |
193 (bengali . "[\x980-\x9FF\x200C\x200D]+") | 216 (bengali . "[\x980-\x9FF\x200C\x200D]+") |
194 (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+") | 217 (gurmukhi . "[\xA00-\xA7F\x200C\x200D]+") |
195 (gujarati . "[\xA80-\xAFF\x200C\x200D]+") | 218 (gujarati . "[\xA80-\xAFF\x200C\x200D]+") |
196 (oriya . "[\xB00-\xB7F\x200C\x200D]+") | 219 (oriya . "[\xB00-\xB7F\x200C\x200D]+") |
197 (tamil . "[\xB80-\xBFF\x200C\x200D]+") | 220 (tamil . "[\xB80-\xBFF\x200C\x200D]+") |