Mercurial > emacs
annotate lisp/language/devan-util.el @ 41971:00a2b39fce69
*** empty log message ***
| author | Richard M. Stallman <rms@gnu.org> |
|---|---|
| date | Tue, 11 Dec 2001 22:56:41 +0000 |
| parents | 402b676048e8 |
| children | 144a811f0590 |
| rev | line source |
|---|---|
| 41469 | 1 ;;; devan-util.el --- Support for composing Devanagari characters |
| 17052 | 2 |
| 41469 | 3 ;; Copyright (C) 2001 Free Software Foundation, Inc. |
| 17052 | 4 |
| 41469 | 5 ;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp> |
| 6 ;; Keywords: multilingual, Devanagari | |
| 17052 | 7 |
| 8 ;; This file is part of GNU Emacs. | |
| 9 | |
| 10 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
| 11 ;; it under the terms of the GNU General Public License as published by | |
| 12 ;; the Free Software Foundation; either version 2, or (at your option) | |
| 13 ;; any later version. | |
| 14 | |
| 15 ;; GNU Emacs is distributed in the hope that it will be useful, | |
| 16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 18 ;; GNU General Public License for more details. | |
| 19 | |
| 20 ;; You should have received a copy of the GNU General Public License | |
|
17314
f438ebf1c679
Fix FSF address in comment.
Kenichi Handa <handa@m17n.org>
parents:
17300
diff
changeset
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the |
|
f438ebf1c679
Fix FSF address in comment.
Kenichi Handa <handa@m17n.org>
parents:
17300
diff
changeset
|
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
|
f438ebf1c679
Fix FSF address in comment.
Kenichi Handa <handa@m17n.org>
parents:
17300
diff
changeset
|
23 ;; Boston, MA 02111-1307, USA. |
| 17052 | 24 |
| 41469 | 25 ;; Created: Feb. 17. 2001 |
| 26 | |
| 17052 | 27 ;;; Commentary: |
| 28 | |
| 41469 | 29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and |
| 30 ;; composition of Devanagari script characters. | |
| 17052 | 31 |
| 32 ;;; Code: | |
| 33 | |
| 41599 | 34 ;;;###autoload |
| 26894 | 35 |
| 41469 | 36 ;; Devanagari Composable Pattern |
| 37 ;; C .. Consonants | |
| 38 ;; V .. Vowel | |
| 39 ;; H .. Halant | |
| 40 ;; M .. Matra | |
| 41 ;; V .. Vowel | |
| 42 ;; A .. Anuswar | |
| 43 ;; D .. Chandrabindu | |
| 44 ;; (N .. Zerowidth Non Joiner) | |
| 45 ;; (J .. Zerowidth Joiner. ) | |
| 46 ;; 1. vowel | |
| 47 ;; V(A/D)? | |
| 48 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) | |
| 41602 | 49 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? |
| 17052 | 50 |
| 41469 | 51 (defconst devanagari-consonant |
| 52 "[$,15U(B-$,15y68(B-$,16?(B]") | |
| 17052 | 53 |
| 41469 | 54 (defconst devanagari-composable-pattern |
| 55 (concat | |
| 56 "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|$,15C(B" | |
| 57 "\\|\\(" | |
| 58 "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?" | |
| 59 "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?" | |
| 60 "\\)") | |
| 61 "Regexp matching a composable sequence of Devanagari characters.") | |
| 17052 | 62 |
| 41469 | 63 (defun devanagari-compose-region (from to) |
|
17300
01d528c5dd18
Handle more Devanagari characters correctly.
Kenichi Handa <handa@m17n.org>
parents:
17087
diff
changeset
|
64 (interactive "r") |
| 26894 | 65 (save-excursion |
| 66 (save-restriction | |
| 67 (narrow-to-region from to) | |
| 68 (goto-char (point-min)) | |
| 41469 | 69 (while (re-search-forward devanagari-composable-pattern nil t) |
| 70 (devanagari-compose-syllable-region (match-beginning 0) | |
| 71 (match-end 0)))))) | |
| 72 (defun devanagari-compose-string (string) | |
| 73 (with-temp-buffer | |
| 74 (insert (decompose-string string)) | |
| 75 (devanagari-compose-region (point-min) (point-max)) | |
| 76 (buffer-string))) | |
| 77 | |
| 78 (defun range (from to) | |
| 79 "Make the list of the integers of range FROM to TO." | |
| 80 (let (result) | |
| 81 (while (<= from to) (setq result (cons to result) to (1- to))) result)) | |
| 82 | |
| 83 (defun regexp-of-hashtbl-keys (hashtbl) | |
| 84 "Returns the regular expression of hashtable keys." | |
| 85 (let ((max-specpdl-size 1000)) | |
| 86 (regexp-opt | |
| 87 (sort | |
| 88 (let (dummy) | |
| 89 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) | |
| 90 dummy) | |
| 91 (function (lambda (x y) (> (length x) (length y)))))))) | |
| 92 | |
| 93 (defun devanagari-composition-function (from to pattern &optional string) | |
| 94 "Compose Devanagari characters in REGION, or STRING if specified. | |
| 95 Assume that the REGION or STRING must fully match the composable | |
| 96 PATTERN regexp." | |
| 97 (if string (devanagari-compose-syllable-string string) | |
| 98 (devanagari-compose-syllable-region from to)) | |
| 99 (- to from)) | |
| 100 | |
| 101 ;; Register a function to compose Devanagari characters. | |
| 102 (mapc | |
| 103 (function (lambda (ucs) | |
| 104 (aset composition-function-table (decode-char 'ucs ucs) | |
| 105 (list (cons devanagari-composable-pattern | |
| 106 'devanagari-composition-function))))) | |
| 107 (nconc '(#x0903) (range #x0905 #x0939) (range #x0958 #x0961))) | |
| 108 | |
| 109 ;; Notes on conversion steps. | |
| 17052 | 110 |
| 41469 | 111 ;; 1. chars to glyphs |
| 112 ;; | |
| 113 ;; Rules are not applied to a halant that appears at the end of the | |
| 114 ;; text. Also, a preceding/following "r" is treated as a special case. | |
| 115 | |
| 116 ;; 2. glyphs reordering. | |
| 117 ;; | |
| 118 ;; The glyphs are split at each halant, and each glyph group is | |
| 119 ;; re-ordered in the following order. | |
| 120 ;; | |
| 121 ;; Note that `consonant-glyph' mentioned here does not contain the | |
| 122 ;; vertical bar (right modifier) attached at the right of the | |
| 123 ;; consonant. | |
| 124 ;; | |
| 125 ;; If the glyph-group contains right modifier, | |
| 126 ;; (1) consonant-glyphs/vowels, with nukta sign | |
| 127 ;; (2) spacing | |
| 128 ;; (3) right modifier (may be matra) | |
| 129 ;; (4) top matra | |
| 130 ;; (5) preceding "r" | |
| 131 ;; (6) anuswar | |
| 132 ;; (7) following "r" | |
| 133 ;; (8) bottom matra or halant. | |
| 134 ;; | |
| 135 ;; Otherwise, | |
| 136 ;; (1) consonant-glyph/vowels, with nukta sign | |
| 137 ;; (3) left matra | |
| 138 ;; (4) top matra | |
| 139 ;; (5) preceding "r" | |
| 140 ;; (6) anuswar | |
| 141 ;; (7) following "r" | |
| 142 ;; (8) bottom matra or halant. | |
| 143 ;; (2) spacing | |
| 144 | |
| 145 ;; 3. glyph to glyph | |
| 146 ;; | |
| 147 ;; For better display, some glyph sequences are replaced by tuned glyphs. | |
| 148 | |
| 149 ;; 4. Composition. | |
| 150 ;; | |
| 151 ;; Left modifiers are attached at the left. | |
| 152 ;; All others are attached at the right. | |
| 153 | |
| 154 ;; Problem:: | |
| 155 ;; Can we generalize this method to other Indian scripts? | |
| 17052 | 156 |
| 41523 | 157 (defvar dev-char-glyph |
| 158 '(("$,15E(B" . "$,4 K(B") | |
| 41599 | 159 ("$,15F(B" . "$,4 K")(B") |
| 160 ("$,15~(B" . "$,4")(B") | |
| 41523 | 161 ("$,15G(B" . "$,4 \(B") |
| 41599 | 162 ("$,15(B" . "$,4"*(B") |
| 163 ("$,155A(B" . "$,4"*(B\$,4"&(B") | |
| 41523 | 164 ("$,15H(B" . "$,4 \"'(B") |
| 41599 | 165 ("$,15H5A(B" . "$,4 \"'"&(B") |
| 166 ("$,16 (B" . "$,4"2(B") | |
| 167 ("$,16 5A(B" . "$,4"2"&(B") | |
| 41523 | 168 ("$,15I(B" . "$,4 ](B") |
| 41599 | 169 ("$,16!(B" . "$,4"6(B") |
| 170 ("$,15J(B" . "$,4 ^"P(B") | |
| 171 ("$,16"(B" . "$,4":(B") | |
| 172 ("$,15K(B" . "$,4 `"Q(B") | |
| 173 ("$,16#(B" . "$,4">(B") | |
| 41523 | 174 ;;("$,15L(B" . nil) ; not implemented. |
| 41599 | 175 ("$,16$(B" . "$,4"?(B") |
| 176 ("$,15M(B" . "$,4 b"L(B") | |
| 177 ("$,15M5A(B" . "$,4 b"$(B") | |
| 178 ("$,15M5B(B" . "$,4 b"$(B") | |
| 179 ("$,16%(B" . "\$,4"L(B") | |
| 180 ("$,15N(B" . "$,4 b"@(B") | |
| 181 ("$,15N5A(B" . "$,4 b"@"&(B") | |
| 182 ("$,16&(B" . "\$,4"@(B") | |
| 183 ("$,16&5A(B" . "\$,4"@(B\$,4"&(B") | |
| 41523 | 184 ("$,15O(B" . "$,4 b(B") |
| 41599 | 185 ("$,16'(B" . "\$,4"D(B") |
| 186 ("$,16'5A(B" . "\$,4"D(B\$,4"&(B") | |
| 187 ("$,15P(B" . "$,4 b"D(B") | |
| 188 ("$,15P5A(B" . "$,4 b"D"&(B") | |
| 189 ("$,16((B" . "\$,4"H(B") | |
| 190 ("$,16(5A(B" . "\$,4"H(B\$,4"&(B") | |
| 191 ("$,15Q(B" . "$,4 K")"L(B") ;; special rule for reodering. | |
| 192 ("$,15Q5A(B" . "$,4 K")"$(B") | |
| 193 ("$,15Q5B(B" . "$,4 K")"$(B") | |
| 194 ("$,16)(B" . "\$,4")"L(B") | |
| 195 ("$,16)5A(B" . "\$,4")"$(B") | |
| 196 ("$,16)5B(B" . "\$,4")"$(B") | |
| 197 ("$,15R(B" . "$,4 K")"@(B") | |
| 198 ("$,15R5A(B" . "$,4 K")"@"&(B") | |
| 199 ("$,16*(B" . "\$,4")"@(B") | |
| 200 ("$,16*5A(B" . "\$,4")"@"&(B") | |
| 201 ("$,15S(B" . "$,4 K")"D(B") | |
| 202 ("$,15S5A(B" . "$,4 K")"D"&(B") | |
| 203 ("$,16+(B" . "\$,4")"D(B") | |
| 204 ("$,16+5A(B" . "\$,4")"D"&(B") | |
| 205 ("$,15T(B" . "$,4 K")"H(B") | |
| 206 ("$,15T5A(B" . "$,4 K")"H"&(B") | |
| 207 ("$,16,(B" . "\$,4")"H(B") | |
| 208 ("$,16,5A(B" . "\$,4")"H"&(B") | |
| 209 ("$,16@(B" . "$,4 a"Q(B") | |
| 41523 | 210 ;;("$,16B(B" . nil) |
| 211 ;;("$,16A(B" . nil) | |
| 212 ;;("$,16C(B" . nil) | |
| 41469 | 213 |
| 41523 | 214 ;; GUTTURALS |
| 41599 | 215 ("$,15U(B" . "$,4 e"R(B") |
| 41523 | 216 ("$,15U6-(B" . "$,4 c(B") |
| 41599 | 217 ("$,15U6-5p(B" . "$,4 g"R(B") |
| 218 ("$,15U6-5d(B" . "$,4 h"R(B") | |
| 219 ("$,15U6-5w(B" . "$,4 i")(B") | |
| 41523 | 220 ("$,15U6-5w6-(B" . "$,4 i(B") |
| 41469 | 221 |
| 41599 | 222 ("$,15V(B" . "$,4 j")(B") |
| 41523 | 223 ("$,15V6-(B" . "$,4 j(B") |
| 41599 | 224 ("$,15V6-5p(B" . "$,4 l")(B") |
| 41523 | 225 ("$,15V6-5p6-(B" . "$,4 l(B") |
| 41469 | 226 |
| 41599 | 227 ("$,15W(B" . "$,4 m")(B") |
| 41523 | 228 ("$,15W6-(B" . "$,4 m(B") |
| 41599 | 229 ("$,15W6-5p(B" . "$,4 o")(B") |
| 41523 | 230 ("$,15W6-5p6-(B" . "$,4 o(B") |
| 41469 | 231 |
| 41599 | 232 ("$,15X(B" . "$,4 p")(B") |
| 41523 | 233 ("$,15X6-(B" . "$,4 p(B") |
| 41599 | 234 ("$,15X6-5p(B" . "$,4 q")(B") |
| 41523 | 235 ("$,15X6-5p6-(B" . "$,4 q(B") |
| 41469 | 236 |
| 41599 | 237 ("$,15Y(B" . "$,4 r"S(B") |
| 41523 | 238 ;; PALATALS |
| 41599 | 239 ("$,15Z(B" . "$,4 s")(B") |
| 41523 | 240 ("$,15Z6-(B" . "$,4 s(B") |
| 41599 | 241 ("$,15Z6-5p(B" . "$,4 t")(B") |
| 41523 | 242 ("$,15Z6-5p6-(B" . "$,4 t(B") |
| 41469 | 243 |
| 41599 | 244 ("$,15[(B" . "$,4 u"T(B") |
| 41469 | 245 |
| 41599 | 246 ("$,15\(B" . "$,4 v")(B") |
| 41523 | 247 ("$,15\6-(B" . "$,4 v(B") |
| 41599 | 248 ("$,15\6-5p(B" . "$,4 x")(B") |
| 41523 | 249 ("$,15\6-5p6-(B" . "$,4 x(B") |
| 41599 | 250 ("$,15\6-5^(B" . "$,4 y")(B") |
| 41523 | 251 ("$,15\6-5^6-(B" . "$,4 y(B") |
| 17052 | 252 |
| 41599 | 253 ("$,15](B" . "$,4 z")(B") |
| 41523 | 254 ("$,15]6-(B" . "$,4 z(B") |
| 41599 | 255 ("$,15]6-5p(B" . "$,4 {")(B") |
| 41523 | 256 ("$,15]6-5p6-(B" . "$,4 {(B") |
| 41469 | 257 |
| 41599 | 258 ("$,15^(B" . "$,4 |")(B") |
| 41523 | 259 ("$,15^6-(B" . "$,4 |(B") |
| 260 ;; CEREBRALS | |
| 41599 | 261 ("$,15_(B" . "$,4 }"U(B") |
| 262 ("$,15_6-5_(B" . "$,4 ~"U(B") | |
| 263 ("$,15_6-5`(B" . "$,4 "U(B") | |
| 41469 | 264 |
| 41599 | 265 ("$,15`(B" . "$,4! "V(B") |
| 266 ("$,15`6-5`(B" . "$,4!!"V(B") | |
| 41469 | 267 |
| 41599 | 268 ("$,15a(B" . "$,4!""W(B") |
| 269 ("$,15a6-5a(B" . "$,4!$"W(B") | |
| 270 ("$,15a6-5b(B" . "$,4!%"W(B") | |
| 41469 | 271 |
| 41599 | 272 ("$,15b(B" . "$,4!&"X(B") |
| 41469 | 273 |
| 41599 | 274 ("$,15c(B" . "$,4!(")(B") |
| 41523 | 275 ("$,15c6-(B" . "$,4!((B") |
| 276 ;; DENTALS | |
| 41599 | 277 ("$,15d(B" . "$,4!)")(B") |
| 41523 | 278 ("$,15d6-(B" . "$,4!)(B") |
| 41599 | 279 ("$,15d6-5p(B" . "$,4!*")(B") |
| 41523 | 280 ("$,15d6-5p6-(B" . "$,4!*(B") |
| 41599 | 281 ("$,15d6-5d(B" . "$,4!+")(B") |
| 41523 | 282 ("$,15d6-5d6-(B" . "$,4!+(B") |
| 41469 | 283 |
| 41599 | 284 ("$,15e(B" . "$,4!,")(B") |
| 41523 | 285 ("$,15e6-(B" . "$,4!,(B") |
| 41599 | 286 ("$,15e6-5p(B" . "$,4!-")(B") |
| 41523 | 287 ("$,15e6-5p6-(B" . "$,4!-(B") |
| 41469 | 288 |
| 41599 | 289 ("$,15f(B" . "$,4!."Y(B") |
| 290 ("$,15f6#(B" . "$,4!/"Y(B") | |
| 291 ("$,15f6-5p(B" . "$,4!0"Y(B") | |
| 292 ("$,15f6-5f(B" . "$,4!1"Y(B") | |
| 293 ("$,15f6-5g(B" . "$,4!2"Y(B") | |
| 41523 | 294 ("$,15f6-5n(B" . "$,4!3(B") |
| 295 ("$,15f6-5o(B" . "$,4!4(B") | |
| 41599 | 296 ("$,15f6-5u(B" . "$,4!5"Y(B") |
|
17773
f1ece95d00c2
(in-is13194-devanagari-post-read-conversion): New function.
Kenichi Handa <handa@m17n.org>
parents:
17314
diff
changeset
|
297 |
| 41599 | 298 ("$,15g(B" . "$,4!6")(B") |
| 41523 | 299 ("$,15g6-(B" . "$,4!6(B") |
| 41599 | 300 ("$,15g6-5p(B" . "$,4!7")(B") |
| 41523 | 301 ("$,15g6-5p6-(B" . "$,4!7(B") |
| 41469 | 302 |
| 41599 | 303 ("$,15h(B" . "$,4!8")(B") |
| 41523 | 304 ("$,15h6-(B" . "$,4!8(B") |
| 41599 | 305 ("$,15h6-5p(B" . "$,4!9")(B") |
| 306 ("$,15h6-5p6-(B" . "$,4!9")(B") | |
| 307 ("$,15h6-5h(B" . "$,4!:")(B") | |
| 41523 | 308 ("$,15h6-5h6-(B" . "$,4!:(B") |
| 41469 | 309 |
| 41599 | 310 ("$,15i(B" . "$,4!8"#")(B") |
| 41523 | 311 ;; LABIALS |
| 41599 | 312 ("$,15j(B" . "$,4!;")(B") |
| 41523 | 313 ("$,15j6-(B" . "$,4!;(B") |
| 41599 | 314 ("$,15j6-5p(B" . "$,4!<")(B") |
| 41523 | 315 ("$,15j6-5p6-(B" . "$,4!<(B") |
| 41469 | 316 |
| 41599 | 317 ("$,15k(B" . "$,4!a"[(B") |
| 41523 | 318 ("$,15k6-(B" . "$,4!=(B") |
| 41599 | 319 ("$,15k6-5p(B" . "$,4!c"[(B") |
| 41469 | 320 |
| 41599 | 321 ("$,15l(B" . "$,4!d")(B") |
| 41523 | 322 ("$,15l6-(B" . "$,4!d(B") |
| 41599 | 323 ("$,15l6-5p(B" . "$,4!e")(B") |
| 41523 | 324 ("$,15l6-5p6-(B" . "$,4!e(B") |
| 41469 | 325 |
| 41599 | 326 ("$,15m(B" . "$,4!f")(B") |
| 41523 | 327 ("$,15m6-(B" . "$,4!f(B") |
| 41599 | 328 ("$,15m6-5p(B" . "$,4!g")(B") |
| 41523 | 329 ("$,15m6-5p6-(B" . "$,4!g(B") |
| 41469 | 330 |
| 41599 | 331 ("$,15n(B" . "$,4!h")(B") |
| 41523 | 332 ("$,15n6-(B" . "$,4!h(B") |
| 41599 | 333 ("$,15n6-5p(B" . "$,4!i")(B") |
| 41523 | 334 ("$,15n6-5p6-(B" . "$,4!i(B") |
| 335 ;; SEMIVOWELS | |
| 41599 | 336 ("$,15o(B" . "$,4!j")(B") |
| 41523 | 337 ("$,15o6-(B" . "$,4!j(B") |
| 41599 | 338 ("$,15o6-5p(B" . "$,4!k")(B") |
| 41523 | 339 ("$,15o6-5p6-(B" . "$,4!k(B") |
| 340 ("$,16-5o(B" . "$,4!l(B") ;; when every ohter lig. fails. | |
| 41469 | 341 |
| 41599 | 342 ("$,15p(B" . "$,4!n"W(B") |
| 41523 | 343 ;; ("$,15p6-(B" . "\$,4"'(B") ;; special case. only the topmost pos. |
| 41599 | 344 ("$,15q(B" . "$,4!n"#"W(B") |
| 41523 | 345 ("$,15q6-(B" . "$,4!m(B") ;; IS 13194 speical rule. |
| 41599 | 346 ("$,15p6!(B" . "$,4!o"[(B") |
| 347 ("$,15p6"(B" . "$,4!p"\(B") | |
| 17052 | 348 |
| 41599 | 349 ("$,15r(B" . "$,4!q")(B") |
| 41523 | 350 ("$,15r6-(B" . "$,4!q(B") |
| 351 ("$,15s(B" . "$,4!s(B") | |
| 352 ("$,15s6-(B" . "$,4!r(B") | |
| 41599 | 353 ("$,15t(B" . "$,4!s"#(B") |
| 354 ("$,15t6-(B" . "$,4!r"#(B") | |
| 41469 | 355 |
| 41599 | 356 ("$,15u(B" . "$,4!t")(B") |
| 41523 | 357 ("$,15u6-(B" . "$,4!t(B") |
| 41599 | 358 ("$,15u6-5p(B" . "$,4!u")(B") |
| 41523 | 359 ("$,15u6-5p6-(B" . "$,4!u(B") |
| 360 ;; SIBILANTS | |
| 41599 | 361 ("$,15v(B" . "$,4!v")(B") |
| 41523 | 362 ("$,15v6-(B" . "$,4!v(B") |
| 41599 | 363 ("$,15v6-5u(B" . "$,4!w")(B") |
| 41523 | 364 ("$,15v6-5u6-(B" . "$,4!w(B") |
| 41599 | 365 ("$,15v6-5p(B" . "$,4!x")(B") |
| 41523 | 366 ("$,15v6-5p6-(B" . "$,4!x(B") |
| 41469 | 367 |
| 41599 | 368 ("$,15w(B" . "$,4!y")(B") |
| 41523 | 369 ("$,15w6-(B" . "$,4!y(B") |
| 41599 | 370 ("$,15x(B" . "$,4!z")(B") |
| 41523 | 371 ("$,15x6-(B" . "$,4!z(B") |
| 41599 | 372 ("$,15x6-5p(B" . "$,4!{")(B") |
| 41523 | 373 ("$,15x6-5p6-(B" . "$,4!{(B") |
|
17300
01d528c5dd18
Handle more Devanagari characters correctly.
Kenichi Handa <handa@m17n.org>
parents:
17087
diff
changeset
|
374 |
| 41523 | 375 ("$,15y(B" . "$,4!}(B") |
| 376 ("$,15y6-(B" . "$,4!|(B") | |
| 377 ("$,15y6#(B" . "$,4!~(B") | |
| 378 ("$,15y6-5p(B" . "$,4!(B") | |
| 41599 | 379 ("$,15y6-5n(B" . "$,4" (B") |
| 380 ("$,15y6-5o(B" . "$,4"!(B") | |
| 41523 | 381 ;; NUKTAS |
| 41599 | 382 ("$,168(B" . "$,4 f"R"S(B") |
| 41523 | 383 ("$,1686-(B" . "$,4 d(B") |
| 41599 | 384 ("$,169(B" . "$,4 k")(B") |
| 41523 | 385 ("$,1696-(B" . "$,4 k(B") |
| 41599 | 386 ("$,16:(B" . "$,4 n")(B") |
| 41523 | 387 ("$,16:6-(B" . "$,4 n(B") |
| 41599 | 388 ("$,16;(B" . "$,4 w")(B") |
| 41523 | 389 ("$,16;6-(B" . "$,4 w(B") |
| 41599 | 390 ("$,16<(B" . "$,4!#"W(B") |
| 391 ("$,16=(B" . "$,4!'"X(B") | |
| 392 ("$,16>(B" . "$,4!b"[(B") | |
| 41523 | 393 ("$,16>6-(B" . "$,4!>(B") |
| 41599 | 394 ("$,16?(B" . "$,4!j"#")(B") |
| 41523 | 395 ;; misc modifiers. |
| 41599 | 396 ("$,15A(B" . "\$,4"$(B") |
| 397 ("$,15B(B" . "\$,4"&(B") | |
| 41523 | 398 ("$,15C(B" . "$,4 F(B") |
| 41599 | 399 ("$,15|(B" . "$,4"#(B") |
| 41523 | 400 ("$,15}(B" . "$,4 E(B") |
| 41599 | 401 ("$,16-(B" . "$,4""(B") |
| 402 ("$,16-5p(B" . "$,4"%(B") ;; following "r" | |
| 41523 | 403 ;; ("$,160(B" . "$,4 D(B") |
| 404 ;; ("$,16D(B" . "$,4 J(B") | |
| 405 ;; ("$,16F(B" . "") | |
| 406 ;; ("$,16G(B" . "") | |
| 407 ;; ("$,16H(B" . "") | |
| 408 ;; ("$,16I(B" . "") | |
| 409 ;; ("$,16J(B" . "") | |
| 410 ;; ("$,16K(B" . "") | |
| 411 ;; ("$,16L(B" . "") | |
| 412 ;; ("$,16M(B" . "") | |
| 413 ;; ("$,16N(B" . "") | |
| 414 ;; ("$,16O(B" . "") | |
| 415 ) | |
| 416 "Devanagari characters to glyphs conversion table. | |
| 417 Default value contains only the basic rules. You may add your own | |
| 418 preferred rule from the sanskrit fonts." ) | |
| 41469 | 419 |
| 420 (defvar dev-char-glyph-hash | |
| 421 (let* ((hash (makehash 'equal))) | |
| 422 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) | |
| 423 dev-char-glyph) | |
| 424 hash)) | |
| 425 | |
| 426 (defvar dev-char-glyph-regexp | |
| 427 (regexp-of-hashtbl-keys dev-char-glyph-hash)) | |
| 428 | |
| 429 ;; glyph-to-glyph conversion table. | |
| 430 ;; It is assumed that glyphs are ordered as | |
| 431 ;; [consonant/nukta] - [matra/halant] - [preceding-r] - [anuswar]. | |
| 432 | |
| 41523 | 433 (defvar dev-glyph-glyph |
| 41599 | 434 '(("\$,4"'(B\$,4"&(B" . "\$,4"((B") |
| 435 ("\$,4"'(B\$,4"$(B" . "\$,4"((B") | |
| 436 ("$,4"*(B\$,4"&(B" . "$,4"+(B") | |
| 437 ("$,4"*(B\$,4"'(B" . "$,4",(B") | |
| 438 ("$,4"*(B\$,4"'(B\$,4"&(B" . "$,4"-(B") | |
| 439 ("$,4"2(B\$,4"&(B" . "$,4"3(B") | |
| 440 ("$,4"2(B\$,4"'(B" . "$,4"4(B") | |
| 441 ("$,4"2(B\$,4"'(B\$,4"&(B" . "$,4"5(B") | |
| 442 ("$,4"#(B\$,4"6(B" . "$,4"7(B") | |
| 443 ("$,4"%(B\$,4"6(B" . "$,4"8(B") | |
| 444 ;;("$,4"6(B" . "$,4"9(B") | |
| 445 ("$,4"#(B\$,4":(B" . "$,4";(B") | |
| 446 ("$,4"%(B\$,4":(B" . "$,4"<(B") | |
| 447 ;;("$,4":(B" . "$,4"=(B") | |
| 448 ("\$,4"@(B\$,4"&(B" . "\$,4"A(B") | |
| 449 ("\$,4"@(B\$,4"'(B" . "\$,4"B(B") | |
| 450 ("\$,4"@(B\$,4"'(B\$,4"&(B" . "\$,4"C(B") | |
| 451 ("\$,4"D(B\$,4"&(B" . "\$,4"E(B") | |
| 452 ("\$,4"D(B\$,4"'(B" . "\$,4"F(B") | |
| 453 ("\$,4"D(B\$,4"'(B\$,4"&(B" . "\$,4"G(B") | |
| 454 ("\$,4"H(B\$,4"&(B" . "\$,4"I(B") | |
| 455 ("\$,4"H(B\$,4"'(B" . "\$,4"J(B") | |
| 456 ("\$,4"H(B\$,4"'(B\$,4"&(B" . "\$,4"K(B") | |
| 457 ("\$,4"L(B\$,4"&(B" . "\$,4"M(B") | |
| 458 ("\$,4"L(B\$,4"'(B" . "\$,4"N(B") | |
| 459 ("\$,4"L(B\$,4"'(B\$,4"&(B" . "\$,4"O(B") | |
| 41523 | 460 )) |
| 41469 | 461 (defvar dev-glyph-glyph-hash |
| 462 (let* ((hash (makehash 'equal))) | |
| 463 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) | |
| 464 dev-glyph-glyph) | |
| 465 hash)) | |
| 466 (defvar dev-glyph-glyph-regexp | |
| 467 (regexp-of-hashtbl-keys dev-glyph-glyph-hash)) | |
| 468 | |
| 469 | |
| 470 ;; Yet another set of glyph-to-glyph conversions. | |
| 41523 | 471 (defvar dev-glyph-glyph-2 |
| 41599 | 472 '(("$,4"*(B" . "$,4".(B") |
| 473 ("$,4"+(B" . "$,4"/(B") | |
| 474 ("$,4",(B" . "$,4"0(B") | |
| 475 ("$,4"-(B" . "$,4"1(B"))) | |
| 41469 | 476 (defvar dev-glyph-glyph-2-hash |
| 477 (let* ((hash (makehash 'equal))) | |
| 478 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) | |
| 479 dev-glyph-glyph-2) | |
| 480 hash)) | |
| 481 (defvar dev-glyph-glyph-2-regexp | |
| 482 (regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)) | |
| 483 | |
| 484 | |
| 485 (defun dev-charseq (from &optional to) | |
| 486 (if (null to) (setq to from)) | |
| 487 (mapcar (function (lambda (x) (indian-glyph-char x 'devanagari))) | |
| 488 (range from to))) | |
|
17300
01d528c5dd18
Handle more Devanagari characters correctly.
Kenichi Handa <handa@m17n.org>
parents:
17087
diff
changeset
|
489 |
| 41469 | 490 (defvar dev-glyph-cvn |
| 491 (append | |
| 492 (dev-charseq #x2b) | |
| 493 (dev-charseq #x3c #xc1) | |
| 494 (dev-charseq #xc3)) | |
| 495 "Devanagari Consonants/Vowels/Nukta Glyphs") | |
| 496 | |
| 497 (defvar dev-glyph-space | |
| 498 (dev-charseq #xf0 #xfe) | |
| 499 "Devanagari Spacing Glyphs") | |
| 500 | |
| 501 (defvar dev-glyph-right-modifier | |
| 502 (append | |
| 503 (dev-charseq #xc9) | |
| 504 (dev-charseq #xd2 #xd5)) | |
| 505 "Devanagari Modifiers attached at the right side.") | |
| 506 | |
| 507 (defvar dev-glyph-right-modifier-regexp | |
| 508 (concat "[" dev-glyph-right-modifier "]")) | |
| 509 | |
| 510 (defvar dev-glyph-left-matra | |
| 511 (dev-charseq #xca #xd1) | |
| 512 "Devanagari Matras attached at the left side.") | |
| 513 | |
| 514 (defvar dev-glyph-top-matra | |
| 515 (dev-charseq #xe0 #xef) | |
| 516 "Devanagari Matras attached at the top side.") | |
| 517 | |
| 518 (defvar dev-glyph-bottom-modifier | |
| 519 (append | |
| 520 (dev-charseq #xd6 #xdf) | |
| 521 (dev-charseq #xc2)) | |
| 522 "Devanagari Modifiers attached at the bottom.") | |
| 523 | |
| 524 (defvar dev-glyph-order | |
| 525 `((,dev-glyph-cvn . 1) | |
| 526 (,dev-glyph-space . 2) | |
| 527 (,dev-glyph-right-modifier . 3) | |
| 528 (,dev-glyph-left-matra . 3) ;; processed by reference point. | |
| 529 (,dev-glyph-top-matra . 4) | |
| 530 (,(dev-charseq #xc7 #xc8) . 5) | |
| 531 (,(dev-charseq #xc6) . 6) | |
| 532 (,(dev-charseq #xc5) . 7) | |
| 533 (,dev-glyph-bottom-modifier . 8))) | |
| 534 | |
| 535 (mapc | |
| 536 (function (lambda (x) | |
| 537 (mapc | |
| 538 (function (lambda (y) | |
| 539 (put-char-code-property y 'composition-order (cdr x)))) | |
| 540 (car x)))) | |
| 541 dev-glyph-order) | |
| 542 | |
| 543 (mapc | |
| 544 (function (lambda (x) | |
| 545 (put-char-code-property x 'reference-point '(3 . 5)))) | |
| 546 dev-glyph-left-matra) | |
| 547 | |
| 548 (defun devanagari-compose-syllable-string (string) | |
| 549 (with-temp-buffer | |
| 550 (insert (decompose-string string)) | |
| 551 (devanagari-compose-syllable-region (point-min) (point-max)) | |
| 552 (buffer-string))) | |
|
17300
01d528c5dd18
Handle more Devanagari characters correctly.
Kenichi Handa <handa@m17n.org>
parents:
17087
diff
changeset
|
553 |
| 41523 | 554 (defun devanagari-compose-syllable-region (from to) |
| 555 "Compose devanagari syllable in region FROM to TO." | |
| 556 (let ((glyph-str nil) (cons-num 0) glyph-str-list | |
| 557 (last-halant nil) (preceding-r nil) (last-modifier nil) | |
| 558 (last-char (char-before to)) match-str | |
| 559 glyph-block split-pos) | |
| 560 (save-excursion | |
| 561 (save-restriction | |
| 562 ;;; *** char-to-glyph conversion *** | |
| 563 ;; Special rule 1. -- Last halant must be preserved. | |
| 564 (if (eq last-char ?$,16-(B) | |
| 565 (progn | |
| 566 (setq last-halant t) | |
| 567 (narrow-to-region from (1- to))) | |
| 568 (narrow-to-region from to) | |
| 569 ;; note if the last char is modifier. | |
| 570 (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) | |
| 571 (setq last-modifier t))) | |
| 572 (goto-char (point-min)) | |
| 573 ;; Special rule 2. -- preceding "r halant" must be modifier. | |
| 574 (when (looking-at "$,15p6-(B.") | |
| 575 (setq preceding-r t) | |
| 576 (goto-char (+ 2 (point)))) | |
| 577 ;; translate the remaining characters into glyphs | |
| 578 (while (re-search-forward dev-char-glyph-regexp nil t) | |
| 579 (setq match-str (match-string 0)) | |
| 580 (setq glyph-str | |
| 581 (concat glyph-str | |
| 582 (gethash match-str dev-char-glyph-hash))) | |
| 583 ;; count the number of consonant-glyphs. | |
| 584 (if (string-match devanagari-consonant match-str) | |
| 585 (setq cons-num (1+ cons-num)))) | |
| 586 ;; preceding-r must be attached before the anuswar, if one exists. | |
| 587 (if preceding-r | |
| 588 (if last-modifier | |
| 589 (setq glyph-str (concat (substring glyph-str 0 -1) | |
| 41599 | 590 "$,4"'(B" (substring glyph-str -1))) |
| 591 (setq glyph-str (concat glyph-str "$,4"'(B")))) | |
| 592 (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) | |
| 41523 | 593 ;;; *** glyph-to-glyph conversion *** |
| 594 (when (string-match dev-glyph-glyph-regexp glyph-str) | |
| 595 (setq glyph-str | |
| 596 (replace-match (gethash (match-string 0 glyph-str) | |
| 597 dev-glyph-glyph-hash) | |
| 598 nil t glyph-str)) | |
| 599 (if (and (> cons-num 1) | |
| 600 (string-match dev-glyph-glyph-2-regexp glyph-str)) | |
| 601 (setq glyph-str | |
| 602 (replace-match (gethash (match-string 0 glyph-str) | |
| 603 dev-glyph-glyph-2-hash) | |
| 604 nil t glyph-str)))) | |
| 605 ;;; *** glyph reordering *** | |
| 41599 | 606 (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) |
| 41523 | 607 (setq glyph-block (substring glyph-str 0 (1+ split-pos))) |
| 608 (setq glyph-str (substring glyph-str (1+ split-pos))) | |
| 609 (setq | |
| 610 glyph-block | |
| 611 (if (string-match dev-glyph-right-modifier-regexp glyph-block) | |
| 612 (sort (string-to-list glyph-block) | |
| 613 (function (lambda (x y) | |
| 614 (< (get-char-code-property x 'composition-order) | |
| 615 (get-char-code-property y 'composition-order))))) | |
| 616 (sort (string-to-list glyph-block) | |
| 617 (function (lambda (x y) | |
| 618 (let ((xo (get-char-code-property x 'composition-order)) | |
| 619 (yo (get-char-code-property y 'composition-order))) | |
| 620 (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) | |
| 621 (setq glyph-str-list (nconc glyph-str-list glyph-block))) | |
| 622 ;; concatenate and attach reference-points. | |
| 623 (setq glyph-str | |
| 624 (cdr | |
| 625 (apply | |
| 626 'nconc | |
| 627 (mapcar | |
| 628 (function (lambda (x) | |
| 629 (list | |
| 630 (or (get-char-code-property x 'reference-point) | |
| 631 '(5 . 3) ;; default reference point. | |
| 632 ) | |
| 633 x))) | |
| 634 glyph-str-list)))))) | |
| 635 (compose-region from to glyph-str))) | |
|
17300
01d528c5dd18
Handle more Devanagari characters correctly.
Kenichi Handa <handa@m17n.org>
parents:
17087
diff
changeset
|
636 |
|
18309
bd8b521f5218
Provide XXX-util instead of
Kenichi Handa <handa@m17n.org>
parents:
18203
diff
changeset
|
637 (provide 'devan-util) |
