41469
|
1 ;;; devan-util.el --- Support for composing Devanagari characters
|
17052
|
2
|
41469
|
3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
|
17052
|
4
|
49704
|
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
|
41469
|
6 ;; Keywords: multilingual, Devanagari
|
17052
|
7
|
|
8 ;; This file is part of GNU Emacs.
|
|
9
|
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 ;; it under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
13 ;; any later version.
|
|
14
|
|
15 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 ;; GNU General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
17314
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
23 ;; Boston, MA 02111-1307, USA.
|
17052
|
24
|
41469
|
25 ;; Created: Feb. 17. 2001
|
|
26
|
17052
|
27 ;;; Commentary:
|
|
28
|
41469
|
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
|
|
30 ;; composition of Devanagari script characters.
|
17052
|
31
|
|
32 ;;; Code:
|
|
33
|
41599
|
34 ;;;###autoload
|
26894
|
35
|
41469
|
36 ;; Devanagari Composable Pattern
|
|
37 ;; C .. Consonants
|
|
38 ;; V .. Vowel
|
|
39 ;; H .. Halant
|
|
40 ;; M .. Matra
|
|
41 ;; V .. Vowel
|
|
42 ;; A .. Anuswar
|
|
43 ;; D .. Chandrabindu
|
|
44 ;; (N .. Zerowidth Non Joiner)
|
|
45 ;; (J .. Zerowidth Joiner. )
|
|
46 ;; 1. vowel
|
|
47 ;; V(A/D)?
|
|
48 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
|
41602
|
49 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
|
17052
|
50
|
41469
|
51 (defconst devanagari-consonant
|
|
52 "[$,15U(B-$,15y68(B-$,16?(B]")
|
17052
|
53
|
41469
|
54 (defconst devanagari-composable-pattern
|
49598
|
55 (concat
|
49709
|
56 "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|[$,15C6D(B]"
|
41469
|
57 "\\|\\("
|
|
58 "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?"
|
|
59 "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?"
|
|
60 "\\)")
|
|
61 "Regexp matching a composable sequence of Devanagari characters.")
|
17052
|
62
|
41469
|
(defun devanagari-compose-region (from to)
  "Compose each Devanagari syllable found between FROM and TO.
The region is searched for `devanagari-composable-pattern' and every
match is passed to `devanagari-compose-syllable-region'.  Point and
the current restriction are restored afterwards.
When called interactively, operate on the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (syllable-start syllable-end)
        (while (re-search-forward devanagari-composable-pattern nil t)
          (setq syllable-start (match-beginning 0)
                syllable-end (match-end 0))
          (devanagari-compose-syllable-region syllable-start
                                              syllable-end))))))
|
|
(defun devanagari-compose-string (string)
  "Return the text of STRING with its Devanagari syllables composed.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer, composed there with
`devanagari-compose-region', and the buffer contents are returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (devanagari-compose-region (point-min) (point-max))
      (buffer-string))))
|
|
77
|
42054
|
(defun devanagari-post-read-conversion (len)
  "Compose the Devanagari text in the LEN characters following point.
The buffer's modified flag is saved before composing and set back to
the saved value afterwards.  Return the size of the processed region."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p)))
        (narrow-to-region (point) (+ (point) len))
        (devanagari-compose-region (point-min) (point-max))
        ;; Composing must not leave the buffer marked as modified.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
|
|
86
|
42061
|
(defun devanagari-range (from to)
  "Return the ascending list of integers from FROM through TO, inclusive.
Return nil when FROM is greater than TO."
  (let ((n to)
        (acc nil))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (setq acc (cons n acc)
            n (1- n)))
    acc))
|
|
91
|
42061
|
(defun devanagari-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches any key of hash table HASHTBL.
The keys are collected, ordered longest first, and handed to
`regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration of
the computation."
  (let ((max-specpdl-size 1000))
    (regexp-opt
     (let ((keys nil))
       (maphash (lambda (key val) (setq keys (cons key keys)))
                hashtbl)
       ;; Longest keys first.
       (sort keys
             (lambda (a b) (> (length a) (length b))))))))
|
|
101
|
|
(defun devanagari-composition-function (from to pattern &optional string)
  "Compose Devanagari characters in the region FROM..TO, or in STRING.
The region or STRING is assumed to match the composable PATTERN regexp
in full; PATTERN itself is not consulted here.
When STRING is non-nil it is passed to
`devanagari-compose-syllable-string', otherwise the region is passed to
`devanagari-compose-syllable-region'.  Return TO minus FROM.
NOTE(review): the value returned by
`devanagari-compose-syllable-string' is not used here -- confirm that
callers do not expect STRING itself to end up composed."
  (cond (string
         (devanagari-compose-syllable-string string))
        (t
         (devanagari-compose-syllable-region from to)))
  (- to from))
|
|
109
|
|
110 ;; Register a function to compose Devanagari characters.
|
|
;; Install `devanagari-composition-function' in
;; `composition-function-table' for code point #x0903 and for the
;; ranges #x0905..#x0939 and #x0958..#x0961.
;; The code point list is built with `append' on a fresh list: the
;; previous `nconc' destructively modified the quoted constant
;; '(#x0903), which must never be altered.
(dolist (ucs (append (list #x0903)
                     (devanagari-range #x0905 #x0939)
                     (devanagari-range #x0958 #x0961)))
  (aset composition-function-table (decode-char 'ucs ucs)
        ;; Each entry gets its own freshly consed rule list.
        (list (cons devanagari-composable-pattern
                    'devanagari-composition-function))))
|
41469
|
117
|
|
118 ;; Notes on conversion steps.
|
17052
|
119
|
49598
|
120 ;; 1. chars to glyphs
|
41469
|
121 ;;
|
|
122 ;; Rules will not be applied to a halant that appears at the end of the
|
|
123 ;; text. Also, the preceding/following "r" is treated as a special case.
|
|
124
|
|
125 ;; 2. glyphs reordering.
|
|
126 ;;
|
|
127 ;; The glyphs are split by halant, and each glyph group is
|
|
128 ;; re-ordered in the following order.
|
|
129 ;;
|
|
130 ;; Note that `consonant-glyph' mentioned here does not contain the
|
|
131 ;; vertical bar (right modifier) attached at the right of the
|
|
132 ;; consonant.
|
49598
|
133 ;;
|
|
134 ;; If the glyph-group contains right modifier,
|
41469
|
135 ;; (1) consonant-glyphs/vowels, with nukta sign
|
|
136 ;; (2) spacing
|
|
137 ;; (3) right modifier (may be matra)
|
|
138 ;; (4) top matra
|
|
139 ;; (5) preceding "r"
|
|
140 ;; (6) anuswar
|
|
141 ;; (7) following "r"
|
|
142 ;; (8) bottom matra or halant.
|
49598
|
143 ;;
|
|
144 ;; Otherwise,
|
41469
|
145 ;; (1) consonant-glyph/vowels, with nukta sign
|
|
146 ;; (3) left matra
|
49598
|
147 ;; (4) top matra
|
41469
|
148 ;; (5) preceding "r"
|
|
149 ;; (6) anuswar
|
|
150 ;; (7) following "r"
|
|
151 ;; (8) bottom matra or halant.
|
|
152 ;; (2) spacing
|
|
153
|
|
154 ;; 3. glyph to glyph
|
|
155 ;;
|
|
156 ;; For better display, some glyphs are replaced with tuned variants.
|
|
157
|
|
158 ;; 4. Composition.
|
|
159 ;;
|
|
160 ;; left modifiers will be attached at the left.
|
|
161 ;; others will be attached right.
|
|
162
|
|
163 ;; Problem::
|
|
164 ;; Can we generalize this method to other Indian scripts?
|
17052
|
165
|
41523
|
166 (defvar dev-char-glyph
|
|
167 '(("$,15E(B" . "$,4 K(B")
|
41599
|
168 ("$,15F(B" . "$,4 K")(B")
|
|
169 ("$,15~(B" . "$,4")(B")
|
41523
|
170 ("$,15G(B" . "$,4 \(B")
|
41599
|
171 ("$,15(B" . "$,4"*(B")
|
|
172 ("$,155A(B" . "$,4"*(B\$,4"&(B")
|
41523
|
173 ("$,15H(B" . "$,4 \"'(B")
|
41599
|
174 ("$,15H5A(B" . "$,4 \"'"&(B")
|
|
175 ("$,16 (B" . "$,4"2(B")
|
|
176 ("$,16 5A(B" . "$,4"2"&(B")
|
41523
|
177 ("$,15I(B" . "$,4 ](B")
|
41599
|
178 ("$,16!(B" . "$,4"6(B")
|
|
179 ("$,15J(B" . "$,4 ^"P(B")
|
|
180 ("$,16"(B" . "$,4":(B")
|
|
181 ("$,15K(B" . "$,4 `"Q(B")
|
|
182 ("$,16#(B" . "$,4">(B")
|
41523
|
183 ;;("$,15L(B" . nil) ; not implemented.
|
41599
|
184 ("$,16$(B" . "$,4"?(B")
|
|
185 ("$,15M(B" . "$,4 b"L(B")
|
|
186 ("$,15M5A(B" . "$,4 b"$(B")
|
|
187 ("$,15M5B(B" . "$,4 b"$(B")
|
|
188 ("$,16%(B" . "\$,4"L(B")
|
49598
|
189 ("$,15N(B" . "$,4 b"@(B")
|
|
190 ("$,15N5A(B" . "$,4 b"@"&(B")
|
41599
|
191 ("$,16&(B" . "\$,4"@(B")
|
|
192 ("$,16&5A(B" . "\$,4"@(B\$,4"&(B")
|
49598
|
193 ("$,15O(B" . "$,4 b(B")
|
41599
|
194 ("$,16'(B" . "\$,4"D(B")
|
|
195 ("$,16'5A(B" . "\$,4"D(B\$,4"&(B")
|
49598
|
196 ("$,15P(B" . "$,4 b"D(B")
|
|
197 ("$,15P5A(B" . "$,4 b"D"&(B")
|
41599
|
198 ("$,16((B" . "\$,4"H(B")
|
|
199 ("$,16(5A(B" . "\$,4"H(B\$,4"&(B")
|
|
200 ("$,15Q(B" . "$,4 K")"L(B") ;; special rule for reodering.
|
|
201 ("$,15Q5A(B" . "$,4 K")"$(B")
|
|
202 ("$,15Q5B(B" . "$,4 K")"$(B")
|
|
203 ("$,16)(B" . "\$,4")"L(B")
|
|
204 ("$,16)5A(B" . "\$,4")"$(B")
|
|
205 ("$,16)5B(B" . "\$,4")"$(B")
|
49598
|
206 ("$,15R(B" . "$,4 K")"@(B")
|
|
207 ("$,15R5A(B" . "$,4 K")"@"&(B")
|
41599
|
208 ("$,16*(B" . "\$,4")"@(B")
|
|
209 ("$,16*5A(B" . "\$,4")"@"&(B")
|
|
210 ("$,15S(B" . "$,4 K")"D(B")
|
|
211 ("$,15S5A(B" . "$,4 K")"D"&(B")
|
|
212 ("$,16+(B" . "\$,4")"D(B")
|
|
213 ("$,16+5A(B" . "\$,4")"D"&(B")
|
49598
|
214 ("$,15T(B" . "$,4 K")"H(B")
|
|
215 ("$,15T5A(B" . "$,4 K")"H"&(B")
|
41599
|
216 ("$,16,(B" . "\$,4")"H(B")
|
|
217 ("$,16,5A(B" . "\$,4")"H"&(B")
|
49598
|
218 ("$,16@(B" . "$,4 a"Q(B")
|
41523
|
219 ;;("$,16B(B" . nil)
|
49598
|
220 ;;("$,16A(B" . nil)
|
41523
|
221 ;;("$,16C(B" . nil)
|
41469
|
222
|
41523
|
223 ;; GUTTURALS
|
41599
|
224 ("$,15U(B" . "$,4 e"R(B")
|
41523
|
225 ("$,15U6-(B" . "$,4 c(B")
|
41599
|
226 ("$,15U6-5p(B" . "$,4 g"R(B")
|
|
227 ("$,15U6-5d(B" . "$,4 h"R(B")
|
|
228 ("$,15U6-5w(B" . "$,4 i")(B")
|
41523
|
229 ("$,15U6-5w6-(B" . "$,4 i(B")
|
41469
|
230
|
41599
|
231 ("$,15V(B" . "$,4 j")(B")
|
41523
|
232 ("$,15V6-(B" . "$,4 j(B")
|
41599
|
233 ("$,15V6-5p(B" . "$,4 l")(B")
|
41523
|
234 ("$,15V6-5p6-(B" . "$,4 l(B")
|
41469
|
235
|
49598
|
236 ("$,15W(B" . "$,4 m")(B")
|
|
237 ("$,15W6-(B" . "$,4 m(B")
|
41599
|
238 ("$,15W6-5p(B" . "$,4 o")(B")
|
41523
|
239 ("$,15W6-5p6-(B" . "$,4 o(B")
|
41469
|
240
|
49598
|
241 ("$,15X(B" . "$,4 p")(B")
|
|
242 ("$,15X6-(B" . "$,4 p(B")
|
|
243 ("$,15X6-5p(B" . "$,4 q")(B")
|
|
244 ("$,15X6-5p6-(B" . "$,4 q(B")
|
41469
|
245
|
41599
|
246 ("$,15Y(B" . "$,4 r"S(B")
|
49598
|
247 ;; PALATALS
|
|
248 ("$,15Z(B" . "$,4 s")(B")
|
|
249 ("$,15Z6-(B" . "$,4 s(B")
|
|
250 ("$,15Z6-5p(B" . "$,4 t")(B")
|
41523
|
251 ("$,15Z6-5p6-(B" . "$,4 t(B")
|
41469
|
252
|
49598
|
253 ("$,15[(B" . "$,4 u"T(B")
|
41469
|
254
|
49598
|
255 ("$,15\(B" . "$,4 v")(B")
|
|
256 ("$,15\6-(B" . "$,4 v(B")
|
|
257 ("$,15\6-5p(B" . "$,4 x")(B")
|
|
258 ("$,15\6-5p6-(B" . "$,4 x(B")
|
|
259 ("$,15\6-5^(B" . "$,4 y")(B")
|
|
260 ("$,15\6-5^6-(B" . "$,4 y(B")
|
17052
|
261
|
49598
|
262 ("$,15](B" . "$,4 z")(B")
|
|
263 ("$,15]6-(B" . "$,4 z(B")
|
|
264 ("$,15]6-5p(B" . "$,4 {")(B")
|
|
265 ("$,15]6-5p6-(B" . "$,4 {(B")
|
41469
|
266
|
41599
|
267 ("$,15^(B" . "$,4 |")(B")
|
41523
|
268 ("$,15^6-(B" . "$,4 |(B")
|
49598
|
269 ;; CEREBRALS
|
41599
|
270 ("$,15_(B" . "$,4 }"U(B")
|
|
271 ("$,15_6-5_(B" . "$,4 ~"U(B")
|
|
272 ("$,15_6-5`(B" . "$,4 "U(B")
|
41469
|
273
|
49598
|
274 ("$,15`(B" . "$,4! "V(B")
|
|
275 ("$,15`6-5`(B" . "$,4!!"V(B")
|
41469
|
276
|
49598
|
277 ("$,15a(B" . "$,4!""W(B")
|
|
278 ("$,15a6-5a(B" . "$,4!$"W(B")
|
|
279 ("$,15a6-5b(B" . "$,4!%"W(B")
|
41469
|
280
|
49598
|
281 ("$,15b(B" . "$,4!&"X(B")
|
41469
|
282
|
41599
|
283 ("$,15c(B" . "$,4!(")(B")
|
41523
|
284 ("$,15c6-(B" . "$,4!((B")
|
49598
|
285 ;; DENTALS
|
|
286 ("$,15d(B" . "$,4!)")(B")
|
|
287 ("$,15d6-(B" . "$,4!)(B")
|
|
288 ("$,15d6-5p(B" . "$,4!*")(B")
|
|
289 ("$,15d6-5p6-(B" . "$,4!*(B")
|
|
290 ("$,15d6-5d(B" . "$,4!+")(B")
|
|
291 ("$,15d6-5d6-(B" . "$,4!+(B")
|
41469
|
292
|
49598
|
293 ("$,15e(B" . "$,4!,")(B")
|
|
294 ("$,15e6-(B" . "$,4!,(B")
|
|
295 ("$,15e6-5p(B" . "$,4!-")(B")
|
|
296 ("$,15e6-5p6-(B" . "$,4!-(B")
|
41469
|
297
|
49598
|
298 ("$,15f(B" . "$,4!."Y(B")
|
41599
|
299 ("$,15f6#(B" . "$,4!/"Y(B")
|
|
300 ("$,15f6-5p(B" . "$,4!0"Y(B")
|
|
301 ("$,15f6-5f(B" . "$,4!1"Y(B")
|
|
302 ("$,15f6-5g(B" . "$,4!2"Y(B")
|
41523
|
303 ("$,15f6-5n(B" . "$,4!3(B")
|
|
304 ("$,15f6-5o(B" . "$,4!4(B")
|
41599
|
305 ("$,15f6-5u(B" . "$,4!5"Y(B")
|
17773
|
306
|
49598
|
307 ("$,15g(B" . "$,4!6")(B")
|
|
308 ("$,15g6-(B" . "$,4!6(B")
|
|
309 ("$,15g6-5p(B" . "$,4!7")(B")
|
|
310 ("$,15g6-5p6-(B" . "$,4!7(B")
|
41469
|
311
|
49598
|
312 ("$,15h(B" . "$,4!8")(B")
|
|
313 ("$,15h6-(B" . "$,4!8(B")
|
|
314 ("$,15h6-5p(B" . "$,4!9")(B")
|
|
315 ("$,15h6-5p6-(B" . "$,4!9")(B")
|
|
316 ("$,15h6-5h(B" . "$,4!:")(B")
|
|
317 ("$,15h6-5h6-(B" . "$,4!:(B")
|
41469
|
318
|
41599
|
319 ("$,15i(B" . "$,4!8"#")(B")
|
49598
|
320 ;; LABIALS
|
|
321 ("$,15j(B" . "$,4!;")(B")
|
|
322 ("$,15j6-(B" . "$,4!;(B")
|
|
323 ("$,15j6-5p(B" . "$,4!<")(B")
|
|
324 ("$,15j6-5p6-(B" . "$,4!<(B")
|
41469
|
325
|
49598
|
326 ("$,15k(B" . "$,4!a"[(B")
|
|
327 ("$,15k6-(B" . "$,4!=(B")
|
|
328 ("$,15k6-5p(B" . "$,4!c"[(B")
|
41469
|
329
|
49598
|
330 ("$,15l(B" . "$,4!d")(B")
|
|
331 ("$,15l6-(B" . "$,4!d(B")
|
|
332 ("$,15l6-5p(B" . "$,4!e")(B")
|
|
333 ("$,15l6-5p6-(B" . "$,4!e(B")
|
41469
|
334
|
49598
|
335 ("$,15m(B" . "$,4!f")(B")
|
|
336 ("$,15m6-(B" . "$,4!f(B")
|
|
337 ("$,15m6-5p(B" . "$,4!g")(B")
|
|
338 ("$,15m6-5p6-(B" . "$,4!g(B")
|
41469
|
339
|
41599
|
340 ("$,15n(B" . "$,4!h")(B")
|
41523
|
341 ("$,15n6-(B" . "$,4!h(B")
|
41599
|
342 ("$,15n6-5p(B" . "$,4!i")(B")
|
41523
|
343 ("$,15n6-5p6-(B" . "$,4!i(B")
|
|
344 ;; SEMIVOWELS
|
49598
|
345 ("$,15o(B" . "$,4!j")(B")
|
|
346 ("$,15o6-(B" . "$,4!j(B")
|
|
347 ("$,15o6-5p(B" . "$,4!k")(B")
|
|
348 ("$,15o6-5p6-(B" . "$,4!k(B")
|
41523
|
349 ("$,16-5o(B" . "$,4!l(B") ;; when every ohter lig. fails.
|
41469
|
350
|
49598
|
351 ("$,15p(B" . "$,4!n"W(B")
|
41523
|
352 ;; ("$,15p6-(B" . "\$,4"'(B") ;; special case. only the topmost pos.
|
49598
|
353 ("$,15q(B" . "$,4!n"#"W(B")
|
41523
|
354 ("$,15q6-(B" . "$,4!m(B") ;; IS 13194 speical rule.
|
49598
|
355 ("$,15p6!(B" . "$,4!o"[(B")
|
|
356 ("$,15p6"(B" . "$,4!p"\(B")
|
17052
|
357
|
49598
|
358 ("$,15r(B" . "$,4!q")(B")
|
|
359 ("$,15r6-(B" . "$,4!q(B")
|
|
360 ("$,15s(B" . "$,4!s(B")
|
|
361 ("$,15s6-(B" . "$,4!r(B")
|
41599
|
362 ("$,15t(B" . "$,4!s"#(B")
|
|
363 ("$,15t6-(B" . "$,4!r"#(B")
|
41469
|
364
|
41599
|
365 ("$,15u(B" . "$,4!t")(B")
|
41523
|
366 ("$,15u6-(B" . "$,4!t(B")
|
41599
|
367 ("$,15u6-5p(B" . "$,4!u")(B")
|
41523
|
368 ("$,15u6-5p6-(B" . "$,4!u(B")
|
49598
|
369 ;; SIBILANTS
|
|
370 ("$,15v(B" . "$,4!v")(B")
|
41523
|
371 ("$,15v6-(B" . "$,4!v(B")
|
41599
|
372 ("$,15v6-5u(B" . "$,4!w")(B")
|
41523
|
373 ("$,15v6-5u6-(B" . "$,4!w(B")
|
41599
|
374 ("$,15v6-5p(B" . "$,4!x")(B")
|
41523
|
375 ("$,15v6-5p6-(B" . "$,4!x(B")
|
41469
|
376
|
41599
|
377 ("$,15w(B" . "$,4!y")(B")
|
41523
|
378 ("$,15w6-(B" . "$,4!y(B")
|
41599
|
379 ("$,15x(B" . "$,4!z")(B")
|
41523
|
380 ("$,15x6-(B" . "$,4!z(B")
|
41599
|
381 ("$,15x6-5p(B" . "$,4!{")(B")
|
41523
|
382 ("$,15x6-5p6-(B" . "$,4!{(B")
|
17300
|
383
|
41523
|
384 ("$,15y(B" . "$,4!}(B")
|
|
385 ("$,15y6-(B" . "$,4!|(B")
|
|
386 ("$,15y6#(B" . "$,4!~(B")
|
|
387 ("$,15y6-5p(B" . "$,4!(B")
|
41599
|
388 ("$,15y6-5n(B" . "$,4" (B")
|
|
389 ("$,15y6-5o(B" . "$,4"!(B")
|
49598
|
390 ;; NUKTAS
|
41599
|
391 ("$,168(B" . "$,4 f"R"S(B")
|
49598
|
392 ("$,1686-(B" . "$,4 d(B")
|
|
393 ("$,169(B" . "$,4 k")(B")
|
|
394 ("$,1696-(B" . "$,4 k(B")
|
|
395 ("$,16:(B" . "$,4 n")(B")
|
|
396 ("$,16:6-(B" . "$,4 n(B")
|
|
397 ("$,16;(B" . "$,4 w")(B")
|
|
398 ("$,16;6-(B" . "$,4 w(B")
|
|
399 ("$,16<(B" . "$,4!#"W(B")
|
|
400 ("$,16=(B" . "$,4!'"X(B")
|
|
401 ("$,16>(B" . "$,4!b"[(B")
|
|
402 ("$,16>6-(B" . "$,4!>(B")
|
41599
|
403 ("$,16?(B" . "$,4!j"#")(B")
|
41523
|
404 ;; misc modifiers.
|
41599
|
405 ("$,15A(B" . "\$,4"$(B")
|
49598
|
406 ("$,15B(B" . "\$,4"&(B")
|
41523
|
407 ("$,15C(B" . "$,4 F(B")
|
41599
|
408 ("$,15|(B" . "$,4"#(B")
|
41523
|
409 ("$,15}(B" . "$,4 E(B")
|
41599
|
410 ("$,16-(B" . "$,4""(B")
|
|
411 ("$,16-5p(B" . "$,4"%(B") ;; following "r"
|
49598
|
412 ;; ("$,160(B" . "$,4 D(B")
|
49709
|
413 ("$,16D(B" . "$,4 J(B")
|
49598
|
414 ;; ("$,16F(B" . "")
|
|
415 ;; ("$,16G(B" . "")
|
|
416 ;; ("$,16H(B" . "")
|
|
417 ;; ("$,16I(B" . "")
|
|
418 ;; ("$,16J(B" . "")
|
|
419 ;; ("$,16K(B" . "")
|
|
420 ;; ("$,16L(B" . "")
|
|
421 ;; ("$,16M(B" . "")
|
|
422 ;; ("$,16N(B" . "")
|
41523
|
423 ;; ("$,16O(B" . "")
|
|
424 )
|
49598
|
425 "Devanagari characters to glyphs conversion table.
|
41523
|
426 Default value contains only the basic rules. You may add your own
|
|
427 preferred rule from the sanskrit fonts." )
|
41469
|
428
|
|
(defvar dev-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (rule dev-char-glyph)
      (puthash (car rule) (cdr rule) table))
    table)
  "Hash table built from `dev-char-glyph'.
Maps the car of each entry to its cdr, compared with `equal'.")

(defvar dev-char-glyph-regexp
  (devanagari-regexp-of-hashtbl-keys dev-char-glyph-hash)
  "Regexp matching any key of `dev-char-glyph-hash'.")
|
41469
|
437
|
|
438 ;; glyph-to-glyph conversion table.
|
49598
|
439 ;; It is assumed that the glyphs are ordered as
|
41469
|
440 ;; [consonant/nukta] - [matra/halant] - [preceding-r] - [anuswar].
|
|
441
|
41523
|
442 (defvar dev-glyph-glyph
|
41599
|
443 '(("\$,4"'(B\$,4"&(B" . "\$,4"((B")
|
|
444 ("\$,4"'(B\$,4"$(B" . "\$,4"((B")
|
|
445 ("$,4"*(B\$,4"&(B" . "$,4"+(B")
|
|
446 ("$,4"*(B\$,4"'(B" . "$,4",(B")
|
|
447 ("$,4"*(B\$,4"'(B\$,4"&(B" . "$,4"-(B")
|
|
448 ("$,4"2(B\$,4"&(B" . "$,4"3(B")
|
|
449 ("$,4"2(B\$,4"'(B" . "$,4"4(B")
|
|
450 ("$,4"2(B\$,4"'(B\$,4"&(B" . "$,4"5(B")
|
|
451 ("$,4"#(B\$,4"6(B" . "$,4"7(B")
|
|
452 ("$,4"%(B\$,4"6(B" . "$,4"8(B")
|
|
453 ;;("$,4"6(B" . "$,4"9(B")
|
|
454 ("$,4"#(B\$,4":(B" . "$,4";(B")
|
|
455 ("$,4"%(B\$,4":(B" . "$,4"<(B")
|
|
456 ;;("$,4":(B" . "$,4"=(B")
|
|
457 ("\$,4"@(B\$,4"&(B" . "\$,4"A(B")
|
|
458 ("\$,4"@(B\$,4"'(B" . "\$,4"B(B")
|
|
459 ("\$,4"@(B\$,4"'(B\$,4"&(B" . "\$,4"C(B")
|
|
460 ("\$,4"D(B\$,4"&(B" . "\$,4"E(B")
|
|
461 ("\$,4"D(B\$,4"'(B" . "\$,4"F(B")
|
|
462 ("\$,4"D(B\$,4"'(B\$,4"&(B" . "\$,4"G(B")
|
|
463 ("\$,4"H(B\$,4"&(B" . "\$,4"I(B")
|
|
464 ("\$,4"H(B\$,4"'(B" . "\$,4"J(B")
|
|
465 ("\$,4"H(B\$,4"'(B\$,4"&(B" . "\$,4"K(B")
|
|
466 ("\$,4"L(B\$,4"&(B" . "\$,4"M(B")
|
|
467 ("\$,4"L(B\$,4"'(B" . "\$,4"N(B")
|
|
468 ("\$,4"L(B\$,4"'(B\$,4"&(B" . "\$,4"O(B")
|
41523
|
469 ))
|
41469
|
(defvar dev-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (rule dev-glyph-glyph)
      (puthash (car rule) (cdr rule) table))
    table)
  "Hash table built from `dev-glyph-glyph'.
Maps the car of each entry to its cdr, compared with `equal'.")
(defvar dev-glyph-glyph-regexp
  (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-hash)
  "Regexp matching any key of `dev-glyph-glyph-hash'.")
|
41469
|
477
|
|
478
|
|
479 ;; yet another glyph-to-glyph conversions.
|
41523
|
480 (defvar dev-glyph-glyph-2
|
41599
|
481 '(("$,4"*(B" . "$,4".(B")
|
|
482 ("$,4"+(B" . "$,4"/(B")
|
|
483 ("$,4",(B" . "$,4"0(B")
|
|
484 ("$,4"-(B" . "$,4"1(B")))
|
41469
|
(defvar dev-glyph-glyph-2-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (rule dev-glyph-glyph-2)
      (puthash (car rule) (cdr rule) table))
    table)
  "Hash table built from `dev-glyph-glyph-2'.
Maps the car of each entry to its cdr, compared with `equal'.")
(defvar dev-glyph-glyph-2-regexp
  (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)
  "Regexp matching any key of `dev-glyph-glyph-2-hash'.")
|
41469
|
492
|
|
493
|
|
(defun dev-charseq (from &optional to)
  "Return the list of Devanagari glyph characters for indices FROM to TO.
Each index in the inclusive range is converted with
`indian-glyph-char' for the script `devanagari'.  When TO is omitted
or nil, only the glyph for FROM is returned."
  (let ((upto (or to from)))
    (mapcar (lambda (index) (indian-glyph-char index 'devanagari))
            (devanagari-range from upto))))
|
17300
|
498
|
41469
|
(defvar dev-glyph-cvn
  (append (dev-charseq #x2b)
          (dev-charseq #x3c #xc1)
          (dev-charseq #xc3))
  "Devanagari consonant, vowel and nukta glyphs.")

(defvar dev-glyph-space
  (dev-charseq #xf0 #xfe)
  "Devanagari spacing glyphs.")

(defvar dev-glyph-right-modifier
  (append (dev-charseq #xc9)
          (dev-charseq #xd2 #xd5))
  "Devanagari modifier glyphs attached at the right side.")

(defvar dev-glyph-right-modifier-regexp
  (concat "[" dev-glyph-right-modifier "]")
  "Regexp character class of the glyphs in `dev-glyph-right-modifier'.")

(defvar dev-glyph-left-matra
  (dev-charseq #xca #xd1)
  "Devanagari matra glyphs attached at the left side.")

(defvar dev-glyph-top-matra
  (dev-charseq #xe0 #xef)
  "Devanagari matra glyphs attached at the top side.")

(defvar dev-glyph-bottom-modifier
  (append (dev-charseq #xd6 #xdf)
          (dev-charseq #xc2))
  "Devanagari modifier glyphs attached at the bottom.")

(defvar dev-glyph-order
  (list (cons dev-glyph-cvn 1)
        (cons dev-glyph-space 2)
        (cons dev-glyph-right-modifier 3)
        (cons dev-glyph-left-matra 3) ;; processed by reference point.
        (cons dev-glyph-top-matra 4)
        (cons (dev-charseq #xc7 #xc8) 5)
        (cons (dev-charseq #xc6) 6)
        (cons (dev-charseq #xc5) 7)
        (cons dev-glyph-bottom-modifier 8))
  "Alist of (GLYPH-LIST . ORDER) giving the composition order of glyphs.")
|
|
543
|
49598
|
;; Record on every glyph listed in `dev-glyph-order' its ordering
;; number, as the char-code property `composition-order'.
(dolist (class dev-glyph-order)
  (let ((rank (cdr class)))
    (dolist (glyph (car class))
      (put-char-code-property glyph 'composition-order rank))))
|
|
551
|
|
;; Left matras get the reference point (3 . 5); glyphs without this
;; property fall back to the default used when composing.
(dolist (glyph dev-glyph-left-matra)
  (put-char-code-property glyph 'reference-point '(3 . 5)))
|
|
556
|
|
(defun devanagari-compose-syllable-string (string)
  "Return the text of STRING composed as a single Devanagari syllable.
STRING is first passed through `decompose-string'; the result is
inserted into a temporary buffer, composed there with
`devanagari-compose-syllable-region', and the buffer contents are
returned."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (devanagari-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
|
17300
|
562
|
41523
|
(defun devanagari-compose-syllable-region (from to)
  "Compose the Devanagari syllable in the region FROM to TO.
The characters are translated to glyphs through `dev-char-glyph-hash',
adjusted through `dev-glyph-glyph-hash' and `dev-glyph-glyph-2-hash',
reordered by their `composition-order' char-code property, and passed
to `compose-region' together with their reference points.
If no character of the region has an entry in `dev-char-glyph-hash',
the region is left uncomposed and nil is returned."
  (let ((glyph-str nil) (cons-num 0) glyph-str-list
        (last-halant nil) (preceding-r nil) (last-modifier nil)
        (last-char (char-before to)) match-str
        glyph-block split-pos)
    (save-excursion
      (save-restriction
        ;;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last halant must be preserved.
        (if (eq last-char ?$,16-(B)
            (progn
              (setq last-halant t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to)
          ;; note if the last char is modifier.
          (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B))
              (setq last-modifier t)))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r halant" must be modifier.
        (when (looking-at "$,15p6-(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; translate the rest characters into glyphs
        (while (re-search-forward dev-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          (setq glyph-str
                (concat glyph-str
                        (gethash match-str dev-char-glyph-hash)))
          ;; count the number of consonant-glyphs.
          (if (string-match devanagari-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; preceding-r must be attached before the anuswar if exists.
        (if preceding-r
            ;; `substring' needs a string, so only split off the last
            ;; glyph when something was actually translated.
            (if (and last-modifier glyph-str)
                (setq glyph-str (concat (substring glyph-str 0 -1)
                                        "$,4"'(B" (substring glyph-str -1)))
              (setq glyph-str (concat glyph-str "$,4"'(B"))))
        (if last-halant (setq glyph-str (concat glyph-str "$,4""(B")))
        ;; GLYPH-STR is still nil when nothing in the region has a glyph
        ;; entry.  `string-match' would then signal an error, so the
        ;; remaining passes only run when there is something to process.
        (when glyph-str
          ;;; *** glyph-to-glyph conversion ***
          (when (string-match dev-glyph-glyph-regexp glyph-str)
            (setq glyph-str
                  (replace-match (gethash (match-string 0 glyph-str)
                                          dev-glyph-glyph-hash)
                                 nil t glyph-str))
            (if (and (> cons-num 1)
                     (string-match dev-glyph-glyph-2-regexp glyph-str))
                (setq glyph-str
                      (replace-match (gethash (match-string 0 glyph-str)
                                              dev-glyph-glyph-2-hash)
                                     nil t glyph-str))))
          ;;; *** glyph reordering ***
          ;; Cut GLYPH-STR after each halant glyph (or after its final
          ;; glyph), sort each piece, and collect the pieces in order.
          (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str))
            (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
            (setq glyph-str (substring glyph-str (1+ split-pos)))
            (setq
             glyph-block
             (if (string-match dev-glyph-right-modifier-regexp glyph-block)
                 (sort (string-to-list glyph-block)
                       (function (lambda (x y)
                                   (< (get-char-code-property x 'composition-order)
                                      (get-char-code-property y 'composition-order)))))
               ;; No right modifier: glyphs of order 2 (spacing) go last.
               (sort (string-to-list glyph-block)
                     (function (lambda (x y)
                                 (let ((xo (get-char-code-property x 'composition-order))
                                       (yo (get-char-code-property y 'composition-order)))
                                   (if (= xo 2) nil (if (= yo 2) t (< xo yo)))))))))
            (setq glyph-str-list (nconc glyph-str-list glyph-block)))
          ;; concatenate and attach reference-points.
          ;; Each glyph is preceded by its reference point; the leading
          ;; reference point is dropped with `cdr'.
          (setq glyph-str
                (cdr
                 (apply
                  'nconc
                  (mapcar
                   (function (lambda (x)
                               (list
                                (or (get-char-code-property x 'reference-point)
                                    '(5 . 3) ;; default reference point.
                                    )
                                x)))
                   glyph-str-list)))))))
    ;; Compose only when glyphs were produced.
    (if glyph-str
        (compose-region from to glyph-str))))
|
17300
|
645
|
18309
|
646 (provide 'devan-util)
|
42311
|
647
|
|
648 ;;; devan-util.el ends here
|