Mercurial > emacs
annotate lisp/language/knd-util.el @ 91385:4ce4937732be
Typo.
author | Andreas Schwab <schwab@suse.de> |
---|---|
date | Fri, 01 Feb 2008 22:49:17 +0000 |
parents | 606f2d163a64 |
children |
rev | line source |
---|---|
53018 | 1 ;;; knd-util.el --- Support for composing Kannada characters |
2 | |
79711 | 3 ;; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 |
4 ;; Free Software Foundation, Inc. | |
53018 | 5 |
6 ;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org> | |
7 ;; Keywords: multilingual, Kannada | |
8 | |
9 ;; This file is part of GNU Emacs. | |
10 | |
11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 ;; it under the terms of the GNU General Public License as published by | |
78236
9355f9b7bbff
Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents:
75347
diff
changeset
|
13 ;; the Free Software Foundation; either version 3, or (at your option) |
53018 | 14 ;; any later version. |
15 | |
16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 ;; GNU General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
64085 | 23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
24 ;; Boston, MA 02110-1301, USA. | |
53018 | 25 |
26 ;; Created: Jul. 14. 2003 | |
27 | |
28 ;;; Commentary: | |
29 | |
30 ;; This file provides character(Unicode) to glyph(CDAC) conversion and | |
31 ;; composition of Kannada script characters. | |
32 | |
33 ;;; Code: | |
34 | |
35 ;;;###autoload | |
36 | |
37 ;; Kannada Composable Pattern | |
38 ;; C .. Consonants | |
39 ;; V .. Vowel | |
40 ;; H .. Virama | |
41 ;; M .. Matra | |
42 ;; A .. Vowel modifier (presumably; used by pattern 1 below) | |
43 ;; (N .. Zerowidth Non Joiner) | |
44 ;; (J .. Zerowidth Joiner. ) | |
45 ;; 1. vowel | |
46 ;; V(A)? | |
47 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) | |
48 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?)? | |
49 | |
50 (defconst kannada-consonant | |
51 "[$,1>u(B-$,1?9(B]") | |
52 | |
53 (defconst kannada-consonant-needs-twirl | |
54 "[$,1>u>w(B-$,1>{>}(B-$,1>~? (B-$,1?"?$(B-$,1?+?-?0?3(B-$,1?9(B]\\($,1?M(B[$,1>u(B-$,1?9(B]\\)*[$,1?A?B?C?D>b(B]?$") | |
55 | |
56 (defconst kannada-composable-pattern | |
57 (concat | |
58 "\\([$,1>b(B-$,1>t?`>l(B]\\)\\|[$,1>c(B]" | |
59 "\\|\\(" | |
60 "\\(?:\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?[$,1>u(B-$,1?9(B]$,1?M(B\\)?" | |
61 "[$,1>u(B-$,1?9(B]\\(?:$,1?M(B\\|[$,1?>(B-$,1?M?U?C(B]?\\)?" | |
62 "\\)") | |
63 "Regexp matching a composable sequence of Kannada characters.") | |
64 | |
53035
edde52617580
(kannada-compose-region) (kannada-compose-string,
Kenichi Handa <handa@m17n.org>
parents:
53022
diff
changeset
|
65 ;;;###autoload |
;; Compose every composable Kannada sequence between FROM and TO.
;; The buffer is narrowed to the region and scanned from its start for
;; `kannada-composable-pattern'; each match is passed to
;; `kannada-compose-syllable-region'.  Point and the restriction are
;; restored by `save-excursion' and `save-restriction'.
;; Interactively, FROM and TO are the bounds of the region.
53018 | 66 (defun kannada-compose-region (from to) |
67 (interactive "r") | |
68 (save-excursion | |
69 (save-restriction | |
70 (narrow-to-region from to) | |
71 (goto-char (point-min)) | |
72 (while (re-search-forward kannada-composable-pattern nil t) | |
73 (kannada-compose-syllable-region (match-beginning 0) | |
74 (match-end 0)))))) | |
53035
edde52617580
(kannada-compose-region) (kannada-compose-string,
Kenichi Handa <handa@m17n.org>
parents:
53022
diff
changeset
|
75 ;;;###autoload |
;; Return STRING with its Kannada sequences composed.
;; STRING is first passed through `decompose-string', inserted into a
;; temporary buffer, composed there with `kannada-compose-region', and
;; the resulting buffer contents are returned.
53018 | 76 (defun kannada-compose-string (string) |
77 (with-temp-buffer | |
78 (insert (decompose-string string)) | |
79 (kannada-compose-region (point-min) (point-max)) | |
80 (buffer-string))) | |
81 | |
53035
edde52617580
(kannada-compose-region) (kannada-compose-string,
Kenichi Handa <handa@m17n.org>
parents:
53022
diff
changeset
|
82 ;;;###autoload |
;; Compose the LEN characters following point.
;; The buffer's modified flag is captured before composing and set back
;; to that value afterwards.  The return value is the size of the
;; narrowed region, (- (point-max) (point-min)), measured after
;; composition.
53018 | 83 (defun kannada-post-read-conversion (len) |
84 (save-excursion | |
85 (save-restriction | |
86 (let ((buffer-modified-p (buffer-modified-p))) | |
87 (narrow-to-region (point) (+ (point) len)) | |
88 (kannada-compose-region (point-min) (point-max)) | |
89 (set-buffer-modified-p buffer-modified-p) | |
90 (- (point-max) (point-min)))))) | |
91 | |
;; The list is built by consing TO, TO-1, ... down to FROM onto the
;; front of RESULT, so the returned list is in ascending order.
;; When FROM is greater than TO the loop never runs and nil is returned.
92 (defun kannada-range (from to) | |
93 "Make the list of the integers of range FROM to TO." | |
94 (let (result) | |
95 (while (<= from to) (setq result (cons to result) to (1- to))) result)) | |
96 | |
;; The keys are collected with `maphash', sorted so that longer keys
;; come first, and the sorted list is handed to `regexp-opt'.
;; `max-specpdl-size' is let-bound to 1000 for the duration of the call.
97 (defun kannada-regexp-of-hashtbl-keys (hashtbl) | |
98 "Return a regular expression that matches all keys in hashtable HASHTBL." | |
99 (let ((max-specpdl-size 1000)) | |
100 (regexp-opt | |
101 (sort | |
102 (let (dummy) | |
103 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) | |
104 dummy) | |
105 (function (lambda (x y) (> (length x) (length y)))))))) | |
106 | |
;; Same shape as `kannada-regexp-of-hashtbl-keys', but the list handed
;; to `regexp-opt' holds the values of HASHTBL (longest first).
;; `max-specpdl-size' is let-bound to 1000 for the duration of the call.
107 (defun kannada-regexp-of-hashtbl-vals (hashtbl) | |
108 "Return a regular expression that matches all values in hashtable HASHTBL." | |
109 (let ((max-specpdl-size 1000)) | |
110 (regexp-opt | |
111 (sort | |
112 (let (dummy) | |
113 (maphash (function (lambda (key val) (setq dummy (cons val dummy)))) hashtbl) | |
114 dummy) | |
115 (function (lambda (x y) (> (length x) (length y)))))))) | |
116 | |
89909 | 117 ;;;###autoload |
;; Return value: nil when STRING is non-nil (that case is not
;; implemented) or when `kannada-composable-pattern' does not match at
;; POS; otherwise the end position of the match, after the matched text
;; has been composed with `kannada-compose-syllable-region'.
;; Note that point is moved to POS and is not restored here.
118 (defun kannada-composition-function (pos &optional string) | |
119 "Compose Kannada characters after the position POS. | |
120 If STRING is not nil, it is a string, and POS is an index to the string. | |
121 In this case, compose characters after POS of the string." | |
122 (if string | |
123 ;; Not yet implemented. | |
124 nil | |
125 (goto-char pos) | |
126 (if (looking-at kannada-composable-pattern) | |
127 (prog1 (match-end 0) | |
128 (kannada-compose-syllable-region pos (match-end 0)))))) | |
53018 | 129 |
130 ;; Notes on conversion steps. | |
131 | |
132 ;; 1. chars to glyphs | |
133 ;; | |
134 ;; Rules will not be applied to a virama that appears at the end of the | |
135 ;; text. Also, the preceding/following "r" will be treated as a special case. | |
136 | |
137 ;; 2. glyphs reordering. | |
138 ;; | |
139 ;; The glyphs are split by virama, and each glyph group is | |
140 ;; re-ordered in the following order. | |
141 ;; | |
142 ;; Note that `consonant-glyph' mentioned here does not contain the | |
143 ;; vertical bar (right modifier) attached at the right of the | |
144 ;; consonant. | |
145 ;; | |
146 ;; If the glyph-group contains right modifier, | |
147 ;; (1) consonant-glyphs/vowels | |
148 ;; (2) spacing | |
149 ;; (3) right modifier (may be matra) | |
150 ;; (4) top matra | |
151 ;; (5) preceding "r" | |
152 ;; (7) following "r" | |
153 ;; (8) bottom matra or virama. | |
154 ;; | |
155 ;; Otherwise, | |
156 ;; (1) consonant-glyph/vowels, with nukta sign | |
157 ;; (3) left matra | |
158 ;; (4) top matra | |
159 ;; (5) preceding "r" | |
160 ;; (7) following "r" | |
161 ;; (8) bottom matra or virama. | |
162 ;; (2) spacing | |
163 | |
164 ;; 3. glyph to glyph | |
165 ;; | |
166 ;; For better display, some glyph sequences are replaced by tuned glyphs. | |
167 | |
168 ;; 4. Composition. | |
169 ;; | |
170 ;; left modifiers will be attached at the left. | |
171 ;; others will be attached right. | |
172 | |
173 ;; Problem:: | |
174 ;; Can we generalize this method to other Indian scripts? | |
175 | |
176 (defvar knd-char-glyph | |
177 '(("$,1>e(B" . "$,43@(B") | |
178 ("$,1>f(B" . "$,43A(B") | |
179 ("$,1?>(B" . "$,44{(B") | |
180 ("$,1>g(B" . "$,43B(B") | |
181 ("$,1??(B" . nil) | |
182 ("$,1>h(B" . "$,43C(B") | |
183 ("$,1?@(B" . nil) | |
184 ("$,1>i(B" . "$,43D(B") | |
185 ("$,1?A(B" . "\$,44(B") | |
186 ("$,1>j(B" . "$,43E(B") | |
187 ("$,1?B(B" . "\$,45 (B") | |
188 ("$,1>k(B" . "$,43F4(B") | |
189 ("$,1?C(B" . "\$,45$(B") | |
190 ("$,1?`(B" . "$,43F5 (B") | |
191 ("$,1?D(B" . "\$,45%(B") | |
192 ;;("$,1>l(B" . nil) ; not implemented. | |
193 ;;("$,1?a(B" . nil) | |
194 ("$,1>n(B" . "$,43G(B") | |
195 ("$,1>o(B" . "$,43H(B") | |
196 ("$,1>p(B" . "$,43I(B") | |
197 ("$,1?F(B" . "\$,45&(B") | |
198 ("$,1?G(B" . "\$,45&4~(B") | |
199 ("$,1?H(B" . "\$,45&5'(B") | |
200 ("$,1>r(B" . "$,43J(B") | |
201 ("$,1?J(B" . "$,45&5 (B") | |
202 ("$,1>s(B" . "$,43K(B") | |
203 ("$,1?K(B" . "\$,45&5 4~(B") | |
204 ("$,1>t(B" . "$,43L(B") | |
205 ("$,1?L(B" . "\$,45((B") | |
206 ("$,1>b(B" . "$,43M(B") | |
207 ("$,1>c(B" . "$,43N(B") | |
208 ("$,1>u?M(B" . "$,43O5)(B") ("$,1>u(B" . "$,43O(B") ("$,1>u??(B" . "$,43P(B") ("$,1>u?@(B" . "$,43P4~(B") | |
209 ("$,1>v?M(B" . "$,43S5)(B") ("$,1>v(B" . "$,43S(B") ("$,1>v??(B" . "$,43T(B") ("$,1>v?@(B" . "$,43T4~(B") ("$,1>v?F(B" . "$,43S5&(B") ("$,1>v?G(B" . "$,43S5&4~(B") ("$,1>v?H(B" . "$,43S5&5'(B") ("$,1>v?J(B" . "$,43S5&5&5 (B") ("$,1>v?K(B" . "$,43S5&5&5 4~(B") ("$,1>v?L(B" . "$,43S5((B") | |
210 ("$,1>w?M(B" . "$,43V5)(B") ("$,1>w(B" . "$,43V(B") ("$,1>w??(B" . "$,43W(B") ("$,1>w?@(B" . "$,43W4~(B") | |
211 ("$,1>x?M(B" . "$,43Y5)(B") ("$,1>x(B" . "$,43Y(B") ("$,1>x??(B" . "$,43Z(B") ("$,1>x?@(B" . "$,43Z4~(B") | |
212 ("$,1>y?M(B" . "$,43\5)(B") ("$,1>y(B" . "$,43\(B") | |
213 ("$,1>z?M(B" . "$,43^5)(B") ("$,1>z(B" . "$,43^(B") ("$,1>z??(B" . "$,43_(B") ("$,1>z?@(B" . "$,43_4~(B") | |
214 ("$,1>{?M(B" . "$,43a5)(B") ("$,1>{(B" . "$,43a(B") ("$,1>{??(B" . "$,43b(B") ("$,1>{?@(B" . "$,43b4~(B") | |
215 ("$,1>|?M(B" . "$,43d5)(B") ("$,1>|(B" . "$,43d(B") ("$,1>|??(B" . "$,43f(B") ("$,1>|?@(B" . "$,43f4~(B") ("$,1>|?F(B" . "$,43e5&(B") ("$,1>|?G(B" . "$,43e5&4~(B") ("$,1>|?H(B" . "$,43e5&5'(B") ("$,1>|?J(B" . "$,43e5&5&5 (B") ("$,1>|?K(B" . "$,43e5&5&5 4~(B") ("$,1>|?L(B" . "$,43e5((B") | |
216 ("$,1>}?M(B" . "$,44a4z3h45)(B") ("$,1>}(B" . "$,44a4z3h4(B") ("$,1>}??(B" . "$,44b3h4(B") ("$,1>}?@(B" . "$,44b3h44~(B") ("$,1>}?B(B". "$,44a4z3h5 (B") ("$,1>}?J(B". "$,44a5&3h5 (B") ("$,1>}?K(B". "$,44a5&3h5 4~(B") | |
217 ("$,1>~?M(B" . "$,43j5)(B") ("$,1>~(B" . "$,43j(B") | |
218 ("$,1>?M(B" . "$,43m5)(B") ("$,1>(B" . "$,43l(B") ("$,1?#?>(B" . "$,43m4{(B") ("$,1>??(B" . "$,43n(B") ("$,1>?@(B" . "$,43n4~(B") ("$,1>?F(B" . "$,43m5&(B") ("$,1>?G(B" . "$,43m5&4~(B") ("$,1>?H(B" . "$,43m5&5'(B") ("$,1>?J(B" . "$,43m5&5&5 (B") ("$,1>?K(B" . "$,43m5&5&5 4~(B") ("$,1>?L(B" . "$,43m5((B") | |
219 ("$,1? ?M(B" . "$,43p5)(B") ("$,1? (B" . "$,43p(B") ("$,1? ??(B" . "$,43q(B") ("$,1? ?@(B" . "$,43q4~(B") | |
220 ("$,1?!?M(B" . "$,43s5)(B") ("$,1?!(B" . "$,43s(B") ("$,1?!??(B" . "$,43t(B") ("$,1?!?@(B" . "$,43t4~(B") | |
221 ("$,1?"?M(B" . "$,43v5)(B") ("$,1?"(B" . "$,43v(B") ("$,1?"??(B" . "$,43w(B") ("$,1?"?@(B" . "$,43w4~(B") | |
222 ("$,1?#?M(B" . "$,43z5)(B") ("$,1?#(B" . "$,43y(B") ("$,1?#?>(B" . "$,43z4{(B") ("$,1?#??(B" . "$,43{(B") ("$,1?#?@(B" . "$,43{4~(B") ("$,1?#?F(B" . "$,43z5&(B") ("$,1?#?G(B" . "$,43z5&4~(B") ("$,1?#?H(B" . "$,43z5&5'(B") ("$,1?#?J(B" . "$,43z5&5&5 (B") ("$,1?#?K(B" . "$,43z5&5&5 4~(B") ("$,1?#?L(B" . "$,43z5((B") | |
223 ("$,1?$?M(B" . "$,43}5)(B") ("$,1?$(B" . "$,43}(B") ("$,1?$??(B" . "$,43~(B") ("$,1?$?@(B" . "$,43~4~(B") | |
224 ("$,1?%?M(B" . "$,44B5)(B") ("$,1?%(B" . "$,44B(B") ("$,1?%??(B" . "$,44C(B") ("$,1?%?@(B" . "$,44C4~(B") | |
225 ("$,1?&?M(B" . "$,44E5)(B") ("$,1?&(B" . "$,44E(B") ("$,1?&??(B" . "$,44F(B") ("$,1?&?@(B" . "$,44F4~(B") | |
226 ("$,1?'?M(B" . "$,44H5)(B") ("$,1?'(B" . "$,44H(B") ("$,1?'??(B" . "$,44I(B") ("$,1?'?@(B" . "$,44I4~(B") | |
227 ("$,1?(?M(B" . "$,44K5)(B") ("$,1?((B" . "$,44K(B") ("$,1?(??(B" . "$,44L(B") ("$,1?(?@(B" . "$,44L4~(B") | |
228 ("$,1?*?M(B" . "$,44N5)(B") ("$,1?*(B" . "$,44N(B") ("$,1?*??(B" . "$,44O(B") ("$,1?*?@(B" . "$,44O4~(B") ("$,1?*?A(B" . "$,44N5"(B") ("$,1?*?B(B" . "$,44N5#(B") ("$,1?*?J(B" . "$,44N5&5#(B") ("$,1?*?K(B" . "$,44N5&5#4~(B") | |
229 ("$,1?+?M(B" . "$,44Q5)(B") ("$,1?+(B" . "$,44Q(B") ("$,1?+??(B" . "$,44R(B") ("$,1?+?@(B" . "$,44R4~(B") ("$,1?+?A(B" . "$,44Q5"(B") ("$,1?+?B(B" . "$,44Q5#(B") ("$,1?+?J(B" . "$,44Q5&5#(B") ("$,1?+?K(B" . "$,44Q5&5#4~(B") | |
230 ("$,1?,?M(B" . "$,44W5)(B") ("$,1?,(B" . "$,44V(B") ("$,1?,?>(B". "$,44W4{(B") ("$,1?,??(B" . "$,44X(B") ("$,1?,?@(B" . "$,44X4~(B") ("$,1?,?F(B" . "$,44W5&(B") ("$,1?,?G(B" . "$,44W5&4~(B") ("$,1?,?H(B" . "$,44W5&5'(B") ("$,1?,?J(B" . "$,44W5&5&5 (B") ("$,1?,?K(B" . "$,44W5&5&5 4~(B") ("$,1?,?L(B" . "$,44W5((B") | |
231 ("$,1?-?M(B" . "$,44Z5)(B") ("$,1?-(B" . "$,44Z(B") ("$,1?-??(B" . "$,44[(B") ("$,1?-?@(B" . "$,44[4~(B") | |
232 ("$,1?.?M(B" . "$,44h5!5)(B") ("$,1?.(B" . "$,44h4z4(B") ("$,1?.?>(B" . "$,44h4z5!4{(B") ("$,1?.??(B" . "$,44i4(B") ("$,1?.?@(B" . "$,44i44~(B") ("$,1?.?J(B". "$,44h5&5 (B") ("$,1?.?K(B". "$,44h5&5 4~(B") | |
233 ("$,1?/?M(B" . "$,44^4z5!5)(B") ("$,1?/(B" . "$,44^4z4(B") ("$,1?/?>(B" . "$,44^4z5!4{(B")("$,1?/??(B" . "$,44_4(B") ("$,1?/?@(B" . "$,44_44~(B") ("$,1?/?J(B" . "$,44^5&5 (B") ("$,1?/?K(B" . "$,44^5&5 4~(B") | |
234 ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B") | |
235 ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B") | |
236 ("$,1?2?M(B" . "$,44e5)(B") ("$,1?2(B" . "$,44d(B") ("$,1?2?>(B" . "$,44e4{(B") ("$,1?2??(B" . "$,44f(B") ("$,1?2?@(B" . "$,44f4~(B") ("$,1?2?F(B" . "$,44e5&(B") ("$,1?2?G(B" . "$,44e5&4~(B") ("$,1?2?H(B" . "$,44e5&5'(B") ("$,1?2?J(B" . "$,44e5&5&5 (B") ("$,1?2?K(B" . "$,44e5&5&5 4~(B") ("$,1?2?L(B" . "$,44e5((B") | |
237 ("$,1?5?M(B" . "$,44h5)(B") ("$,1?5(B" . "$,44h(B") ("$,1?5??(B" . "$,44i(B") ("$,1?5?@(B" . "$,44i4~(B") ("$,1?5?A(B" . "$,44h5"(B") ("$,1?5?B(B" . "$,44h5#(B") ("$,1?5?J(B" . "$,44h5&5#(B") ("$,1?5?K(B" . "$,44h5&5#4~(B") | |
238 ("$,1?6?M(B" . "$,44k5)(B") ("$,1?6(B" . "$,44k(B") ("$,1?6??(B" . "$,44l(B") ("$,1?6?@(B" . "$,44l4~(B") | |
239 ("$,1?7?M(B" . "$,44n5)(B") ("$,1?7(B" . "$,44n(B") ("$,1?7??(B" . "$,44o(B") ("$,1?7?@(B" . "$,44o4~(B") | |
240 ("$,1?8?M(B" . "$,44q5)(B") ("$,1?8(B" . "$,44q(B") ("$,1?8??(B" . "$,44r(B") ("$,1?8?@(B" . "$,44r4~(B") | |
241 ("$,1?9?M(B" . "$,44t5)(B") ("$,1?9(B" . "$,44t(B") ("$,1?9??(B" . "$,44u(B") ("$,1?9?@(B" . "$,44u4~(B") | |
242 ("$,1?3?M(B" . "$,44w5)(B") ("$,1?3(B" . "$,44w(B") ("$,1?3??(B" . "$,44x(B") ("$,1?3?@(B" . "$,44x4~(B")) | |
243 "Kannada characters to glyphs conversion table. | |
244 Default value contains only the basic rules.") | |
245 | |
;; Hash table (test `equal') mapping each character string in
;; `knd-char-glyph' to its glyph string.  Entries are stored with
;; `puthash' in alist order, so if a key occurs more than once in
;; `knd-char-glyph' the later entry replaces the earlier one.
246 (defvar knd-char-glyph-hash | |
247 (let* ((hash (make-hash-table :test 'equal))) | |
248 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) | |
249 knd-char-glyph) | |
250 hash)) | |
251 | |
;; Regexp matching any key of `knd-char-glyph-hash'.
252 (defvar knd-char-glyph-regexp | |
253 (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)) | |
254 | |
255 (defvar knd-conjunct-glyph | |
256 '(("$,1>u(B" . "$,43Q(B") ("$,1>v(B" . "$,43U(B") ("$,1>w(B" . "$,43X(B") ("$,1>x(B" . "$,43[(B") ("$,1>y(B" . "$,43](B") | |
257 ("$,1>z(B" . "$,43`(B") ("$,1>{(B" . "$,43c(B") ("$,1>|(B" . "$,43g(B") ("$,1>}(B" . "$,43i(B") ("$,1>~(B" . "$,43k(B") | |
258 ("$,1>(B" . "$,43o(B") ("$,1? (B" . "$,43r(B") ("$,1?!(B" . "$,43u(B") ("$,1?"(B" . "$,43x(B") ("$,1?#(B" . "$,43|(B") | |
259 ("$,1?$(B" . "$,44A(B") ("$,1?%(B" . "$,44D(B") ("$,1?&(B" . "$,44G(B") ("$,1?'(B" . "$,44J(B") ("$,1?((B" . "$,44M(B") | |
260 ("$,1?*(B" . "$,44P(B") ("$,1?+(B" . "$,44U(B") ("$,1?,(B" . "$,44Y(B") ("$,1?-(B" . "$,44\(B") ("$,1?.(B" . "$,44](B") | |
261 ("$,1?/(B" . "$,44`(B") ("$,1?0(B" . "$,44c(B") ("$,1?2(B" . "$,44g(B") ("$,1?3(B" . "$,44y(B") ("$,1?5(B" . "$,44j(B") | |
262 ("$,1?6(B" . "$,44m(B") ("$,1?7(B" . "$,44p(B") ("$,1?8(B" . "$,44s(B") ("$,1?9(B" . "$,44v(B")) | |
263 "Kannada characters to conjunct glyphs conversion table.") | |
264 | |
;; Hash table (test `equal') built from `knd-conjunct-glyph': consonant
;; string -> conjunct glyph string.
265 (defvar knd-conjunct-glyph-hash | |
266 (let* ((hash (make-hash-table :test 'equal))) | |
267 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) | |
268 knd-conjunct-glyph) | |
269 hash)) | |
270 | |
;; Regexp matching any VALUE (conjunct glyph string) of the table above.
271 (defvar knd-conjunct-glyph-regexp | |
272 (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)) | |
273 | |
;; Give the first character of every conjunct glyph string the
;; `reference-point' char-code property (5 . 3).
274 (mapc | |
275 (function (lambda (x) | |
276 (put-char-code-property (aref (cdr x) 0) 'reference-point '(5 . 3)))) | |
277 knd-conjunct-glyph) | |
278 | |
279 ;; glyph-to-glyph conversion table. | |
280 ;; it is supposed that glyphs are ordered in | |
281 ;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar]. | |
282 | |
283 (defvar knd-glyph-glyph | |
284 '(("$,45$4A(B" . "$,45*(B") | |
285 ("$,45'4A(B" . "$,45+(B") | |
286 ("$,44A3g(B" . "$,45,(B") | |
287 ("$,45$3Q(B" . "$,45-(B"))) | |
288 | |
;; Hash table (test `equal') built from `knd-glyph-glyph': glyph
;; sequence -> replacement glyph string.
289 (defvar knd-glyph-glyph-hash | |
290 (let* ((hash (make-hash-table :test 'equal))) | |
291 (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) | |
292 knd-glyph-glyph) | |
293 hash)) | |
;; Regexp matching any key of `knd-glyph-glyph-hash'.
294 (defvar knd-glyph-glyph-regexp | |
295 (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)) | |
296 | |
;; Return the list of characters obtained by decoding code points FROM
;; through TO in the `kannada-cdac' charset with `decode-char' and
;; expanding the two results with `number-sequence'.
;; When TO is omitted or nil it defaults to FROM, giving a one-element
;; list.
297 (defun knd-charseq (from &optional to) | |
298 (if (null to) (setq to from)) | |
89909 | 299 (number-sequence (decode-char 'kannada-cdac from) |
300 (decode-char 'kannada-cdac to))) | |
53018 | 301 |
;; List of glyph characters built by appending `knd-charseq' ranges.
;; NOTE(review): the range #xb6-#xb8 is listed twice (rows 325 and 326),
;; so those characters occur twice in the resulting list.
;; NOTE(review): #xc3 (row 337) is also listed in
;; `knd-glyph-bottom-modifier'.
302 (defvar knd-glyph-cv | |
303 (append | |
304 (knd-charseq #x40 #x50) | |
305 (knd-charseq #x52 #x54) | |
306 (knd-charseq #x56 #x57) | |
307 (knd-charseq #x59 #x5a) | |
308 (knd-charseq #x5c) | |
309 (knd-charseq #x5e #x5f) | |
310 (knd-charseq #x61 #x62) | |
311 (knd-charseq #x64 #x66) | |
312 (knd-charseq #x6a) | |
313 (knd-charseq #x6c #x6e) | |
314 (knd-charseq #x70 #x71) | |
315 (knd-charseq #x73 #x74) | |
316 (knd-charseq #x76 #x77) | |
317 (knd-charseq #x79 #x7b) | |
318 (knd-charseq #x7d #x7e) | |
319 (knd-charseq #xa2 #xa3) | |
320 (knd-charseq #xa5 #xa6) | |
321 (knd-charseq #xa8 #xa9) | |
322 (knd-charseq #xab #xac) | |
323 (knd-charseq #xae #xaf) | |
324 (knd-charseq #xb1 #xb2) | |
325 (knd-charseq #xb6 #xb8) | |
326 (knd-charseq #xb6 #xb8) | |
327 (knd-charseq #xba #xbb) | |
328 (knd-charseq #xbe #xbf) | |
329 (knd-charseq #xc1 #xc2) | |
330 (knd-charseq #xc4 #xc6) | |
331 (knd-charseq #xc8 #xc9) | |
332 (knd-charseq #xcb #xcc) | |
333 (knd-charseq #xce #xcf) | |
334 (knd-charseq #xd1 #xd2) | |
335 (knd-charseq #xd4 #xd5) | |
336 (knd-charseq #xd7 #xd8) | |
337 (knd-charseq #xc3)) | |
338 "Kannada Consonants/Vowels/Nukta Glyphs") | |
339 | |
;; Glyph class lists.  Each variable below is a list of characters
;; produced by `knd-charseq' from code points in the `kannada-cdac'
;; charset.
;; NOTE(review): #xdd occurs in both `knd-glyph-right-modifier'
;; (range #xdb-#xdd) and `knd-glyph-top-matra'; #xc3 occurs in both
;; `knd-glyph-bottom-modifier' and `knd-glyph-cv'.
340 (defvar knd-glyph-space | |
341 (knd-charseq #xb3 #xb4) | |
342 "Kannada Spacing Glyphs") | |
343 | |
344 (defvar knd-glyph-right-modifier | |
345 (append | |
346 (knd-charseq #xdb #xdd) | |
347 (knd-charseq #xdf) | |
348 (knd-charseq #xe0 #xe3) | |
349 (knd-charseq #xe9)) | |
350 "Kannada Modifiers attached at the right side.") | |
351 | |
;; Bracket expression built by passing the character list above to
;; `concat' between the two bracket strings.
352 (defvar knd-glyph-right-modifier-regexp | |
353 (concat "[" knd-glyph-right-modifier "]")) | |
354 | |
355 (defvar knd-glyph-jha-tail | |
356 (knd-charseq #x68) | |
357 "Kannada tail for jha.") | |
358 | |
359 (defvar knd-glyph-top-matra | |
360 (append | |
361 (knd-charseq #xda) | |
362 (knd-charseq #xdd) | |
363 (knd-charseq #xe6) | |
364 (knd-charseq #xe8)) | |
365 "Kannada Matras attached at the top side.") | |
366 | |
367 (defvar knd-glyph-bottom-matra | |
368 (append | |
369 (knd-charseq #xe4 #xe5) | |
370 (knd-charseq #xe7)) | |
371 "Kannada Matras attached at the bottom.") | |
372 | |
373 (defvar knd-glyph-end-marks | |
374 (append | |
375 (knd-charseq #x25) | |
376 (knd-charseq #x4d #x4e) | |
377 (knd-charseq #xde)) | |
378 "Kannada end marks: arkavattu, virama, au and diirghaa.") | |
379 | |
380 (defvar knd-glyph-bottom-modifier | |
381 (append | |
382 (knd-charseq #x51) | |
383 (knd-charseq #x55) | |
384 (knd-charseq #x58) | |
385 (knd-charseq #x5b) | |
386 (knd-charseq #x5d) | |
387 (knd-charseq #x60) | |
388 (knd-charseq #x63) | |
389 (knd-charseq #x67) | |
390 (knd-charseq #x69) | |
391 (knd-charseq #x6b) | |
392 (knd-charseq #x6f) | |
393 (knd-charseq #x72) | |
394 (knd-charseq #x75) | |
395 (knd-charseq #x78) | |
396 (knd-charseq #x7c) | |
397 (knd-charseq #xa1) | |
398 (knd-charseq #xa4) | |
399 (knd-charseq #xa7) | |
400 (knd-charseq #xaa) | |
401 (knd-charseq #xad) | |
402 (knd-charseq #xb0) | |
403 (knd-charseq #xb5) | |
404 (knd-charseq #xb9) | |
405 (knd-charseq #xbc #xbd) | |
406 (knd-charseq #xc0) | |
407 (knd-charseq #xc3) | |
408 (knd-charseq #xc7) | |
409 (knd-charseq #xca) | |
410 (knd-charseq #xcd) | |
411 (knd-charseq #xd0) | |
412 (knd-charseq #xd3) | |
413 (knd-charseq #xd6) | |
414 (knd-charseq #xd9) | |
415 (knd-charseq #xea #xef)) | |
416 "Kannada Modifiers attached at the bottom.") | |
417 | |
;; Alist of (GLYPH-LIST . ORDER).  ORDER is the value stored as the
;; `composition-order' char-code property of every character in
;; GLYPH-LIST by the `mapc' form that follows.
;; `knd-glyph-space' and `knd-glyph-bottom-modifier' share the value 5.
418 (defvar knd-glyph-order | |
419 `((,knd-glyph-cv . 1) | |
420 (,knd-glyph-top-matra . 2) | |
421 (,knd-glyph-jha-tail . 3) | |
422 (,knd-glyph-right-modifier . 4) | |
423 (,knd-glyph-space . 5) | |
424 (,knd-glyph-bottom-modifier . 5) | |
425 (,knd-glyph-bottom-matra . 6) | |
426 (,knd-glyph-end-marks . 7) | |
427 )) | |
428 | |
;; Store the `composition-order' property on each glyph character.
;; Entries are processed in list order, so a character that appears in
;; more than one glyph list ends up with the value of the later entry.
429 (mapc | |
430 (function (lambda (x) | |
431 (mapc | |
432 (function (lambda (y) | |
433 (put-char-code-property y 'composition-order (cdr x)))) | |
434 (car x)))) | |
435 knd-glyph-order) | |
436 | |
;; Return STRING composed as a single syllable.
;; STRING is passed through `decompose-string', inserted into a
;; temporary buffer, and the whole buffer is given to
;; `kannada-compose-syllable-region'; the buffer contents are returned.
437 (defun kannada-compose-syllable-string (string) | |
438 (with-temp-buffer | |
439 (insert (decompose-string string)) | |
440 (kannada-compose-syllable-region (point-min) (point-max)) | |
441 (buffer-string))) | |
442 | |
443 ;; kch | |
;; Overview of the steps below:
;;   1. convert the characters in FROM..TO to a glyph string,
;;   2. apply at most one glyph-to-glyph replacement,
;;   3. sort each glyph group by the `composition-order' property,
;;   4. insert a spacing glyph between adjacent glyphs of order 5 or 6,
;;   5. interleave reference points and call `compose-region'.
;; NOTE(review): `last-modifier' is bound here but never referenced.
444 (defun kannada-compose-syllable-region (from to) | |
445 "Compose kannada syllable in region FROM to TO." | |
446 (let ((glyph-str nil) (cons-num 0) (glyph-str-list nil) | |
447 (last-virama nil) (preceding-r nil) (last-modifier nil) | |
448 (last-char (char-before to)) match-str pos | |
449 glyph-block split-pos (conj nil) (rest nil)) | |
450 (save-excursion | |
451 (save-restriction | |
452 ;;; *** char-to-glyph conversion *** | |
453 ;; Special rule 1. -- Last virama must be preserved. | |
;; A trailing virama is excluded from the narrowed region and
;; re-added as a glyph further down.
454 (if (eq last-char ?$,1?M(B) | |
455 (progn | |
456 (setq last-virama t) | |
457 (narrow-to-region from (1- to))) | |
458 (narrow-to-region from to)) | |
459 (goto-char (point-min)) | |
460 ;; Special rule 2. -- preceding "r virama" must be modifier. | |
;; The two leading characters are skipped here; their glyph is
;; appended after the other glyphs have been collected.
461 (when (looking-at "$,1?0?M(B.") | |
462 (setq preceding-r t) | |
463 (goto-char (+ 2 (point)))) | |
464 ;; remove conjunct consonants | |
;; For a match containing a consonant when one has already been
;; counted: its conjunct glyph is appended to CONJ, the consonant
;; is removed from the match, and the first virama found in REST
;; is removed.  Whatever is left of the match is appended to REST.
465 (while (re-search-forward knd-char-glyph-regexp nil t) | |
466 (setq match-str (match-string 0)) | |
467 (if (and (string-match kannada-consonant match-str) | |
468 (> cons-num 0)) | |
469 (progn | |
470 (setq conj (concat conj (gethash (match-string 0 match-str) | |
471 knd-conjunct-glyph-hash))) | |
472 (setq match-str (replace-match "" t nil match-str)) | |
473 (if (string-match "$,1?M(B" rest) | |
474 (setq rest (replace-match "" t nil rest))))) | |
475 (setq rest (concat rest match-str)) | |
476 ;; count the number of consonant-glyphs. | |
477 (if (string-match kannada-consonant match-str) | |
478 (setq cons-num (1+ cons-num)))) | |
479 ;; translate the rest characters into glyphs | |
480 (setq pos 0) | |
481 (while (string-match knd-char-glyph-regexp rest pos) | |
482 (setq match-str (match-string 0 rest)) | |
483 (setq pos (match-end 0)) | |
484 (setq glyph-str | |
485 (concat glyph-str (gethash match-str knd-char-glyph-hash)))) | |
486 | |
;; Conjunct glyphs go after the glyphs translated from REST.
487 (if conj (setq glyph-str (concat glyph-str conj))) | |
;; With a trailing virama, append its glyph; otherwise append one
;; extra glyph when `kannada-consonant-needs-twirl' matches in the
;; narrowed region.
;; NOTE(review): the value assigned to match-str on row 492 is not
;; read afterwards.
488 (if last-virama (setq glyph-str (concat glyph-str "$,45)(B")) | |
489 (goto-char (point-min)) | |
490 (if (re-search-forward kannada-consonant-needs-twirl nil t) | |
491 (progn | |
492 (setq match-str (match-string 0)) | |
493 (setq glyph-str (concat glyph-str "$,44z(B"))))) | |
494 ;; preceding-r must be attached | |
495 (if preceding-r | |
496 (setq glyph-str (concat glyph-str "$,43%(B"))) | |
497 ;;; *** glyph-to-glyph conversion *** | |
;; Only the first match of `knd-glyph-glyph-regexp' is replaced.
498 (when (string-match knd-glyph-glyph-regexp glyph-str) | |
499 (setq glyph-str | |
500 (replace-match (gethash (match-string 0 glyph-str) | |
501 knd-glyph-glyph-hash) | |
502 nil t glyph-str))) | |
503 ;;; *** glyph reordering *** | |
;; Each pass cuts GLYPH-STR just after the matched position,
;; sorts the cut-off block by `composition-order', and appends it
;; to GLYPH-STR-LIST.  The loop ends once GLYPH-STR is empty.
504 (while (setq split-pos (string-match "$,45)(B\\|.$" glyph-str)) | |
505 (setq glyph-block (substring glyph-str 0 (1+ split-pos))) | |
506 (setq glyph-str (substring glyph-str (1+ split-pos))) | |
507 (setq | |
508 glyph-block | |
509 (sort (string-to-list glyph-block) | |
510 (function (lambda (x y) | |
511 (< (get-char-code-property x 'composition-order) | |
512 (get-char-code-property y 'composition-order)))))) | |
513 (setq glyph-str-list (nconc glyph-str-list glyph-block))) | |
514 ;;; *** insert space glyphs for kerning *** | |
;; Walk the list; when the current and the previous glyph both
;; have composition-order 5 or 6, splice a spacing glyph between
;; them.
515 (if (> cons-num 0) | |
516 (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co) | |
517 (while curr | |
518 (setq co (get-char-code-property | |
519 (car curr) 'composition-order) | |
520 bott (or (eq co 5) (eq co 6))) | |
521 (if (and bott last-bott) | |
522 (setcdr prev (cons ?$,44T(B curr))) | |
523 (setq last-bott bott prev curr curr (cdr curr))))) | |
524 ;; concatenate and attach reference-points. | |
;; Every glyph is paired with its reference point (falling back
;; to the default below); `cdr' drops the reference point that
;; precedes the first glyph, so the list starts with a glyph.
525 (setq glyph-str | |
526 (cdr | |
527 (apply | |
528 'nconc | |
529 (mapcar | |
530 (function (lambda (x) | |
531 (list | |
532 (or (get-char-code-property x 'reference-point) | |
533 '(5 . 3) ;; default reference point. | |
534 ) | |
535 x))) | |
536 glyph-str-list)))))) | |
;; Compose the original region using the list built above.
537 (compose-region from to glyph-str))) | |
538 | |
539 (provide 'knd-util) | |
540 | |
53022 | 541 ;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc |
53018 | 542 ;;; knd-util.el ends here |