comparison lisp/language/knd-util.el @ 53018:3d8aaf9af0df

New file.
author Kenichi Handa <handa@m17n.org>
date Sat, 08 Nov 2003 01:40:48 +0000
parents
children 56dd8068d740
comparison
equal deleted inserted replaced
53017:c65ae0405d12 53018:3d8aaf9af0df
1 ;;; knd-util.el --- Support for composing Kannada characters
2
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
4
5 ;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org>
6 ;; Keywords: multilingual, Kannada
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;; Created: Jul. 14. 2003
26
27 ;;; Commentary:
28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Kannada script characters.
31
32 ;;; Code:
33
34 ;;;###autoload
35
36 ;; Kannada Composable Pattern
37 ;; C .. Consonants
38 ;; V .. Vowel
39 ;; H .. Virama
40 ;; M .. Matra
41 ;; A .. Vowel modifier
42 ;; (N .. Zerowidth Non Joiner)
43 ;; (J .. Zerowidth Joiner. )
44 ;; 1. vowel
45 ;; V(A)?
46 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?)?
48
49 (defconst kannada-consonant
50 "[$,1>u(B-$,1?9(B]")
51
52 (defconst kannada-consonant-needs-twirl
53 "[$,1>u>w(B-$,1>{>}(B-$,1>~? (B-$,1?"?$(B-$,1?+?-?0?3(B-$,1?9(B]\\($,1?M(B[$,1>u(B-$,1?9(B]\\)*[$,1?A?B?C?D>b(B]?$")
54
55 (defconst kannada-composable-pattern
56 (concat
57 "\\([$,1>b(B-$,1>t?`>l(B]\\)\\|[$,1>c(B]"
58 "\\|\\("
59 "\\(?:\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?[$,1>u(B-$,1?9(B]$,1?M(B\\)?"
60 "[$,1>u(B-$,1?9(B]\\(?:$,1?M(B\\|[$,1?>(B-$,1?M?U?C(B]?\\)?"
61 "\\)")
62 "Regexp matching a composable sequence of Kannada characters.")
63
(defun kannada-compose-region (from to)
  "Compose every Kannada syllable found between FROM and TO.
Each match of `kannada-composable-pattern' inside the region is passed
to `kannada-compose-syllable-region'.  Interactively, operate on the
region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward kannada-composable-pattern nil t)
        ;; Capture the bounds first; the callee performs its own matching.
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (kannada-compose-syllable-region syllable-start syllable-end))))))
(defun kannada-compose-string (string)
  "Return the text of STRING with its Kannada syllables composed.
STRING is first passed through `decompose-string'; the composition is
then done in a temporary buffer by `kannada-compose-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-region (point-min) (point-max))
      (buffer-string))))
78
(defun kannada-post-read-conversion (len)
  "Compose the Kannada text in the LEN characters following point.
The buffer-modified flag is set back to the value it had on entry.
Return the size of the processed region."
  (let ((was-modified (buffer-modified-p)))
    (save-excursion
      (save-restriction
        (narrow-to-region (point) (+ (point) len))
        (kannada-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
87
(defun kannada-range (from to)
  "Return the ascending list of integers from FROM through TO, inclusive.
The result is nil when FROM is greater than TO."
  (let ((n to)
        (acc nil))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (push n acc)
      (setq n (1- n)))
    acc))
92
(defun kannada-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches every key of hash table HASHTBL.
The keys are collected, ordered longest first and handed to `regexp-opt'."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _val) (push key keys)) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
102
(defun kannada-regexp-of-hashtbl-vals (hashtbl)
  "Return a regular expression that matches all values in hashtable HASHTBL.
Only string values take part.  A table may map a key to nil (as some
entries of `knd-char-glyph' do); such values are skipped because
`regexp-opt' works on strings only."
  (let ((max-specpdl-size 1000)
        (vals nil))
    (maphash (lambda (_key val)
               (if (stringp val)
                   (setq vals (cons val vals))))
             hashtbl)
    (regexp-opt
     ;; Longest values first, as in `kannada-regexp-of-hashtbl-keys'.
     (sort vals (lambda (x y) (> (length x) (length y)))))))
112
(defun kannada-composition-function (from to pattern &optional string)
  "Compose Kannada characters between FROM and TO, or in STRING if given.
It is assumed that the region or STRING fully matches the composable
PATTERN regexp; PATTERN itself is not examined here.
Return TO minus FROM."
  (cond (string (kannada-compose-syllable-string string))
        (t (kannada-compose-syllable-region from to)))
  (- to from))
120
;; Install `kannada-composition-function', keyed by
;; `kannada-composable-pattern', for every code point #x0c80..#x0cff.
(dolist (ucs (kannada-range #x0c80 #x0cff))
  (aset composition-function-table
        (decode-char 'ucs ucs)
        (list (cons kannada-composable-pattern
                    'kannada-composition-function))))
128
129 ;; Notes on conversion steps.
130
131 ;; 1. chars to glyphs
132 ;;
133 ;; Rules are not applied to a virama that appears at the end of the
134 ;; text. Also, a preceding/following "r" is treated as a special case.
135
136 ;; 2. glyphs reordering.
137 ;;
138 ;; The glyphs are split by virama, and each glyph group is
139 ;; re-ordered in the following order.
140 ;;
141 ;; Note that `consonant-glyph' mentioned here does not contain the
142 ;; vertical bar (right modifier) attached at the right of the
143 ;; consonant.
144 ;;
145 ;; If the glyph-group contains right modifier,
146 ;; (1) consonant-glyphs/vowels
147 ;; (2) spacing
148 ;; (3) right modifier (may be matra)
149 ;; (4) top matra
150 ;; (5) preceding "r"
151 ;; (7) following "r"
152 ;; (8) bottom matra or virama.
153 ;;
154 ;; Otherwise,
155 ;; (1) consonant-glyph/vowels, with nukta sign
156 ;; (3) left matra
157 ;; (4) top matra
158 ;; (5) preceding "r"
159 ;; (7) following "r"
160 ;; (8) bottom matra or virama.
161 ;; (2) spacing
162
163 ;; 3. glyph to glyph
164 ;;
165 ;; For better display, some glyph display would be tuned.
166
167 ;; 4. Composition.
168 ;;
169 ;; left modifiers will be attached at the left.
170 ;; others will be attached right.
171
172 ;; Problem::
173 ;; Can we generalize these methods to other Indian scripts?
174
175 (defvar knd-char-glyph
176 '(("$,1>e(B" . "$,43@(B")
177 ("$,1>f(B" . "$,43A(B")
178 ("$,1?>(B" . "$,44{(B")
179 ("$,1>g(B" . "$,43B(B")
180 ("$,1??(B" . nil)
181 ("$,1>h(B" . "$,43C(B")
182 ("$,1?@(B" . nil)
183 ("$,1>i(B" . "$,43D(B")
184 ("$,1?A(B" . "\$,44(B")
185 ("$,1>j(B" . "$,43E(B")
186 ("$,1?B(B" . "\$,45 (B")
187 ("$,1>k(B" . "$,43F4(B")
188 ("$,1?C(B" . "\$,45$(B")
189 ("$,1?`(B" . "$,43F5 (B")
190 ("$,1?D(B" . "\$,45%(B")
191 ;;("$,1>l(B" . nil) ; not implemented.
192 ;;("$,1?a(B" . nil)
193 ("$,1>n(B" . "$,43G(B")
194 ("$,1>o(B" . "$,43H(B")
195 ("$,1>p(B" . "$,43I(B")
196 ("$,1?F(B" . "\$,45&(B")
197 ("$,1?G(B" . "\$,45&4~(B")
198 ("$,1?H(B" . "\$,45&5'(B")
199 ("$,1>r(B" . "$,43J(B")
200 ("$,1?J(B" . "$,45&5 (B")
201 ("$,1>s(B" . "$,43K(B")
202 ("$,1?K(B" . "\$,45&5 4~(B")
203 ("$,1>t(B" . "$,43L(B")
204 ("$,1?L(B" . "\$,45((B")
205 ("$,1>b(B" . "$,43M(B")
206 ("$,1>c(B" . "$,43N(B")
207 ("$,1>u?M(B" . "$,43O5)(B") ("$,1>u(B" . "$,43O(B") ("$,1>u??(B" . "$,43P(B") ("$,1>u?@(B" . "$,43P4~(B")
208 ("$,1>v?M(B" . "$,43S5)(B") ("$,1>v(B" . "$,43S(B") ("$,1>v??(B" . "$,43T(B") ("$,1>v?@(B" . "$,43T4~(B") ("$,1>v?F(B" . "$,43S5&(B") ("$,1>v?G(B" . "$,43S5&4~(B") ("$,1>v?H(B" . "$,43S5&5'(B") ("$,1>v?J(B" . "$,43S5&5&5 (B") ("$,1>v?K(B" . "$,43S5&5&5 4~(B") ("$,1>v?L(B" . "$,43S5((B")
209 ("$,1>w?M(B" . "$,43V5)(B") ("$,1>w(B" . "$,43V(B") ("$,1>w??(B" . "$,43W(B") ("$,1>w?@(B" . "$,43W4~(B")
210 ("$,1>x?M(B" . "$,43Y5)(B") ("$,1>x(B" . "$,43Y(B") ("$,1>x??(B" . "$,43Z(B") ("$,1>x?@(B" . "$,43Z4~(B")
211 ("$,1>y?M(B" . "$,43\5)(B") ("$,1>y(B" . "$,43\(B")
212 ("$,1>z?M(B" . "$,43^5)(B") ("$,1>z(B" . "$,43^(B") ("$,1>z??(B" . "$,43_(B") ("$,1>z?@(B" . "$,43_4~(B")
213 ("$,1>{?M(B" . "$,43a5)(B") ("$,1>{(B" . "$,43a(B") ("$,1>{??(B" . "$,43b(B") ("$,1>{?@(B" . "$,43b4~(B")
214 ("$,1>|?M(B" . "$,43d5)(B") ("$,1>|(B" . "$,43d(B") ("$,1>|??(B" . "$,43f(B") ("$,1>|?@(B" . "$,43f4~(B") ("$,1>|?F(B" . "$,43e5&(B") ("$,1>|?G(B" . "$,43e5&4~(B") ("$,1>|?H(B" . "$,43e5&5'(B") ("$,1>|?J(B" . "$,43e5&5&5 (B") ("$,1>|?K(B" . "$,43e5&5&5 4~(B") ("$,1>|?L(B" . "$,43e5((B")
215 ("$,1>}?M(B" . "$,44a4z3h45)(B") ("$,1>}(B" . "$,44a4z3h4(B") ("$,1>}??(B" . "$,44b3h4(B") ("$,1>}?@(B" . "$,44b3h44~(B") ("$,1>}?B(B". "$,44a4z3h5 (B") ("$,1>}?J(B". "$,44a5&3h5 (B") ("$,1>}?K(B". "$,44a5&3h5 4~(B")
216 ("$,1>~?M(B" . "$,43j5)(B") ("$,1>~(B" . "$,43j(B")
217 ("$,1>?M(B" . "$,43m5)(B") ("$,1>(B" . "$,43l(B") ("$,1?#?>(B" . "$,43m4{(B") ("$,1>??(B" . "$,43n(B") ("$,1>?@(B" . "$,43n4~(B") ("$,1>?F(B" . "$,43m5&(B") ("$,1>?G(B" . "$,43m5&4~(B") ("$,1>?H(B" . "$,43m5&5'(B") ("$,1>?J(B" . "$,43m5&5&5 (B") ("$,1>?K(B" . "$,43m5&5&5 4~(B") ("$,1>?L(B" . "$,43m5((B")
218 ("$,1? ?M(B" . "$,43p5)(B") ("$,1? (B" . "$,43p(B") ("$,1? ??(B" . "$,43q(B") ("$,1? ?@(B" . "$,43q4~(B")
219 ("$,1?!?M(B" . "$,43s5)(B") ("$,1?!(B" . "$,43s(B") ("$,1?!??(B" . "$,43t(B") ("$,1?!?@(B" . "$,43t4~(B")
220 ("$,1?"?M(B" . "$,43v5)(B") ("$,1?"(B" . "$,43v(B") ("$,1?"??(B" . "$,43w(B") ("$,1?"?@(B" . "$,43w4~(B")
221 ("$,1?#?M(B" . "$,43z5)(B") ("$,1?#(B" . "$,43y(B") ("$,1?#?>(B" . "$,43z4{(B") ("$,1?#??(B" . "$,43{(B") ("$,1?#?@(B" . "$,43{4~(B") ("$,1?#?F(B" . "$,43z5&(B") ("$,1?#?G(B" . "$,43z5&4~(B") ("$,1?#?H(B" . "$,43z5&5'(B") ("$,1?#?J(B" . "$,43z5&5&5 (B") ("$,1?#?K(B" . "$,43z5&5&5 4~(B") ("$,1?#?L(B" . "$,43z5((B")
222 ("$,1?$?M(B" . "$,43}5)(B") ("$,1?$(B" . "$,43}(B") ("$,1?$??(B" . "$,43~(B") ("$,1?$?@(B" . "$,43~4~(B")
223 ("$,1?%?M(B" . "$,44B5)(B") ("$,1?%(B" . "$,44B(B") ("$,1?%??(B" . "$,44C(B") ("$,1?%?@(B" . "$,44C4~(B")
224 ("$,1?&?M(B" . "$,44E5)(B") ("$,1?&(B" . "$,44E(B") ("$,1?&??(B" . "$,44F(B") ("$,1?&?@(B" . "$,44F4~(B")
225 ("$,1?'?M(B" . "$,44H5)(B") ("$,1?'(B" . "$,44H(B") ("$,1?'??(B" . "$,44I(B") ("$,1?'?@(B" . "$,44I4~(B")
226 ("$,1?(?M(B" . "$,44K5)(B") ("$,1?((B" . "$,44K(B") ("$,1?(??(B" . "$,44L(B") ("$,1?(?@(B" . "$,44L4~(B")
227 ("$,1?*?M(B" . "$,44N5)(B") ("$,1?*(B" . "$,44N(B") ("$,1?*??(B" . "$,44O(B") ("$,1?*?@(B" . "$,44O4~(B") ("$,1?*?A(B" . "$,44N5"(B") ("$,1?*?B(B" . "$,44N5#(B") ("$,1?*?J(B" . "$,44N5&5#(B") ("$,1?*?K(B" . "$,44N5&5#4~(B")
228 ("$,1?+?M(B" . "$,44Q5)(B") ("$,1?+(B" . "$,44Q(B") ("$,1?+??(B" . "$,44R(B") ("$,1?+?@(B" . "$,44R4~(B") ("$,1?+?A(B" . "$,44Q5"(B") ("$,1?+?B(B" . "$,44Q5#(B") ("$,1?+?J(B" . "$,44Q5&5#(B") ("$,1?+?K(B" . "$,44Q5&5#4~(B")
229 ("$,1?,?M(B" . "$,44W5)(B") ("$,1?,(B" . "$,44V(B") ("$,1?,?>(B". "$,44W4{(B") ("$,1?,??(B" . "$,44X(B") ("$,1?,?@(B" . "$,44X4~(B") ("$,1?,?F(B" . "$,44W5&(B") ("$,1?,?G(B" . "$,44W5&4~(B") ("$,1?,?H(B" . "$,44W5&5'(B") ("$,1?,?J(B" . "$,44W5&5&5 (B") ("$,1?,?K(B" . "$,44W5&5&5 4~(B") ("$,1?,?L(B" . "$,44W5((B")
230 ("$,1?-?M(B" . "$,44Z5)(B") ("$,1?-(B" . "$,44Z(B") ("$,1?-??(B" . "$,44[(B") ("$,1?-?@(B" . "$,44[4~(B")
231 ("$,1?.?M(B" . "$,44h5!5)(B") ("$,1?.(B" . "$,44h4z4(B") ("$,1?.?>(B" . "$,44h4z5!4{(B") ("$,1?.??(B" . "$,44i4(B") ("$,1?.?@(B" . "$,44i44~(B") ("$,1?.?J(B". "$,44h5&5 (B") ("$,1?.?K(B". "$,44h5&5 4~(B")
232 ("$,1?/?M(B" . "$,44^4z5!5)(B") ("$,1?/(B" . "$,44^4z4(B") ("$,1?/?>(B" . "$,44^4z5!4{(B")("$,1?/??(B" . "$,44_4(B") ("$,1?/?@(B" . "$,44_44~(B") ("$,1?/?J(B" . "$,44^5&5 (B") ("$,1?/?K(B" . "$,44^5&5 4~(B")
233 ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B")
234 ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B")
235 ("$,1?2?M(B" . "$,44e5)(B") ("$,1?2(B" . "$,44d(B") ("$,1?2?>(B" . "$,44e4{(B") ("$,1?2??(B" . "$,44f(B") ("$,1?2?@(B" . "$,44f4~(B") ("$,1?2?F(B" . "$,44e5&(B") ("$,1?2?G(B" . "$,44e5&4~(B") ("$,1?2?H(B" . "$,44e5&5'(B") ("$,1?2?J(B" . "$,44e5&5&5 (B") ("$,1?2?K(B" . "$,44e5&5&5 4~(B") ("$,1?2?L(B" . "$,44e5((B")
236 ("$,1?5?M(B" . "$,44h5)(B") ("$,1?5(B" . "$,44h(B") ("$,1?5??(B" . "$,44i(B") ("$,1?5?@(B" . "$,44i4~(B") ("$,1?5?A(B" . "$,44h5"(B") ("$,1?5?B(B" . "$,44h5#(B") ("$,1?5?J(B" . "$,44h5&5#(B") ("$,1?5?K(B" . "$,44h5&5#4~(B")
237 ("$,1?6?M(B" . "$,44k5)(B") ("$,1?6(B" . "$,44k(B") ("$,1?6??(B" . "$,44l(B") ("$,1?6?@(B" . "$,44l4~(B")
238 ("$,1?7?M(B" . "$,44n5)(B") ("$,1?7(B" . "$,44n(B") ("$,1?7??(B" . "$,44o(B") ("$,1?7?@(B" . "$,44o4~(B")
239 ("$,1?8?M(B" . "$,44q5)(B") ("$,1?8(B" . "$,44q(B") ("$,1?8??(B" . "$,44r(B") ("$,1?8?@(B" . "$,44r4~(B")
240 ("$,1?9?M(B" . "$,44t5)(B") ("$,1?9(B" . "$,44t(B") ("$,1?9??(B" . "$,44u(B") ("$,1?9?@(B" . "$,44u4~(B")
241 ("$,1?3?M(B" . "$,44w5)(B") ("$,1?3(B" . "$,44w(B") ("$,1?3??(B" . "$,44x(B") ("$,1?3?@(B" . "$,44x4~(B"))
242 "Kannada characters to glyphs conversion table.
243 Default value contains only the basic rules.")
244
(defvar knd-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table holding the entries of `knd-char-glyph' (tested with `equal').")

(defvar knd-char-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
  "Regexp matching any key of `knd-char-glyph-hash'.")
253
254 (defvar knd-conjunct-glyph
255 '(("$,1>u(B" . "$,43Q(B") ("$,1>v(B" . "$,43U(B") ("$,1>w(B" . "$,43X(B") ("$,1>x(B" . "$,43[(B") ("$,1>y(B" . "$,43](B")
256 ("$,1>z(B" . "$,43`(B") ("$,1>{(B" . "$,43c(B") ("$,1>|(B" . "$,43g(B") ("$,1>}(B" . "$,43i(B") ("$,1>~(B" . "$,43k(B")
257 ("$,1>(B" . "$,43o(B") ("$,1? (B" . "$,43r(B") ("$,1?!(B" . "$,43u(B") ("$,1?"(B" . "$,43x(B") ("$,1?#(B" . "$,43|(B")
258 ("$,1?$(B" . "$,44A(B") ("$,1?%(B" . "$,44D(B") ("$,1?&(B" . "$,44G(B") ("$,1?'(B" . "$,44J(B") ("$,1?((B" . "$,44M(B")
259 ("$,1?*(B" . "$,44P(B") ("$,1?+(B" . "$,44U(B") ("$,1?,(B" . "$,44Y(B") ("$,1?-(B" . "$,44\(B") ("$,1?.(B" . "$,44](B")
260 ("$,1?/(B" . "$,44`(B") ("$,1?0(B" . "$,44c(B") ("$,1?2(B" . "$,44g(B") ("$,1?3(B" . "$,44y(B") ("$,1?5(B" . "$,44j(B")
261 ("$,1?6(B" . "$,44m(B") ("$,1?7(B" . "$,44p(B") ("$,1?8(B" . "$,44s(B") ("$,1?9(B" . "$,44v(B"))
262 "Kannada characters to conjunct glyphs conversion table.")
263
(defvar knd-conjunct-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-conjunct-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table holding the entries of `knd-conjunct-glyph' (tested with `equal').")

(defvar knd-conjunct-glyph-regexp
  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
  "Regexp matching any value of `knd-conjunct-glyph-hash'.")
272
;; Give the first character of every conjunct glyph string the
;; `reference-point' property (5 . 3).
(dolist (entry knd-conjunct-glyph)
  (put-char-code-property (aref (cdr entry) 0) 'reference-point '(5 . 3)))
277
278 ;; glyph-to-glyph conversion table.
279 ;; it is supposed that glyphs are ordered in
280 ;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar].
281
282 (defvar knd-glyph-glyph
283 '(("$,45$4A(B" . "$,45*(B")
284 ("$,45'4A(B" . "$,45+(B")
285 ("$,44A3g(B" . "$,45,(B")
286 ("$,45$3Q(B" . "$,45-(B")))
287
(defvar knd-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table holding the entries of `knd-glyph-glyph' (tested with `equal').")

(defvar knd-glyph-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)
  "Regexp matching any key of `knd-glyph-glyph-hash'.")
295
(defun knd-charseq (from &optional to)
  "Return the list of Kannada glyph characters for codes FROM through TO.
Each code is converted by `indian-glyph-char' with script `kannada'.
When TO is omitted or nil, only FROM is converted."
  (let ((upper (or to from)))
    (mapcar (lambda (code) (indian-glyph-char code 'kannada))
            (kannada-range from upper))))
300
(defvar knd-glyph-cv
  (append
   (knd-charseq #x40 #x50)
   (knd-charseq #x52 #x54)
   (knd-charseq #x56 #x57)
   (knd-charseq #x59 #x5a)
   (knd-charseq #x5c)
   (knd-charseq #x5e #x5f)
   (knd-charseq #x61 #x62)
   (knd-charseq #x64 #x66)
   (knd-charseq #x6a)
   (knd-charseq #x6c #x6e)
   (knd-charseq #x70 #x71)
   (knd-charseq #x73 #x74)
   (knd-charseq #x76 #x77)
   (knd-charseq #x79 #x7b)
   (knd-charseq #x7d #x7e)
   (knd-charseq #xa2 #xa3)
   (knd-charseq #xa5 #xa6)
   (knd-charseq #xa8 #xa9)
   (knd-charseq #xab #xac)
   (knd-charseq #xae #xaf)
   (knd-charseq #xb1 #xb2)
   (knd-charseq #xb6 #xb8)
   (knd-charseq #xba #xbb)
   (knd-charseq #xbe #xbf)
   (knd-charseq #xc1 #xc2)
   (knd-charseq #xc4 #xc6)
   (knd-charseq #xc8 #xc9)
   (knd-charseq #xcb #xcc)
   (knd-charseq #xce #xcf)
   (knd-charseq #xd1 #xd2)
   (knd-charseq #xd4 #xd5)
   (knd-charseq #xd7 #xd8)
   ;; NOTE(review): #xc3 is also a member of `knd-glyph-bottom-modifier';
   ;; confirm which class is intended for it.
   (knd-charseq #xc3))
  "Kannada Consonants/Vowels/Nukta Glyphs.
Each glyph code range appears exactly once in this list.")
338
(defvar knd-glyph-space
  (knd-charseq #xb3 #xb4)
  "Kannada spacing glyphs (codes #xb3 and #xb4).")

(defvar knd-glyph-right-modifier
  (apply 'append
         (mapcar (lambda (range) (apply 'knd-charseq range))
                 '((#xdb #xdd) (#xdf) (#xe0 #xe3) (#xe9))))
  "Kannada modifier glyphs attached at the right side.")

(defvar knd-glyph-right-modifier-regexp
  (concat "[" (apply 'string knd-glyph-right-modifier) "]")
  "Character-class regexp built from `knd-glyph-right-modifier'.")

(defvar knd-glyph-jha-tail
  (knd-charseq #x68)
  "Kannada tail glyph for jha.")
357
(defvar knd-glyph-top-matra
  (apply 'append (mapcar 'knd-charseq '(#xda #xdd #xe6 #xe8)))
  "Kannada matra glyphs attached at the top side.")

(defvar knd-glyph-bottom-matra
  (apply 'append (mapcar 'knd-charseq '(#xe4 #xe5 #xe7)))
  "Kannada matra glyphs attached at the bottom.")

(defvar knd-glyph-end-marks
  (apply 'append (mapcar 'knd-charseq '(#x25 #x4d #x4e #xde)))
  "Kannada end marks: arkavattu, virama, au and diirghaa.")
378
(defvar knd-glyph-bottom-modifier
  (append
   ;; Individual glyph codes, in ascending order.
   (apply 'append
          (mapcar 'knd-charseq
                  '(#x51 #x55 #x58 #x5b #x5d #x60 #x63 #x67 #x69 #x6b
                    #x6f #x72 #x75 #x78 #x7c #xa1 #xa4 #xa7 #xaa #xad
                    #xb0 #xb5 #xb9 #xbc #xbd #xc0 #xc3 #xc7 #xca #xcd
                    #xd0 #xd3 #xd6 #xd9)))
   ;; Trailing contiguous range.
   (knd-charseq #xea #xef))
  "Kannada modifier glyphs attached at the bottom.")
416
(defvar knd-glyph-order
  (list (cons knd-glyph-cv 1)
        (cons knd-glyph-top-matra 2)
        (cons knd-glyph-jha-tail 3)
        (cons knd-glyph-right-modifier 4)
        (cons knd-glyph-space 5)
        (cons knd-glyph-bottom-modifier 5)
        (cons knd-glyph-bottom-matra 6)
        (cons knd-glyph-end-marks 7))
  "Alist of (GLYPH-LIST . ORDER) pairs.
ORDER is the value stored as the `composition-order' property of each
glyph in GLYPH-LIST.")
427
;; Record each glyph's rank from `knd-glyph-order' as its
;; `composition-order' property.  Entries are processed in list order,
;; so a glyph that appears in several classes keeps the last rank set.
(dolist (class knd-glyph-order)
  (dolist (glyph (car class))
    (put-char-code-property glyph 'composition-order (cdr class))))
435
(defun kannada-compose-syllable-string (string)
  "Return the text of STRING composed as a single Kannada syllable.
STRING is first passed through `decompose-string'; the composition is
then done in a temporary buffer by `kannada-compose-syllable-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
441
442 ;; kch
(defun kannada-compose-syllable-region (from to)
  "Compose the Kannada syllable in the region from FROM to TO.
The characters are converted to glyphs with `knd-char-glyph-hash' and
`knd-conjunct-glyph-hash', tuned with `knd-glyph-glyph-hash', sorted
by their `composition-order' property and finally passed to
`compose-region' as the component list.
If the region contains no character matched by `knd-char-glyph-regexp',
nothing is composed."
  (let ((glyph-str nil) (cons-num 0) (glyph-str-list nil)
        (last-virama nil) (preceding-r nil)
        (last-char (char-before to)) match-str pos
        glyph-block split-pos (conj nil) (rest nil))
    (save-excursion
      (save-restriction
        ;;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last virama must be preserved.
        (if (eq last-char ?$,1?M(B)
            (progn
              (setq last-virama t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r virama" must be modifier.
        (when (looking-at "$,1?0?M(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; remove conjunct consonants
        (while (re-search-forward knd-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          ;; Every consonant after the first becomes a conjunct glyph;
          ;; it is dropped from MATCH-STR and one virama is dropped from REST.
          (if (and (string-match kannada-consonant match-str)
                   (> cons-num 0))
              (progn
                (setq conj (concat conj (gethash (match-string 0 match-str)
                                                 knd-conjunct-glyph-hash)))
                (setq match-str (replace-match "" t nil match-str))
                (if (string-match "$,1?M(B" rest)
                    (setq rest (replace-match "" t nil rest)))))
          (setq rest (concat rest match-str))
          ;; count the number of consonant-glyphs.
          (if (string-match kannada-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; REST stays nil when nothing in the region matched
        ;; `knd-char-glyph-regexp'; `string-match' would then signal an
        ;; error, so the remaining steps are skipped in that case.
        (when rest
          ;; translate the rest characters into glyphs
          (setq pos 0)
          (while (string-match knd-char-glyph-regexp rest pos)
            (setq match-str (match-string 0 rest))
            (setq pos (match-end 0))
            (setq glyph-str
                  (concat glyph-str (gethash match-str knd-char-glyph-hash))))
          (if conj (setq glyph-str (concat glyph-str conj)))
          (if last-virama (setq glyph-str (concat glyph-str "$,45)(B"))
            (goto-char (point-min))
            (if (re-search-forward kannada-consonant-needs-twirl nil t)
                (setq glyph-str (concat glyph-str "$,44z(B"))))
          ;; preceding-r must be attached
          (if preceding-r
              (setq glyph-str (concat glyph-str "$,43%(B")))
          ;;; *** glyph-to-glyph conversion ***
          (when (string-match knd-glyph-glyph-regexp glyph-str)
            (setq glyph-str
                  (replace-match (gethash (match-string 0 glyph-str)
                                          knd-glyph-glyph-hash)
                                 nil t glyph-str)))
          ;;; *** glyph reordering ***
          ;; Cut GLYPH-STR after each virama glyph (or at its last
          ;; character) and sort every piece by `composition-order'.
          (while (setq split-pos (string-match "$,45)(B\\|.$" glyph-str))
            (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
            (setq glyph-str (substring glyph-str (1+ split-pos)))
            (setq
             glyph-block
             (sort (string-to-list glyph-block)
                   (function (lambda (x y)
                               (< (get-char-code-property x 'composition-order)
                                  (get-char-code-property y 'composition-order))))))
            (setq glyph-str-list (nconc glyph-str-list glyph-block)))
          ;;; *** insert space glyphs for kerning ***
          ;; A space glyph goes between two adjacent glyphs whose
          ;; `composition-order' is 5 or 6.
          (if (> cons-num 0)
              (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co)
                (while curr
                  (setq co (get-char-code-property
                            (car curr) 'composition-order)
                        bott (or (eq co 5) (eq co 6)))
                  (if (and bott last-bott)
                      (setcdr prev (cons ?$,44T(B curr)))
                  (setq last-bott bott prev curr curr (cdr curr)))))
          ;; concatenate and attach reference-points.
          ;; Each glyph is preceded by its reference point; the `cdr'
          ;; drops the one in front of the first glyph.
          (setq glyph-str
                (cdr
                 (apply
                  'nconc
                  (mapcar
                   (function (lambda (x)
                               (list
                                (or (get-char-code-property x 'reference-point)
                                    '(5 . 3)) ;; default reference point.
                                x)))
                   glyph-str-list)))))))
    (if rest (compose-region from to glyph-str))))
537
538 (provide 'knd-util)
539
540 ;;; knd-util.el ends here