89909
|
1 ;;; knd-util.el --- Support for composing Kannada characters
|
|
2
|
|
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Maintainer: CHOWKSEY, Kailash C. <klchxbec@m-net.arbornet.org>
|
|
6 ;; Keywords: multilingual, Kannada
|
|
7
|
|
8 ;; This file is part of GNU Emacs.
|
|
9
|
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 ;; it under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
13 ;; any later version.
|
|
14
|
|
15 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 ;; GNU General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
23 ;; Boston, MA 02111-1307, USA.
|
|
24
|
|
25 ;; Created: Jul. 14. 2003
|
|
26
|
|
27 ;;; Commentary:
|
|
28
|
|
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
|
|
30 ;; composition of Kannada script characters.
|
|
31
|
|
32 ;;; Code:
|
|
33
|
|
34 ;;;###autoload
|
|
35
|
|
36 ;; Kannada Composable Pattern
|
|
37 ;; C .. Consonants
|
|
38 ;; V .. Vowel
|
|
39 ;; H .. Virama
|
|
40 ;; M .. Matra
|
|
41 ;; A .. Anuswar (vowel modifier)
|
|
42 ;; (N .. Zerowidth Non Joiner)
|
|
43 ;; (J .. Zerowidth Joiner. )
|
|
44 ;; 1. vowel
|
|
45 ;; V(A)?
|
|
46 ;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya)
|
|
47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?)?
|
|
48
|
|
(defconst kannada-consonant "[$,1>u(B-$,1?9(B]"
  "Regexp matching one character in the Kannada consonant range.")
|
|
51
|
|
52 (defconst kannada-consonant-needs-twirl
|
|
53 "[$,1>u>w(B-$,1>{>}(B-$,1>~? (B-$,1?"?$(B-$,1?+?-?0?3(B-$,1?9(B]\\($,1?M(B[$,1>u(B-$,1?9(B]\\)*[$,1?A?B?C?D>b(B]?$")
|
|
54
|
|
55 (defconst kannada-composable-pattern
|
|
56 (concat
|
|
57 "\\([$,1>b(B-$,1>t?`>l(B]\\)\\|[$,1>c(B]"
|
|
58 "\\|\\("
|
|
59 "\\(?:\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?\\(?:[$,1>u(B-$,1?9(B]$,1?M(B\\)?[$,1>u(B-$,1?9(B]$,1?M(B\\)?"
|
|
60 "[$,1>u(B-$,1?9(B]\\(?:$,1?M(B\\|[$,1?>(B-$,1?M?U?C(B]?\\)?"
|
|
61 "\\)")
|
|
62 "Regexp matching a composable sequence of Kannada characters.")
|
|
63
|
|
64 ;;;###autoload
|
|
(defun kannada-compose-region (from to)
  "Compose every Kannada syllable found between FROM and TO.
When called interactively, operate on the region.  Each match of
`kannada-composable-pattern' is handed to
`kannada-compose-syllable-region'."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (syllable-start syllable-end)
        (while (re-search-forward kannada-composable-pattern nil t)
          (setq syllable-start (match-beginning 0)
                syllable-end (match-end 0))
          (kannada-compose-syllable-region syllable-start syllable-end))))))
|
|
74 ;;;###autoload
|
|
(defun kannada-compose-string (string)
  "Return STRING with its Kannada syllables composed.
STRING is first passed through `decompose-string'; the result is
composed in a temporary buffer by `kannada-compose-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-region (point-min) (point-max))
      (buffer-string))))
|
|
80
|
|
81 ;;;###autoload
|
|
(defun kannada-post-read-conversion (len)
  "Compose the LEN characters following point; return the resulting length.
The buffer's modified flag is saved before composing and restored
afterwards."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p))
            (start (point)))
        (narrow-to-region start (+ start len))
        (kannada-compose-region (point-min) (point-max))
        (set-buffer-modified-p was-modified)
        ;; Size of the (narrowed) converted text.
        (- (point-max) (point-min))))))
|
|
90
|
|
(defun kannada-range (from to)
  "Return the list of integers from FROM up to and including TO.
The result is nil when FROM is greater than TO."
  (let ((numbers nil)
        (n to))
    ;; Walk downwards from TO so that consing yields ascending order.
    (while (>= n from)
      (push n numbers)
      (setq n (1- n)))
    numbers))
|
|
95
|
|
(defun kannada-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches any key of hash table HASHTBL.
The keys are collected, ordered longest first and given to `regexp-opt'.
`max-specpdl-size' is bound to 1000 while the regexp is built."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _val) (push key keys)) hashtbl)
    (regexp-opt
     (sort keys (lambda (a b) (> (length a) (length b)))))))
|
|
105
|
|
(defun kannada-regexp-of-hashtbl-vals (hashtbl)
  "Return a regular expression that matches any value of hash table HASHTBL.
The values are collected, ordered longest first and given to `regexp-opt'.
`max-specpdl-size' is bound to 1000 while the regexp is built."
  (let ((max-specpdl-size 1000)
        (vals nil))
    (maphash (lambda (_key val) (push val vals)) hashtbl)
    (regexp-opt
     (sort vals (lambda (a b) (> (length a) (length b)))))))
|
|
115
|
|
116 ;;;###autoload
|
|
(defun kannada-composition-function (pos &optional string)
  "Compose Kannada characters after the position POS.
If STRING is not nil, it is a string, and POS is an index to the string.
Composing inside a string is not yet implemented, so nil is returned
in that case.
Otherwise move point to POS and, if `kannada-composable-pattern'
matches there, compose the match and return its end position;
return nil when nothing matches."
  (unless string
    (goto-char pos)
    (when (looking-at kannada-composable-pattern)
      (let ((end (match-end 0)))
        (kannada-compose-syllable-region pos end)
        end))))
|
|
128
|
|
129 ;; Notes on conversion steps.
|
|
130
|
|
131 ;; 1. chars to glyphs
|
|
132 ;;
|
|
133 ;; Rules will not be applied to a virama appearing at the end of the
|
|
134 ;; text. Also, the preceding/following "r" will be treated as a special case.
|
|
135
|
|
136 ;; 2. glyphs reordering.
|
|
137 ;;
|
|
138 ;; The glyphs are split by virama, and each glyph group is
|
|
139 ;; re-ordered in the following order.
|
|
140 ;;
|
|
141 ;; Note that `consonant-glyph' mentioned here does not contain the
|
|
142 ;; vertical bar (right modifier) attached at the right of the
|
|
143 ;; consonant.
|
|
144 ;;
|
|
145 ;; If the glyph-group contains right modifier,
|
|
146 ;; (1) consonant-glyphs/vowels
|
|
147 ;; (2) spacing
|
|
148 ;; (3) right modifier (may be matra)
|
|
149 ;; (4) top matra
|
|
150 ;; (5) preceding "r"
|
|
151 ;; (7) following "r"
|
|
152 ;; (8) bottom matra or virama.
|
|
153 ;;
|
|
154 ;; Otherwise,
|
|
155 ;; (1) consonant-glyph/vowels, with nukta sign
|
|
156 ;; (3) left matra
|
|
157 ;; (4) top matra
|
|
158 ;; (5) preceding "r"
|
|
159 ;; (7) following "r"
|
|
160 ;; (8) bottom matra or virama.
|
|
161 ;; (2) spacing
|
|
162
|
|
163 ;; 3. glyph to glyph
|
|
164 ;;
|
|
165 ;; For better display, some glyph display would be tuned.
|
|
166
|
|
167 ;; 4. Composition.
|
|
168 ;;
|
|
169 ;; left modifiers will be attached at the left.
|
|
170 ;; others will be attached right.
|
|
171
|
|
172 ;; Problem::
|
|
173 ;; Can we generalize these methods to other Indian scripts?
|
|
174
|
|
175 (defvar knd-char-glyph
|
|
176 '(("$,1>e(B" . "$,43@(B")
|
|
177 ("$,1>f(B" . "$,43A(B")
|
|
178 ("$,1?>(B" . "$,44{(B")
|
|
179 ("$,1>g(B" . "$,43B(B")
|
|
180 ("$,1??(B" . nil)
|
|
181 ("$,1>h(B" . "$,43C(B")
|
|
182 ("$,1?@(B" . nil)
|
|
183 ("$,1>i(B" . "$,43D(B")
|
|
184 ("$,1?A(B" . "\$,44(B")
|
|
185 ("$,1>j(B" . "$,43E(B")
|
|
186 ("$,1?B(B" . "\$,45 (B")
|
|
187 ("$,1>k(B" . "$,43F4(B")
|
|
188 ("$,1?C(B" . "\$,45$(B")
|
|
189 ("$,1?`(B" . "$,43F5 (B")
|
|
190 ("$,1?D(B" . "\$,45%(B")
|
|
191 ;;("$,1>l(B" . nil) ; not implemented.
|
|
192 ;;("$,1?a(B" . nil)
|
|
193 ("$,1>n(B" . "$,43G(B")
|
|
194 ("$,1>o(B" . "$,43H(B")
|
|
195 ("$,1>p(B" . "$,43I(B")
|
|
196 ("$,1?F(B" . "\$,45&(B")
|
|
197 ("$,1?G(B" . "\$,45&4~(B")
|
|
198 ("$,1?H(B" . "\$,45&5'(B")
|
|
199 ("$,1>r(B" . "$,43J(B")
|
|
200 ("$,1?J(B" . "$,45&5 (B")
|
|
201 ("$,1>s(B" . "$,43K(B")
|
|
202 ("$,1?K(B" . "\$,45&5 4~(B")
|
|
203 ("$,1>t(B" . "$,43L(B")
|
|
204 ("$,1?L(B" . "\$,45((B")
|
|
205 ("$,1>b(B" . "$,43M(B")
|
|
206 ("$,1>c(B" . "$,43N(B")
|
|
207 ("$,1>u?M(B" . "$,43O5)(B") ("$,1>u(B" . "$,43O(B") ("$,1>u??(B" . "$,43P(B") ("$,1>u?@(B" . "$,43P4~(B")
|
|
208 ("$,1>v?M(B" . "$,43S5)(B") ("$,1>v(B" . "$,43S(B") ("$,1>v??(B" . "$,43T(B") ("$,1>v?@(B" . "$,43T4~(B") ("$,1>v?F(B" . "$,43S5&(B") ("$,1>v?G(B" . "$,43S5&4~(B") ("$,1>v?H(B" . "$,43S5&5'(B") ("$,1>v?J(B" . "$,43S5&5&5 (B") ("$,1>v?K(B" . "$,43S5&5&5 4~(B") ("$,1>v?L(B" . "$,43S5((B")
|
|
209 ("$,1>w?M(B" . "$,43V5)(B") ("$,1>w(B" . "$,43V(B") ("$,1>w??(B" . "$,43W(B") ("$,1>w?@(B" . "$,43W4~(B")
|
|
210 ("$,1>x?M(B" . "$,43Y5)(B") ("$,1>x(B" . "$,43Y(B") ("$,1>x??(B" . "$,43Z(B") ("$,1>x?@(B" . "$,43Z4~(B")
|
|
211 ("$,1>y?M(B" . "$,43\5)(B") ("$,1>y(B" . "$,43\(B")
|
|
212 ("$,1>z?M(B" . "$,43^5)(B") ("$,1>z(B" . "$,43^(B") ("$,1>z??(B" . "$,43_(B") ("$,1>z?@(B" . "$,43_4~(B")
|
|
213 ("$,1>{?M(B" . "$,43a5)(B") ("$,1>{(B" . "$,43a(B") ("$,1>{??(B" . "$,43b(B") ("$,1>{?@(B" . "$,43b4~(B")
|
|
214 ("$,1>|?M(B" . "$,43d5)(B") ("$,1>|(B" . "$,43d(B") ("$,1>|??(B" . "$,43f(B") ("$,1>|?@(B" . "$,43f4~(B") ("$,1>|?F(B" . "$,43e5&(B") ("$,1>|?G(B" . "$,43e5&4~(B") ("$,1>|?H(B" . "$,43e5&5'(B") ("$,1>|?J(B" . "$,43e5&5&5 (B") ("$,1>|?K(B" . "$,43e5&5&5 4~(B") ("$,1>|?L(B" . "$,43e5((B")
|
|
215 ("$,1>}?M(B" . "$,44a4z3h45)(B") ("$,1>}(B" . "$,44a4z3h4(B") ("$,1>}??(B" . "$,44b3h4(B") ("$,1>}?@(B" . "$,44b3h44~(B") ("$,1>}?B(B". "$,44a4z3h5 (B") ("$,1>}?J(B". "$,44a5&3h5 (B") ("$,1>}?K(B". "$,44a5&3h5 4~(B")
|
|
216 ("$,1>~?M(B" . "$,43j5)(B") ("$,1>~(B" . "$,43j(B")
|
|
217 ("$,1>?M(B" . "$,43m5)(B") ("$,1>(B" . "$,43l(B") ("$,1?#?>(B" . "$,43m4{(B") ("$,1>??(B" . "$,43n(B") ("$,1>?@(B" . "$,43n4~(B") ("$,1>?F(B" . "$,43m5&(B") ("$,1>?G(B" . "$,43m5&4~(B") ("$,1>?H(B" . "$,43m5&5'(B") ("$,1>?J(B" . "$,43m5&5&5 (B") ("$,1>?K(B" . "$,43m5&5&5 4~(B") ("$,1>?L(B" . "$,43m5((B")
|
|
218 ("$,1? ?M(B" . "$,43p5)(B") ("$,1? (B" . "$,43p(B") ("$,1? ??(B" . "$,43q(B") ("$,1? ?@(B" . "$,43q4~(B")
|
|
219 ("$,1?!?M(B" . "$,43s5)(B") ("$,1?!(B" . "$,43s(B") ("$,1?!??(B" . "$,43t(B") ("$,1?!?@(B" . "$,43t4~(B")
|
|
220 ("$,1?"?M(B" . "$,43v5)(B") ("$,1?"(B" . "$,43v(B") ("$,1?"??(B" . "$,43w(B") ("$,1?"?@(B" . "$,43w4~(B")
|
|
221 ("$,1?#?M(B" . "$,43z5)(B") ("$,1?#(B" . "$,43y(B") ("$,1?#?>(B" . "$,43z4{(B") ("$,1?#??(B" . "$,43{(B") ("$,1?#?@(B" . "$,43{4~(B") ("$,1?#?F(B" . "$,43z5&(B") ("$,1?#?G(B" . "$,43z5&4~(B") ("$,1?#?H(B" . "$,43z5&5'(B") ("$,1?#?J(B" . "$,43z5&5&5 (B") ("$,1?#?K(B" . "$,43z5&5&5 4~(B") ("$,1?#?L(B" . "$,43z5((B")
|
|
222 ("$,1?$?M(B" . "$,43}5)(B") ("$,1?$(B" . "$,43}(B") ("$,1?$??(B" . "$,43~(B") ("$,1?$?@(B" . "$,43~4~(B")
|
|
223 ("$,1?%?M(B" . "$,44B5)(B") ("$,1?%(B" . "$,44B(B") ("$,1?%??(B" . "$,44C(B") ("$,1?%?@(B" . "$,44C4~(B")
|
|
224 ("$,1?&?M(B" . "$,44E5)(B") ("$,1?&(B" . "$,44E(B") ("$,1?&??(B" . "$,44F(B") ("$,1?&?@(B" . "$,44F4~(B")
|
|
225 ("$,1?'?M(B" . "$,44H5)(B") ("$,1?'(B" . "$,44H(B") ("$,1?'??(B" . "$,44I(B") ("$,1?'?@(B" . "$,44I4~(B")
|
|
226 ("$,1?(?M(B" . "$,44K5)(B") ("$,1?((B" . "$,44K(B") ("$,1?(??(B" . "$,44L(B") ("$,1?(?@(B" . "$,44L4~(B")
|
|
227 ("$,1?*?M(B" . "$,44N5)(B") ("$,1?*(B" . "$,44N(B") ("$,1?*??(B" . "$,44O(B") ("$,1?*?@(B" . "$,44O4~(B") ("$,1?*?A(B" . "$,44N5"(B") ("$,1?*?B(B" . "$,44N5#(B") ("$,1?*?J(B" . "$,44N5&5#(B") ("$,1?*?K(B" . "$,44N5&5#4~(B")
|
|
228 ("$,1?+?M(B" . "$,44Q5)(B") ("$,1?+(B" . "$,44Q(B") ("$,1?+??(B" . "$,44R(B") ("$,1?+?@(B" . "$,44R4~(B") ("$,1?+?A(B" . "$,44Q5"(B") ("$,1?+?B(B" . "$,44Q5#(B") ("$,1?+?J(B" . "$,44Q5&5#(B") ("$,1?+?K(B" . "$,44Q5&5#4~(B")
|
|
229 ("$,1?,?M(B" . "$,44W5)(B") ("$,1?,(B" . "$,44V(B") ("$,1?,?>(B". "$,44W4{(B") ("$,1?,??(B" . "$,44X(B") ("$,1?,?@(B" . "$,44X4~(B") ("$,1?,?F(B" . "$,44W5&(B") ("$,1?,?G(B" . "$,44W5&4~(B") ("$,1?,?H(B" . "$,44W5&5'(B") ("$,1?,?J(B" . "$,44W5&5&5 (B") ("$,1?,?K(B" . "$,44W5&5&5 4~(B") ("$,1?,?L(B" . "$,44W5((B")
|
|
230 ("$,1?-?M(B" . "$,44Z5)(B") ("$,1?-(B" . "$,44Z(B") ("$,1?-??(B" . "$,44[(B") ("$,1?-?@(B" . "$,44[4~(B")
|
|
231 ("$,1?.?M(B" . "$,44h5!5)(B") ("$,1?.(B" . "$,44h4z4(B") ("$,1?.?>(B" . "$,44h4z5!4{(B") ("$,1?.??(B" . "$,44i4(B") ("$,1?.?@(B" . "$,44i44~(B") ("$,1?.?J(B". "$,44h5&5 (B") ("$,1?.?K(B". "$,44h5&5 4~(B")
|
|
232 ("$,1?/?M(B" . "$,44^4z5!5)(B") ("$,1?/(B" . "$,44^4z4(B") ("$,1?/?>(B" . "$,44^4z5!4{(B")("$,1?/??(B" . "$,44_4(B") ("$,1?/?@(B" . "$,44_44~(B") ("$,1?/?J(B" . "$,44^5&5 (B") ("$,1?/?K(B" . "$,44^5&5 4~(B")
|
|
233 ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B")
|
|
234 ("$,1?0?M(B" . "$,44a5)(B") ("$,1?0(B" . "$,44a(B") ("$,1?0??(B" . "$,44b(B") ("$,1?0?@(B" . "$,44b4~(B")
|
|
235 ("$,1?2?M(B" . "$,44e5)(B") ("$,1?2(B" . "$,44d(B") ("$,1?2?>(B" . "$,44e4{(B") ("$,1?2??(B" . "$,44f(B") ("$,1?2?@(B" . "$,44f4~(B") ("$,1?2?F(B" . "$,44e5&(B") ("$,1?2?G(B" . "$,44e5&4~(B") ("$,1?2?H(B" . "$,44e5&5'(B") ("$,1?2?J(B" . "$,44e5&5&5 (B") ("$,1?2?K(B" . "$,44e5&5&5 4~(B") ("$,1?2?L(B" . "$,44e5((B")
|
|
236 ("$,1?5?M(B" . "$,44h5)(B") ("$,1?5(B" . "$,44h(B") ("$,1?5??(B" . "$,44i(B") ("$,1?5?@(B" . "$,44i4~(B") ("$,1?5?A(B" . "$,44h5"(B") ("$,1?5?B(B" . "$,44h5#(B") ("$,1?5?J(B" . "$,44h5&5#(B") ("$,1?5?K(B" . "$,44h5&5#4~(B")
|
|
237 ("$,1?6?M(B" . "$,44k5)(B") ("$,1?6(B" . "$,44k(B") ("$,1?6??(B" . "$,44l(B") ("$,1?6?@(B" . "$,44l4~(B")
|
|
238 ("$,1?7?M(B" . "$,44n5)(B") ("$,1?7(B" . "$,44n(B") ("$,1?7??(B" . "$,44o(B") ("$,1?7?@(B" . "$,44o4~(B")
|
|
239 ("$,1?8?M(B" . "$,44q5)(B") ("$,1?8(B" . "$,44q(B") ("$,1?8??(B" . "$,44r(B") ("$,1?8?@(B" . "$,44r4~(B")
|
|
240 ("$,1?9?M(B" . "$,44t5)(B") ("$,1?9(B" . "$,44t(B") ("$,1?9??(B" . "$,44u(B") ("$,1?9?@(B" . "$,44u4~(B")
|
|
241 ("$,1?3?M(B" . "$,44w5)(B") ("$,1?3(B" . "$,44w(B") ("$,1?3??(B" . "$,44x(B") ("$,1?3?@(B" . "$,44x4~(B"))
|
|
242 "Kannada characters to glyphs conversion table.
|
|
243 Default value contains only the basic rules.")
|
|
244
|
|
(defvar knd-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Entries are stored in list order, so a later duplicate key wins.
    (dolist (entry knd-char-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table (test `equal') built from the alist `knd-char-glyph'.")
|
|
250
|
|
(defvar knd-char-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-char-glyph-hash)
  "Regexp matching any key of `knd-char-glyph-hash'.")
|
|
253
|
|
254 (defvar knd-conjunct-glyph
|
|
255 '(("$,1>u(B" . "$,43Q(B") ("$,1>v(B" . "$,43U(B") ("$,1>w(B" . "$,43X(B") ("$,1>x(B" . "$,43[(B") ("$,1>y(B" . "$,43](B")
|
|
256 ("$,1>z(B" . "$,43`(B") ("$,1>{(B" . "$,43c(B") ("$,1>|(B" . "$,43g(B") ("$,1>}(B" . "$,43i(B") ("$,1>~(B" . "$,43k(B")
|
|
257 ("$,1>(B" . "$,43o(B") ("$,1? (B" . "$,43r(B") ("$,1?!(B" . "$,43u(B") ("$,1?"(B" . "$,43x(B") ("$,1?#(B" . "$,43|(B")
|
|
258 ("$,1?$(B" . "$,44A(B") ("$,1?%(B" . "$,44D(B") ("$,1?&(B" . "$,44G(B") ("$,1?'(B" . "$,44J(B") ("$,1?((B" . "$,44M(B")
|
|
259 ("$,1?*(B" . "$,44P(B") ("$,1?+(B" . "$,44U(B") ("$,1?,(B" . "$,44Y(B") ("$,1?-(B" . "$,44\(B") ("$,1?.(B" . "$,44](B")
|
|
260 ("$,1?/(B" . "$,44`(B") ("$,1?0(B" . "$,44c(B") ("$,1?2(B" . "$,44g(B") ("$,1?3(B" . "$,44y(B") ("$,1?5(B" . "$,44j(B")
|
|
261 ("$,1?6(B" . "$,44m(B") ("$,1?7(B" . "$,44p(B") ("$,1?8(B" . "$,44s(B") ("$,1?9(B" . "$,44v(B"))
|
|
262 "Kannada characters to conjunct glyphs conversion table.")
|
|
263
|
|
(defvar knd-conjunct-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-conjunct-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table (test `equal') built from the alist `knd-conjunct-glyph'.")
|
|
269
|
|
(defvar knd-conjunct-glyph-regexp
  (kannada-regexp-of-hashtbl-vals knd-conjunct-glyph-hash)
  "Regexp matching any value (conjunct glyph) of `knd-conjunct-glyph-hash'.")
|
|
272
|
|
;; Give the first character of every conjunct glyph string the
;; reference point (5 . 3).
(dolist (entry knd-conjunct-glyph)
  (put-char-code-property (aref (cdr entry) 0) 'reference-point '(5 . 3)))
|
|
277
|
|
278 ;; glyph-to-glyph conversion table.
|
|
279 ;; it is supposed that glyphs are ordered in
|
|
280 ;; [consonant/nukta] - [matra/virama] - [preceding-r] - [anuswar].
|
|
281
|
|
282 (defvar knd-glyph-glyph
|
|
283 '(("$,45$4A(B" . "$,45*(B")
|
|
284 ("$,45'4A(B" . "$,45+(B")
|
|
285 ("$,44A3g(B" . "$,45,(B")
|
|
286 ("$,45$3Q(B" . "$,45-(B")))
|
|
287
|
|
(defvar knd-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    (dolist (entry knd-glyph-glyph)
      (puthash (car entry) (cdr entry) table))
    table)
  "Hash table (test `equal') built from the alist `knd-glyph-glyph'.")
|
|
(defvar knd-glyph-glyph-regexp
  (kannada-regexp-of-hashtbl-keys knd-glyph-glyph-hash)
  "Regexp matching any key of `knd-glyph-glyph-hash'.")
|
|
295
|
|
(defun knd-charseq (from &optional to)
  "Return the characters for `kannada-cdac' code points FROM through TO.
FROM and TO are decoded with `decode-char' and the range between the
two results is built with `number-sequence'.  TO defaults to FROM,
giving a one-element list."
  (let ((lo (decode-char 'kannada-cdac from))
        (hi (decode-char 'kannada-cdac (or to from))))
    (number-sequence lo hi)))
|
|
300
|
|
(defvar knd-glyph-cv
  (append
   (knd-charseq #x40 #x50)
   (knd-charseq #x52 #x54)
   (knd-charseq #x56 #x57)
   (knd-charseq #x59 #x5a)
   (knd-charseq #x5c)
   (knd-charseq #x5e #x5f)
   (knd-charseq #x61 #x62)
   (knd-charseq #x64 #x66)
   (knd-charseq #x6a)
   (knd-charseq #x6c #x6e)
   (knd-charseq #x70 #x71)
   (knd-charseq #x73 #x74)
   (knd-charseq #x76 #x77)
   (knd-charseq #x79 #x7b)
   (knd-charseq #x7d #x7e)
   (knd-charseq #xa2 #xa3)
   (knd-charseq #xa5 #xa6)
   (knd-charseq #xa8 #xa9)
   (knd-charseq #xab #xac)
   (knd-charseq #xae #xaf)
   (knd-charseq #xb1 #xb2)
   ;; This range used to be listed twice; the redundant copy only
   ;; duplicated the same three glyphs in the list.
   (knd-charseq #xb6 #xb8)
   (knd-charseq #xba #xbb)
   (knd-charseq #xbe #xbf)
   (knd-charseq #xc1 #xc2)
   (knd-charseq #xc4 #xc6)
   (knd-charseq #xc8 #xc9)
   (knd-charseq #xcb #xcc)
   (knd-charseq #xce #xcf)
   (knd-charseq #xd1 #xd2)
   (knd-charseq #xd4 #xd5)
   (knd-charseq #xd7 #xd8)
   (knd-charseq #xc3))
  "Kannada Consonants/Vowels/Nukta Glyphs.
A list of characters built from `kannada-cdac' code point ranges.")
|
|
338
|
|
(defvar knd-glyph-space (knd-charseq #xb3 #xb4)
  "Kannada Spacing Glyphs")
|
|
342
|
|
(defvar knd-glyph-right-modifier
  ;; Each `knd-charseq' call returns a fresh list, so the pieces can be
  ;; spliced together destructively.
  (nconc (knd-charseq #xdb #xdd)
         (knd-charseq #xdf)
         (knd-charseq #xe0 #xe3)
         (knd-charseq #xe9))
  "Kannada Modifiers attached at the right side.")
|
|
350
|
|
(defvar knd-glyph-right-modifier-regexp
  (concat "[" (apply 'string knd-glyph-right-modifier) "]")
  "Regexp character class built from the glyphs in `knd-glyph-right-modifier'.")
|
|
353
|
|
(defvar knd-glyph-jha-tail (knd-charseq #x68)
  "Kannada tail for jha.")
|
|
357
|
|
(defvar knd-glyph-top-matra
  ;; One single-glyph list per code point, concatenated in order.
  (apply 'append (mapcar 'knd-charseq '(#xda #xdd #xe6 #xe8)))
  "Kannada Matras attached at the top side.")
|
|
365
|
|
(defvar knd-glyph-bottom-matra
  (nconc (knd-charseq #xe4 #xe5)
         (knd-charseq #xe7))
  "Kannada Matras attached at the bottom.")
|
|
371
|
|
(defvar knd-glyph-end-marks
  (nconc (knd-charseq #x25)
         (knd-charseq #x4d #x4e)
         (knd-charseq #xde))
  "Kannada end marks: arkavattu, virama, au and diirghaa.")
|
|
378
|
|
(defvar knd-glyph-bottom-modifier
  (append
   ;; Isolated code points, one glyph each.
   (apply 'append
          (mapcar 'knd-charseq
                  '(#x51 #x55 #x58 #x5b #x5d #x60 #x63 #x67
                    #x69 #x6b #x6f #x72 #x75 #x78 #x7c #xa1
                    #xa4 #xa7 #xaa #xad #xb0 #xb5 #xb9)))
   (knd-charseq #xbc #xbd)
   (apply 'append
          (mapcar 'knd-charseq
                  '(#xc0 #xc3 #xc7 #xca #xcd #xd0 #xd3 #xd6 #xd9)))
   (knd-charseq #xea #xef))
  "Kannada Modifiers attached at the bottom.")
|
|
416
|
|
(defvar knd-glyph-order
  (list (cons knd-glyph-cv 1)
        (cons knd-glyph-top-matra 2)
        (cons knd-glyph-jha-tail 3)
        (cons knd-glyph-right-modifier 4)
        (cons knd-glyph-space 5)
        (cons knd-glyph-bottom-modifier 5)
        (cons knd-glyph-bottom-matra 6)
        (cons knd-glyph-end-marks 7))
  "Alist of (GLYPHS . ORDER) pairs.
GLYPHS is a list of glyph characters and ORDER is the number stored
as their `composition-order' property.")
|
|
427
|
|
;; Store each glyph's rank from `knd-glyph-order' in its
;; `composition-order' property.  Groups are processed in list order,
;; so a glyph listed in several groups keeps the rank of the last one.
(dolist (group knd-glyph-order)
  (dolist (glyph (car group))
    (put-char-code-property glyph 'composition-order (cdr group))))
|
|
435
|
|
(defun kannada-compose-syllable-string (string)
  "Return STRING composed as a single Kannada syllable.
STRING is first passed through `decompose-string'; the result is
composed in a temporary buffer by `kannada-compose-syllable-region'."
  (let ((plain (decompose-string string)))
    (with-temp-buffer
      (insert plain)
      (kannada-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
|
|
441
|
|
442 ;; kch
|
|
(defun kannada-compose-syllable-region (from to)
  "Compose the Kannada syllable in the region FROM to TO.
The characters are converted to glyphs through `knd-char-glyph-hash'
and `knd-conjunct-glyph-hash', tuned through `knd-glyph-glyph-hash',
sorted block by block on their `composition-order' property, and the
resulting list of components is passed to `compose-region'.

Text that contains no character matched by `knd-char-glyph-regexp'
\(including an empty region) does not signal an error; `compose-region'
is then called with nil components.

The return value is that of `compose-region'."
  (let ((glyph-str "")        ; glyphs so far; later reused for components
        (cons-num 0) (glyph-str-list nil)
        (last-virama nil) (preceding-r nil)
        (last-char (char-before to)) match-str pos
        glyph-block split-pos (conj nil)
        ;; Start from the empty string rather than nil: `string-match'
        ;; below would signal `wrong-type-argument' on nil when nothing
        ;; in the region is known to the glyph table.
        (rest ""))
    (save-excursion
      (save-restriction
        ;; *** char-to-glyph conversion ***
        ;; Special rule 1. -- Last virama must be preserved.
        (if (eq last-char ?$,1?M(B)
            (progn
              (setq last-virama t)
              (narrow-to-region from (1- to)))
          (narrow-to-region from to))
        (goto-char (point-min))
        ;; Special rule 2. -- preceding "r virama" must be modifier.
        (when (looking-at "$,1?0?M(B.")
          (setq preceding-r t)
          (goto-char (+ 2 (point))))
        ;; Remove conjunct consonants: every consonant after the first
        ;; is replaced by its conjunct glyph (collected in CONJ), and
        ;; the first virama accumulated in REST is dropped.
        (while (re-search-forward knd-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          (if (and (string-match kannada-consonant match-str)
                   (> cons-num 0))
              (progn
                (setq conj (concat conj (gethash (match-string 0 match-str)
                                                 knd-conjunct-glyph-hash)))
                (setq match-str (replace-match "" t nil match-str))
                (if (string-match "$,1?M(B" rest)
                    (setq rest (replace-match "" t nil rest)))))
          (setq rest (concat rest match-str))
          ;; Count the number of consonant glyphs.
          (if (string-match kannada-consonant match-str)
              (setq cons-num (1+ cons-num))))
        ;; Translate the remaining characters into glyphs.
        (setq pos 0)
        (while (string-match knd-char-glyph-regexp rest pos)
          (setq match-str (match-string 0 rest))
          (setq pos (match-end 0))
          (setq glyph-str
                (concat glyph-str (gethash match-str knd-char-glyph-hash))))

        (if conj (setq glyph-str (concat glyph-str conj)))
        (if last-virama (setq glyph-str (concat glyph-str "$,45)(B"))
          (goto-char (point-min))
          (if (re-search-forward kannada-consonant-needs-twirl nil t)
              (setq glyph-str (concat glyph-str "$,44z(B"))))
        ;; preceding-r must be attached
        (if preceding-r
            (setq glyph-str (concat glyph-str "$,43%(B")))
        ;; *** glyph-to-glyph conversion ***
        (when (string-match knd-glyph-glyph-regexp glyph-str)
          (setq glyph-str
                (replace-match (gethash (match-string 0 glyph-str)
                                        knd-glyph-glyph-hash)
                               nil t glyph-str)))
        ;; *** glyph reordering ***
        ;; Cut GLYPH-STR after each virama glyph (or at its last
        ;; character) and sort every piece on `composition-order'.
        (while (setq split-pos (string-match "$,45)(B\\|.$" glyph-str))
          (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
          (setq glyph-str (substring glyph-str (1+ split-pos)))
          (setq
           glyph-block
           (sort (string-to-list glyph-block)
                 (function (lambda (x y)
                             (< (get-char-code-property x 'composition-order)
                                (get-char-code-property y 'composition-order))))))
          (setq glyph-str-list (nconc glyph-str-list glyph-block)))
        ;; *** insert space glyphs for kerning ***
        ;; A space glyph is spliced in between two adjacent glyphs whose
        ;; composition order is 5 or 6.
        (if (> cons-num 0)
            (let ((curr glyph-str-list) (prev nil) (last-bott nil) bott co)
              (while curr
                (setq co (get-char-code-property
                          (car curr) 'composition-order)
                      bott (or (eq co 5) (eq co 6)))
                (if (and bott last-bott)
                    (setcdr prev (cons ?$,44T(B curr)))
                (setq last-bott bott prev curr curr (cdr curr)))))
        ;; Concatenate and attach reference-points.  Each glyph is
        ;; preceded by its reference point; the leading one is dropped
        ;; by the `cdr'.
        (setq glyph-str
              (cdr
               (apply
                'nconc
                (mapcar
                 (function (lambda (x)
                             (list
                              (or (get-char-code-property x 'reference-point)
                                  '(5 . 3) ;; default reference point.
                                  )
                              x)))
                 glyph-str-list))))))
    (compose-region from to glyph-str)))
|
|
537
|
|
538 (provide 'knd-util)
|
|
539
|
|
540 ;;; arch-tag: 78d32230-a960-46a5-b622-61ed6ffcf8fc
|
|
541 ;;; knd-util.el ends here
|