Mercurial > emacs
annotate lisp/language/tml-util.el @ 91884:927a69bb381a
(emacs-uptime): Use before-init-time rather than emacs-startup-time.
author | Glenn Morris <rgm@gnu.org> |
---|---|
date | Sat, 16 Feb 2008 22:21:06 +0000 |
parents | 606f2d163a64 |
children |
rev | line source |
---|---|
49702 | 1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*- |
2 | |
79711 | 3 ;; Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007, 2008 |
4 ;; Free Software Foundation, Inc. | |
49702 | 5 |
6 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> | |
7 ;; Keywords: multilingual, Indian, Tamil | |
8 | |
9 ;; This file is part of GNU Emacs. | |
10 | |
11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 ;; it under the terms of the GNU General Public License as published by | |
78236
9355f9b7bbff
Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents:
75347
diff
changeset
|
13 ;; the Free Software Foundation; either version 3, or (at your option) |
49702 | 14 ;; any later version. |
15 | |
16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 ;; GNU General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
64085 | 23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
24 ;; Boston, MA 02110-1301, USA. | |
49702 | 25 |
26 ;; Created: Nov. 08. 2002 | |
27 | |
28 ;;; Commentary: | |
29 | |
30 ;; This file provides character(Unicode) to glyph(CDAC) conversion and | |
31 ;; composition of Tamil script characters. | |
32 | |
33 ;;; Code: | |
34 | |
35 ;; Tamil Composable Pattern | |
36 ;; C .. Consonants | |
37 ;; V .. Vowel | |
38 ;; H .. Pulli | |
39 ;; M .. Matra | |
40 ;; V .. Vowel | |
41 ;; A .. Anuswar | |
42 ;; D .. Chandrabindu | |
43 ;; 1. vowel | |
44 ;; V | |
45 ;; 2. syllable : only ligature-formed pattern forms composition. | |
46 ;; (CkHCs|C)(H|M)? | |
47 ;; 3. sri special | |
48 ;; (CsHCrVi) | |
49 | |
50 ;; original | |
51 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? | |
52 | |
(defconst tamil-consonant
  "[$,1<5(B-$,1<Y(B]"
  "Regexp matching one character of the Tamil consonant range.")
55 | |
(defconst tamil-composable-pattern
  (concat
   ;; Group 1: an independent vowel.
   "\\([$,1<%(B-$,1<4(B]\\)\\|"
   "[$,1<"<#(B]\\|" ;; vowel modifier considered independent
   ;; Group 2: a consonant (or the special consonant cluster), optionally
   ;; followed by a single pulli or dependent vowel sign.
   "\\(\\(?:\\(?:$,1<5<m<W(B\\)\\|[$,1<5(B-$,1<Y(B]\\)[$,1<m<^(B-$,1<l(B]?\\)\\|"
   ;; Group 3: the special "sri" sequence.
   "\\($,1<W<m<P<`(B\\)")
  "Regexp matching a composable sequence of Tamil characters.
The alternatives are, in order: an independent vowel, a vowel
modifier taken on its own, a consonant syllable, and the special
\"sri\" sequence.")
63 | |
64 ;;;###autoload | |
(defun tamil-compose-region (from to)
  "Compose every Tamil syllable found between FROM and TO.
The region is searched for `tamil-composable-pattern'; each match is
handed to `tamil-compose-syllable-region'.  Point and the current
restriction are preserved.  Interactively, operate on the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward tamil-composable-pattern nil t)
        ;; Capture the match bounds before composing the syllable.
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (tamil-compose-syllable-region syllable-start syllable-end))))))
(defun tamil-compose-string (string)
  "Return the text of STRING after running Tamil composition over it.
STRING is passed through `decompose-string', inserted into a temporary
buffer, composed there with `tamil-compose-region', and the buffer
contents are returned."
  (with-temp-buffer
    (let ((plain-text (decompose-string string)))
      (insert plain-text)
      (tamil-compose-region (point-min) (point-max))
      (buffer-string))))
79 | |
52519
18d7e5b12285
(tamil-post-read-conversion): Add autoload cookie.
Kenichi Handa <handa@m17n.org>
parents:
52401
diff
changeset
|
80 ;;;###autoload |
(defun tamil-post-read-conversion (len)
  "Compose the Tamil text in the LEN characters following point.
The buffer's modified flag is put back to the value it had on entry.
Return the length of the processed text after composition."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p)))
        (narrow-to-region (point) (+ (point) len))
        (tamil-compose-region (point-min) (point-max))
        ;; Composing must not mark the freshly read buffer as modified.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
89 | |
(defun tamil-range (from to)
  "Return the list of integers FROM, FROM+1, ..., TO in ascending order.
The result is nil when FROM is greater than TO."
  (let ((upper to)
        (numbers nil))
    ;; Walk downwards from TO, consing each value onto the front, so the
    ;; finished list comes out in ascending order.
    (while (>= upper from)
      (setq numbers (cons upper numbers)
            upper (1- upper)))
    numbers))
94 | |
(defun tamil-regexp-of-hashtbl-keys (hashtbl)
  "Return a regexp that matches any one of the keys of hash table HASHTBL.
The keys are collected, ordered from longest to shortest, and handed
to `regexp-opt'.  `max-specpdl-size' is bound to 1000 for the duration
of the call."
  (let ((max-specpdl-size 1000)
        (keys nil))
    (maphash (lambda (key _value) (push key keys)) hashtbl)
    (regexp-opt
     (sort keys
           (lambda (a b) (> (length a) (length b)))))))
104 | |
105 | |
106 ;; Notes on conversion steps. | |
107 | |
108 ;; 1. chars to glyphs | |
109 ;; Simple replacement of characters to glyphs is done. | |
110 | |
111 ;; 2. glyphs reordering. | |
112 ;; The glyphs "$,4)j(B", "$,4)k(B", "$,4)l(B" that follow a consonant are moved in front of it. | |
113 | |
114 ;; 3. glyphs to glyphs | |
115 ;; reordered vowels are ligatured to consonants. | |
116 | |
117 ;; 4. Composition. | |
118 ;; left modifiers will be attached at the left. | |
119 ;; others will be attached right. | |
120 | |
121 (defvar tml-char-glyph | |
122 '(;; various signs | |
89483 | 123 ("$,1<"(B" . "$,4)b(B") ;; not good |
49702 | 124 ("$,1<#(B" . "$,4*G(B") |
125 ;; Independent Vowels | |
126 ("$,1<%(B" . "$,4*<(B") | |
127 ("$,1<&(B" . "$,4*=(B") | |
128 ("$,1<'(B" . "$,4*>(B") | |
129 ("$,1<((B" . "$,4*?(B") | |
130 ("$,1<)(B" . "$,4*@(B") | |
131 ("$,1<*(B" . "$,4*A(B") | |
132 ("$,1<.(B" . "$,4*B(B") | |
133 ("$,1</(B" . "$,4*C(B") | |
134 ("$,1<0(B" . "$,4*D(B") | |
135 ("$,1<2(B" . "$,4*E(B") | |
136 ("$,1<3(B" . "$,4*F(B") | |
137 ("$,1<4(B" . "$,4*E*W(B") | |
138 ;; Consonants | |
139 ("$,1<5<m<W<m(B" . "$,4):(B") ; ks. | |
140 ("$,1<5<m<W(B" . "$,4*^(B") ; ks | |
141 ("$,1<5(B" . "$,4*H(B") | |
142 | |
143 ("$,1<9(B" . "$,4*I(B") | |
144 ("$,1<:(B" . "$,4*J(B") | |
145 ("$,1<<(B" . "$,4*\(B") | |
146 ("$,1<<<m(B" . "$,4)8(B") | |
147 ("$,1<>(B" . "$,4*K(B") | |
148 ("$,1<?(B" . "$,4*L(B") | |
149 ("$,1<C(B" . "$,4*M(B") | |
150 ("$,1<D(B" . "$,4*N(B") | |
151 ("$,1<H(B" . "$,4*O(B") | |
152 ("$,1<I(B" . "$,4*Y(B") | |
153 ("$,1<I<m(B" . "$,4)a(B") | |
154 ("$,1<J(B" . "$,4*P(B") | |
155 ("$,1<N(B" . "$,4*Q(B") | |
156 ("$,1<O(B" . "$,4*R(B") | |
157 ("$,1<P(B" . "$,4*S(B") | |
158 ("$,1<Q(B" . "$,4*X(B") | |
159 ("$,1<R(B" . "$,4*T(B") | |
160 ("$,1<S(B" . "$,4*W(B") | |
161 ("$,1<T(B" . "$,4*V(B") | |
162 ("$,1<U(B" . "$,4*U(B") | |
163 ("$,1<W(B" . "$,4*[(B") | |
164 ("$,1<W<m(B" . "$,4)7(B") | |
165 ("$,1<W<m<P<`(B" . "$,4*_(B") | |
166 ("$,1<X(B" . "$,4*Z(B") | |
167 ("$,1<X<m(B" . "$,4)6(B") | |
168 ("$,1<Y(B" . "$,4*](B") | |
169 ("$,1<Y<m(B" . "$,4)9(B") | |
170 | |
171 ;; Dependent vowel signs | |
172 ("$,1<^(B" . "$,4)c(B") | |
173 ("$,1<_(B" . "$,4)d(B") | |
174 ("$,1<`(B" . "$,4)f(B") | |
175 ("$,1<a(B" . "$,4)g(B") | |
176 ("$,1<b(B" . "$,4)h(B") | |
177 ("$,1<f(B" . "$,4)j(B") | |
178 ("$,1<g(B" . "$,4)k(B") | |
179 ("$,1<h(B" . "$,4)l(B") | |
180 ("$,1<j(B" . "$,4)j)c(B") | |
181 ("$,1<k(B" . "$,4)k)c(B") | |
182 ("$,1<l(B" . "$,4)j*W(B") | |
183 | |
184 ;; Various signs | |
185 ("$,1<m(B" . "$,4)b(B") | |
186 ("$,1<w(B" . "nil") ;; not supported? | |
187 )) | |
188 | |
(defvar tml-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Enter every (CHARS . GLYPHS) pair of the alist into the table.
    (dolist (pair tml-char-glyph)
      (puthash (car pair) (cdr pair) table))
    table)
  "Hash table built from `tml-char-glyph'.
Keys are character strings and values are the corresponding glyph
strings; keys are compared with `equal'.")
194 | |
(defvar tml-char-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash)
  "Regexp matching any character sequence that is a key of `tml-char-glyph-hash'.")
197 | |
198 ;; Tamil glyphs need to be reordered. | |
199 | |
(defvar tml-consonants-regexp
  "[$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*](B]"
  "Regexp matching a single consonant glyph.")
202 | |
(defvar tml-glyph-reorder-key-glyphs
  "[$,4)j)k)l(B]"
  "Regexp matching a glyph whose presence makes reordering necessary.
`tamil-compose-syllable-region' attempts reordering only when a glyph
string matches this regexp.")
204 | |
(defvar tml-glyph-reordering-regexp-list
  (let ((consonant-then-key
         (concat "\\(" tml-consonants-regexp "\\)\\([$,4)j)k)l(B]\\)"))
        (swapped-groups "\\2\\1"))
    (cons consonant-then-key swapped-groups))
  "Cons cell (REGEXP . REPLACEMENT) used for glyph reordering.
REGEXP captures a consonant glyph as group 1 and the glyph that follows
it as group 2; REPLACEMENT is a `replace-match' template that emits the
two groups in swapped order.  Despite the name, the value is a single
cons cell, not a list of them.")
208 | |
209 ;; Tamil vowel modifiers to be ligatured. | |
210 (defvar tml-glyph-glyph | |
211 '( | |
212 ("$,4*H)d(B" . "$,4(a(B") ; ki | |
213 ("$,4*^)d(B" . "$,4(v(B") ; ksi | |
214 ("$,4*^)f(B" . "$,4)2(B") ; ksi~ | |
215 ("$,4*I)d(B" . "$,4(b(B") ; n^i | |
216 ("$,4*J)d(B" . "$,4(c(B") ; ci | |
217 ("$,4*K)d(B" . "$,4(d(B") ; n~i | |
218 ("$,4*L)d(B" . "$,4)n(B") ; t.i | |
219 ("$,4*M)d(B" . "$,4(e(B") ; n.i | |
220 ("$,4*N)d(B" . "$,4(f(B") ; ti | |
221 ("$,4*O)d(B" . "$,4(g(B") ; ni | |
222 ("$,4*P)d(B" . "$,4(h(B") ; pi | |
223 ("$,4*Q)d(B" . "$,4(i(B") ; mi | |
224 ("$,4*R)d(B" . "$,4(j(B") ; yi | |
225 ("$,4*S)d(B" . "$,4(k(B") ; ri | |
226 ("$,4*T)d(B" . "$,4(l(B") ; li | |
227 ("$,4*U)d(B" . "$,4(m(B") ; vi | |
228 ("$,4*V)d(B" . "$,4(n(B") ; l_i | |
229 ("$,4*W)d(B" . "$,4(o(B") ; l.i | |
230 ("$,4*X)d(B" . "$,4(p(B") ; r_i | |
231 ("$,4*Y)d(B" . "$,4(q(B") ; n_i | |
232 ("$,4*Z)d(B" . "$,4(r(B") ; si | |
233 ("$,4*[)d(B" . "$,4(s(B") ; s'i | |
234 ("$,4*\)d(B" . "$,4(t(B") ; ji | |
235 ("$,4*])d(B" . "$,4(u(B") ; hi | |
236 | |
237 ("$,4*H)f(B" . "$,4(w(B") ; ki~ | |
238 ("$,4*I)f(B" . "$,4(x(B") ; n^i~ | |
239 ("$,4*J)f(B" . "$,4(y(B") ; ci~ | |
240 ("$,4*K)f(B" . "$,4(z(B") ; n~i~ | |
241 ("$,4*L)f(B" . "$,4)o(B") ; t.i~ | |
242 ("$,4*M)f(B" . "$,4)!(B") ; n.i~ | |
243 ("$,4*N)f(B" . "$,4)"(B") ; ti~ | |
244 ("$,4*O)f(B" . "$,4)#(B") ; ni~ | |
245 ("$,4*P)f(B" . "$,4)$(B") ; pi~ | |
246 ("$,4*Q)f(B" . "$,4)%(B") ; mi~ | |
247 ("$,4*R)f(B" . "$,4)&(B") ; yi~ | |
248 ("$,4*S)f(B" . "$,4)'(B") ; ri~ | |
249 ("$,4*T)f(B" . "$,4)((B") ; li~ | |
250 ("$,4*U)f(B" . "$,4))(B") ; vi~ | |
251 ("$,4*V)f(B" . "$,4)*(B") ; l_i~ | |
252 ("$,4*W)f(B" . "$,4)+(B") ; l.i~ | |
253 ("$,4*X)f(B" . "$,4),(B") ; r_i~ | |
254 ("$,4*Y)f(B" . "$,4)-(B") ; n_i~ | |
255 ("$,4*Z)f(B" . "$,4).(B") ; si~ | |
256 ("$,4*[)f(B" . "$,4)/(B") ; s'i~ | |
257 ("$,4*\)f(B" . "$,4)0(B") ; ji~ | |
258 ("$,4*])f(B" . "$,4)1(B") ; hi~ | |
259 | |
260 ("$,4*H)g(B" . "$,4)p(B") ; ku | |
261 ("$,4*I)g(B" . "$,4)q(B") ; n^u | |
262 ("$,4*J)g(B" . "$,4)r(B") ; cu | |
263 ("$,4*K)g(B" . "$,4)s(B") ; n~u | |
264 ("$,4*L)g(B" . "$,4)t(B") ; t.u | |
265 ("$,4*M)g(B" . "$,4)u(B") ; n.u | |
266 ("$,4*N)g(B" . "$,4)v(B") ; tu | |
267 ("$,4*O)g(B" . "$,4)x(B") ; nu | |
268 ("$,4*P)g(B" . "$,4)y(B") ; pu | |
269 ("$,4*Q)g(B" . "$,4)z(B") ; mu | |
270 ("$,4*R)g(B" . "$,4){(B") ; yu | |
271 ("$,4*S)g(B" . "$,4)|(B") ; ru | |
272 ("$,4*T)g(B" . "$,4)}(B") ; lu | |
273 ("$,4*U)g(B" . "$,4)~(B") ; vu | |
274 ("$,4*V)g(B" . "$,4)(B") ; l_u | |
275 ("$,4*W)g(B" . "$,4* (B") ; l.u | |
276 ("$,4*X)g(B" . "$,4*!(B") ; r_u | |
277 ("$,4*Y)g(B" . "$,4*"(B") ; n_u | |
278 | |
279 ("$,4*H)h(B" . "$,4*#(B") ; ku~ | |
280 ("$,4*I)h(B" . "$,4*$(B") ; n^u~ | |
281 ("$,4*J)h(B" . "$,4*%(B") ; cu~ | |
282 ("$,4*K)h(B" . "$,4*&(B") ; n~u~ | |
283 ("$,4*L)h(B" . "$,4*'(B") ; t.u~ | |
284 ("$,4*M)h(B" . "$,4*((B") ; n.u~ | |
285 ("$,4*N)h(B" . "$,4*)(B") ; tu~ | |
286 ("$,4*O)h(B" . "$,4*+(B") ; nu~ | |
287 ("$,4*P)h(B" . "$,4*,(B") ; pu~ | |
288 ("$,4*Q)h(B" . "$,4*-(B") ; mu~ | |
289 ("$,4*R)h(B" . "$,4*.(B") ; yu~ | |
290 ("$,4*S)h(B" . "$,4*/(B") ; ru~ | |
291 ("$,4*T)h(B" . "$,4*6(B") ; lu~ | |
292 ("$,4*U)h(B" . "$,4*7(B") ; vu~ | |
293 ("$,4*V)h(B" . "$,4*8(B") ; l_u~ | |
294 ("$,4*W)h(B" . "$,4*9(B") ; l.u~ | |
295 ("$,4*X)h(B" . "$,4*:(B") ; r_u~ | |
296 ("$,4*Y)h(B" . "$,4*;(B") ; n_u~ | |
297 )) | |
298 | |
(defvar tml-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Enter every (GLYPHS . LIGATURE) pair of the alist into the table.
    (dolist (pair tml-glyph-glyph)
      (puthash (car pair) (cdr pair) table))
    table)
  "Hash table built from `tml-glyph-glyph'.
Keys are glyph strings and values are the glyph strings that replace
them; keys are compared with `equal'.")
304 | |
(defvar tml-glyph-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash)
  "Regexp matching any glyph sequence that is a key of `tml-glyph-glyph-hash'.")
307 | |
(defun tamil-compose-syllable-string (string)
  "Return the text of STRING composed as one Tamil syllable.
STRING is passed through `decompose-string', inserted into a temporary
buffer, composed as a whole with `tamil-compose-syllable-region', and
the buffer contents are returned."
  (with-temp-buffer
    (let ((plain-text (decompose-string string)))
      (insert plain-text)
      (tamil-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
313 | |
(defun tamil-compose-syllable-region (from to)
  "Compose the Tamil syllable in the region between FROM and TO.

The text is processed in four steps:
1. Each character sequence that is a key of `tml-char-glyph-hash' is
   replaced by its glyph string; other characters are kept as they are.
2. If the glyph string matches `tml-glyph-reorder-key-glyphs', the
   first consonant-plus-key-glyph pair matched by the car of
   `tml-glyph-reordering-regexp-list' is swapped.
3. The first glyph sequence that is a key of `tml-glyph-glyph-hash' is
   replaced by its ligature glyph.
4. The glyphs are joined with the composition rule (5 . 3) and handed
   to `compose-region'.

Point and the current restriction are preserved.  When the region is
empty nothing is composed and nil is returned; otherwise the value is
that of `compose-region'."
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (glyph-str match-str)
        ;; Step 1: char-glyph conversion.
        (while (not (eobp))
          (if (looking-at tml-char-glyph-regexp)
              (progn
                (setq match-str (match-string 0)
                      glyph-str (concat glyph-str
                                        (gethash match-str
                                                 tml-char-glyph-hash)))
                (goto-char (match-end 0)))
            ;; No table entry for this character: keep it unchanged.
            (setq glyph-str (concat glyph-str (string (following-char))))
            (forward-char 1)))
        ;; GLYPH-STR is still nil when the region was empty.  The steps
        ;; below need a string, so skip them instead of signalling an
        ;; error (the former code called `aset' on nil here).
        (when glyph-str
          ;; Step 2: glyph reordering.
          (when (and (string-match tml-glyph-reorder-key-glyphs glyph-str)
                     (string-match (car tml-glyph-reordering-regexp-list)
                                   glyph-str))
            (setq glyph-str
                  (replace-match (cdr tml-glyph-reordering-regexp-list)
                                 nil nil glyph-str)))
          ;; Step 3: glyph-glyph conversion.  The replacement is plain
          ;; data, not a template, so insert it literally and without
          ;; case conversion.
          (when (string-match tml-glyph-glyph-regexp glyph-str)
            (setq match-str (match-string 0 glyph-str))
            (setq glyph-str
                  (replace-match (gethash match-str tml-glyph-glyph-hash)
                                 t t glyph-str)))
          ;; Step 4: interleave the glyphs with the default reference
          ;; point.  Each glyph is prefixed with the rule and the leading
          ;; rule is then dropped, giving (G1 (5 . 3) G2 (5 . 3) G3 ...).
          (let ((components
                 (cdr
                  (apply 'nconc
                         (mapcar (lambda (glyph) (list '(5 . 3) glyph))
                                 glyph-str)))))
            (compose-region from to components)))))))
357 | |
89483 | 358 ;;;###autoload |
(defun tamil-composition-function (pos &optional string)
  "Compose Tamil characters after the position POS.
If STRING is not nil, it is a string, and POS is an index to the string.
In this case, compose characters after POS of the string.

Nothing is composed and nil is returned when
`auto-compose-current-font' is nil.  Otherwise, if a run of characters
in the range handled here starts at POS, the run is given to
`font-shape-text' and its value is returned (with POS as the fallback
value in the STRING case)."
  ;; The same character-range regexp is used for strings and buffers.
  ;; NOTE(review): presumably this range is the Tamil Unicode block --
  ;; confirm against the decoded file.
  (let ((tamil-run "[$,1< (B-$,1=?(B]+"))
    (if string
        (if auto-compose-current-font
            ;; `string-match' takes the string as its second argument
            ;; and the start index as its third.  The former call passed
            ;; POS as the string, which signals a wrong-type-argument
            ;; error.
            (if (eq (string-match tamil-run string pos) pos)
                (or (font-shape-text 0 (match-end 0) auto-compose-current-font
                                     string)
                    pos)))
      (goto-char pos)
      (if auto-compose-current-font
          (if (looking-at tamil-run)
              (font-shape-text pos (match-end 0) auto-compose-current-font)
            ;; NOTE(review): this branch is reached only when the range
            ;; regexp above does not match at POS -- confirm that
            ;; `tamil-composable-pattern' can still match in that case.
            (if (looking-at tamil-composable-pattern)
                (prog1 (match-end 0)
                  (tamil-compose-syllable-region pos (match-end 0)))))))))
89483 | 376 |
49702 | 377 (provide 'tml-util) |
378 | |
52401 | 379 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e |
49702 | 380 ;;; tml-util.el ends here |