Mercurial > emacs
annotate lisp/language/tibet-util.el @ 62412:6ac7ed8e212b
(makefile-dependency-regex): Turn it into a var, and refine it to mask one more level of nested vars.
(makefile-rule-action-regex): Turn it into a var, and refine it so it recognizes backslashed continuation lines as belonging to the same command.
(makefile-macroassign-regex): Refine it so it recognizes backslashed continuation lines as belonging to the same command.
(makefile-var-use-regex): Don't look at the next char, because it might be the same one to be skipped by the initial [^$], leading to an overlooked variable use.
(makefile-make-font-lock-keywords): Remove two parameters, which are now variables that some of the modes set locally. Handle dependency and rule action matching through functions, because regexps alone match too often. Dependency matching now comes last, so it can check whether a colon already matched something else.
(makefile-mode): Inform that font-lock improves makefile parsing capabilities.
(makefile-match-dependency, makefile-match-action): New functions.
author | Daniel Pfeiffer <occitan@esperanto.org> |
---|---|
date | Mon, 16 May 2005 20:13:09 +0000 |
parents | 197607499a29 |
children | 18a818a2ee7c f042e7c0fe20 |
rev | line source |
---|---|
36685 | 1 ;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*- |
17301 | 2 |
62396 | 3 ;; Copyright (C) 1997, 2002 Free Software Foundation, Inc. |
4 ;; Copyright (C) 1995, 1997, 1998, 2000 | |
5 ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
6 ;; Registration Number H14PRO021 | |
17301 | 7 |
8 ;; Keywords: multilingual, Tibetan | |
9 | |
10 ;; This file is part of GNU Emacs. | |
11 | |
12 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 ;; it under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 ;; Boston, MA 02111-1307, USA. | |
26 | |
27 ;; Author: Toru TOMABECHI, <Toru.Tomabechi@orient.unil.ch> | |
28 | |
29 ;; Created: Feb. 17. 1997 | |
30 | |
38436
b174db545cfd
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
36685
diff
changeset
|
31 ;;; History: |
17301 | 32 ;; 1997.03.13 Modification in treatment of text properties; |
33 ;; Support for some special signs and punctuations. | |
26896 | 34 ;; 1999.10.25 Modification for a new composition way by K.Handa. |
17301 | 35 |
38436
b174db545cfd
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
36685
diff
changeset
|
36 ;;; Commentary: |
b174db545cfd
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
36685
diff
changeset
|
37 |
17301 | 38 ;;; Code: |
39 | |
45032
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
;; Alist of (STRING . STRING) pairs.  It is built with backquote so
;; that the cdr of the later entries is computed by `compose-string'
;; when this form is evaluated.
;; NOTE(review): presumably each car is an obsolete glyph sequence and
;; each cdr its replacement -- confirm against the users of this
;; constant, which are not visible in this part of the file.
40 (defconst tibetan-obsolete-glyphs |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
41 `(("$(7!=(B" . "$(8!=(B") ; 2 col <-> 1 col |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
42 ("$(7!?(B" . "$(8!?(B") |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
43 ("$(7!@(B" . "$(8!@(B") |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
44 ("$(7!A(B" . "$(8!A(B") |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
45 ("$(7"`(B" . "$(8"`(B") |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
46 ("$(7!;(B" . "$(8!;(B") |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
47 ("$(7!D(B" . "$(8!D(B") |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
48 ;; Yes these are dirty. But ... |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
49 ("$(7!>(B $(7!>(B" . ,(compose-string "$(7!>(B $(7!>(B" 0 3 [?$(7!>(B (Br . Bl) ? (Br . Bl) ?$(7!>(B])) |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
50 ("$(7!4!5!5(B" . ,(compose-string |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
51 "$(7#R#S#S#S(B" 0 4 |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
52 [?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B])) |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
53 ("$(7!4!5(B" . ,(compose-string "$(7#R#S#S(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B])) |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
54 ("$(7!6(B" . ,(compose-string "$(7#R#S!I(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (br . tr) ?$(7!I(B])) |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
55 ("$(7!4(B" . ,(compose-string "$(7#R#S(B" 0 2 [?$(7#R(B (Br . Bl) ?$(7#S(B])))) |
29a7076e3736
(tibetan-obsolete-glyphs): From tibetan.el.
Richard M. Stallman <rms@gnu.org>
parents:
42839
diff
changeset
|
56 |
17993
73869115ae0a
Most of setup-LANGUAGE-environment functions are
Kenichi Handa <handa@m17n.org>
parents:
17776
diff
changeset
|
57 ;;;###autoload |
26896 | 58 (defun tibetan-char-p (ch) |
59 "Check if char CH is Tibetan character. | |
60 Returns non-nil if CH is Tibetan. Otherwise, returns nil." | |
;; CH counts as Tibetan when its charset is `tibetan' or
;; `tibetan-1-column'.  `memq' returns the matching tail of that
;; list, so the non-nil value is a list rather than t.
61 (memq (char-charset ch) '(tibetan tibetan-1-column))) | |
62 | |
63 ;;; Functions for Tibetan <-> Tibetan-transcription. | |
17301 | 64 |
65 ;;;###autoload | |
;; Walks STR from left to right.  At each index I it first tries the
;; precomposition regexp; only when that does not match exactly at I
;; does it fall back to a one-character table lookup.
26896 | 66 (defun tibetan-tibetan-to-transcription (str) |
67 "Transcribe Tibetan string STR and return the corresponding Roman string." | |
68 (let (;; Accumulate transcriptions here in reverse order. | |
69 (trans nil) | |
70 (len (length str)) | |
71 (i 0) | |
72 ch this-trans) | |
73 (while (< i len) | |
29828 | 74 (let ((idx (string-match tibetan-precomposition-rule-regexp str i))) |
26896 | 75 (if (eq idx i) |
76 ;; The text starting at index I matches a precomposable | |
77 ;; Tibetan sequence; consume the whole match at once. | |
78 (setq i (match-end 0) | |
79 this-trans | |
80 (car (rassoc | |
81 (cdr (assoc (match-string 0 str) | |
82 tibetan-precomposition-rule-alist)) | |
83 tibetan-precomposed-transcription-alist))) | |
;; Otherwise transcribe the single character at I, trying the
;; consonant, vowel and subjoined tables in that order.  When no
;; table has the character, THIS-TRANS becomes nil, which the
;; final `concat' treats as empty, so the character is dropped.
84 (setq ch (substring str i (1+ i)) | |
85 i (1+ i) | |
86 this-trans | |
87 (car (or (rassoc ch tibetan-consonant-transcription-alist) | |
88 (rassoc ch tibetan-vowel-transcription-alist) | |
89 (rassoc ch tibetan-subjoined-transcription-alist))))) | |
90 (setq trans (cons this-trans trans)))) | |
;; TRANS was built in reverse, so restore the order before joining.
91 (apply 'concat (nreverse trans)))) | |
17301 | 92 |
93 ;;;###autoload | |
;; Scans STR for successive matches of `tibetan-regexp'.  Text between
;; matches is copied through unchanged; each match is replaced by the
;; Tibetan string found in the transcription tables.
26896 | 94 (defun tibetan-transcription-to-tibetan (str) |
95 "Convert Tibetan Roman string STR to Tibetan character string. | |
96 The returned string has no composition information." | |
97 (let (;; Case is significant. | |
17301 | 98 (case-fold-search nil) |
26896 | 99 (idx 0) |
100 ;; Accumulate Tibetan strings here in reverse order. | |
101 (t-str-list nil) | |
102 i subtrans) | |
103 (while (setq i (string-match tibetan-regexp str idx)) | |
104 (if (< idx i) | |
105 ;; STR contains a pattern that doesn't match Tibetan | |
106 ;; transcription. Include the pattern as is. | |
107 (setq t-str-list (cons (substring str idx i) t-str-list))) | |
108 (setq subtrans (match-string 0 str) | |
109 idx (match-end 0)) | |
110 (let ((t-char (cdr (assoc subtrans | |
111 tibetan-precomposed-transcription-alist)))) | |
112 (if t-char | |
113 ;; SUBTRANS corresponds to a transcription for a | |
114 ;; precomposable Tibetan sequence; map it back to that sequence. | |
115 (setq t-char (car (rassoc t-char | |
116 tibetan-precomposition-rule-alist))) | |
;; Otherwise look SUBTRANS up in the consonant, vowel, modifier
;; and subjoined tables, in that order; the first hit wins.
117 (setq t-char | |
118 (cdr | |
119 (or (assoc subtrans tibetan-consonant-transcription-alist) | |
120 (assoc subtrans tibetan-vowel-transcription-alist) | |
121 (assoc subtrans tibetan-modifier-transcription-alist) | |
122 (assoc subtrans tibetan-subjoined-transcription-alist))))) | |
123 (setq t-str-list (cons t-char t-str-list)))) | |
;; Copy whatever follows the last match unchanged.
124 (if (< idx (length str)) | |
125 (setq t-str-list (cons (substring str idx) t-str-list))) | |
126 (apply 'concat (nreverse t-str-list)))) | |
17301 | 127 |
128 ;;; | |
26896 | 129 ;;; Functions for composing/decomposing Tibetan sequence. |
17301 | 130 ;;; |
131 ;;; A Tibetan syllable is typically structured as follows: | |
132 ;;; | |
133 ;;; [Prefix] C [C+] V [M] [Suffix [Post suffix]] | |
134 ;;; | |
135 ;;; where C's are all vertically stacked, V appears below or above | |
136 ;;; consonant cluster and M is always put above the C[C+]V combination. | |
137 ;;; (Sanskrit visarga, though it is a vowel modifier, is considered | |
138 ;;; to be a punctuation.) | |
139 ;;; | |
29828 | 140 ;;; Here are examples of the words "bsgrubs" and "hfauM" |
17301 | 141 ;;; |
29828 | 142 ;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"R"U"_1(B |
17301 | 143 ;;; |
144 ;;; M | |
145 ;;; b s b s h | |
29828 | 146 ;;; g fa |
17301 | 147 ;;; r u |
148 ;;; u | |
149 ;;; | |
26896 | 150 ;;; Consonants `'' ($(7"A(B), `w' ($(7">(B), `y' ($(7"B(B), `r' ($(7"C(B) take special |
151 ;;; forms when they are used as subjoined consonants. Consonant `r' | |
152 ;;; takes another special form when used as a superjoined consonant, | |
153 ;;; as in "rka", while it does not change its form when conjoined with | |
154 ;;; subjoined `'', `w' or `y' as in "rwa", "rya". | |
155 | |
156 ;; Append a proper composition rule and glyph to COMPONENTS to compose | |
157 ;; CHAR with a composition that has COMPONENTS. | |
17301 | 158 |
;; Extend the list COMPONENTS in place so that CHAR becomes part of
;; the composition it describes.  The last cons of COMPONENTS is
;; modified destructively (setcar, setcdr and nconc below), so the
;; caller sees the change through its own reference to the list.
;; NOTE(review): the local TMP is bound but never used in this function.
26896 | 159 (defun tibetan-add-components (components char) |
160 (let ((last (last components)) | |
161 (stack-upper '(tc . bc)) | |
162 (stack-under '(bc . tc)) | |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
163 rule comp-vowel tmp) |
26896 | 164 ;; Special treatment for 'a chung. |
165 ;; If 'a follows a consonant, turn it into the subjoined form. | |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
166 ;; * Disabled by Tomabechi 2000/06/09 * |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
167 ;; Because in Unicode, $(7"A(B may follow directly a consonant without |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
45174
diff
changeset
|
168 ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
169 ;;(if (and (= char ?$(7"A(B) |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
170 ;; (aref (char-category-set (car last)) ?0)) |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
171 ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10 |
17301 | 172 |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
173 ;; Composite vowel signs are decomposed before being added |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
174 ;; Added by Tomabechi 2000/06/08 |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
;; For a composite vowel, CHAR is replaced by the second element of
;; its alist entry and COMP-VOWEL receives a fresh copy of the rest,
;; so the nconc at the end cannot modify the alist itself.
175 (if (memq char '(?$(7"T(B ?$(7"V(B ?$(7"W(B ?$(7"X(B ?$(7"Y(B ?$(7"Z(B ?$(7"b(B)) |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
176 (setq comp-vowel |
29828 | 177 (copy-sequence |
178 (cddr (assoc (char-to-string char) | |
179 tibetan-composite-vowel-alist))) | |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
180 char |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
181 (cadr (assoc (char-to-string char) |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
182 tibetan-composite-vowel-alist)))) |
;; Choose the composition rule for CHAR; the first matching clause wins.
26896 | 183 (cond |
184 ;; Compose upper vowel sign vertically over. | |
185 ((aref (char-category-set char) ?2) | |
186 (setq rule stack-upper)) | |
17301 | 187 |
26896 | 188 ;; Compose lower vowel sign vertically under. |
189 ((aref (char-category-set char) ?3) | |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
190 (if (eq char ?$(7"Q(B) ;; `$(7"Q(B' should not visible when composed. |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
191 (setq rule nil) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
192 (setq rule stack-under))) |
26896 | 193 ;; Transform ra-mgo (superscribed r) if followed by a subjoined |
194 ;; consonant other than w, ', y, r. | |
195 ((and (= (car last) ?$(7"C(B) | |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
196 (not (memq char '(?$(7#>(B ?$(7"R(B ?$(7#B(B ?$(7#C(B)))) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
197 (setcar last ?$(7!"(B) ;; modified for newfont by Tomabechi 1999/12/10 |
26896 | 198 (setq rule stack-under)) |
199 ;; Transform initial base consonant if followed by a subjoined | |
200 ;; consonant but 'a. | |
201 (t | |
202 (let ((laststr (char-to-string (car last)))) | |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
203 (if (and (/= char ?$(7"R(B) ;; modified for new font by Tomabechi |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
204 (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J"K(B]" laststr)) |
26896 | 205 (setcar last (string-to-char |
206 (cdr (assoc (char-to-string (car last)) | |
207 tibetan-base-to-subjoined-alist))))) | |
208 (setq rule stack-under)))) | |
17301 | 209 |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
;; Append RULE and CHAR after the last cons.  When RULE is nil (the
;; lower-vowel case above that sets it so) nothing is appended.
210 (if rule |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
211 (setcdr last (list rule char))) |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
212 ;; Added by Tomabechi 2000/06/08 |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
;; Finally append the remaining parts of a decomposed composite vowel.
213 (if comp-vowel |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
214 (nconc last comp-vowel)) |
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
215 )) |
17301 | 216 |
217 ;;;###autoload | |
;; For every match of `tibetan-composable-pattern' in STR, build a
;; component list and hand it to `compose-string' for that span.
;; The value returned is STR itself.
218 (defun tibetan-compose-string (str) | |
26896 | 219 "Compose Tibetan string STR." |
220 (let ((idx 0)) | |
221 ;; `$(7"A(B' is included in the pattern for subjoined consonants | |
222 ;; because we treat it specially in tibetan-add-components. | |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
223 ;; (This feature is removed by Tomabechi 2000/06/08) |
26896 | 224 (while (setq idx (string-match tibetan-composable-pattern str idx)) |
225 (let ((from idx) | |
226 (to (match-end 0)) | |
227 components) | |
;; Seed COMPONENTS with the precomposed character when the span
;; starts with a precomposable sequence, otherwise with the
;; first character of the span.
228 (if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx) | |
229 (setq idx (match-end 0) | |
230 components | |
231 (list (string-to-char | |
232 (cdr | |
233 (assoc (match-string 0 str) | |
234 tibetan-precomposition-rule-alist))))) | |
235 (setq components (list (aref str idx)) | |
236 idx (1+ idx))) | |
;; Fold each remaining character of the span into COMPONENTS.
;; On exit IDX equals TO, which is where the outer search resumes.
237 (while (< idx to) | |
238 (tibetan-add-components components (aref str idx)) | |
239 (setq idx (1+ idx))) | |
240 (compose-string str from to components)))) | |
241 str) | |
17301 | 242 |
19553
e63ba5228950
(tibetan-composition): Add autoload cookies.
Kenichi Handa <handa@m17n.org>
parents:
19366
diff
changeset
|
243 ;;;###autoload |
;; Buffer counterpart of the string version: for every match of
;; `tibetan-composable-pattern' inside the narrowed region, build a
;; component list and pass it to `compose-region'.  Point and the
;; restriction are restored on exit.
;; NOTE(review): the locals STR, RESULT and CHARS are bound but never
;; used in this function.
26896 | 244 (defun tibetan-compose-region (beg end) |
245 "Compose Tibetan text in the region between BEG and END." | |
17301 | 246 (interactive "r") |
26896 | 247 (let (str result chars) |
17301 | 248 (save-excursion |
249 (save-restriction | |
250 (narrow-to-region beg end) | |
251 (goto-char (point-min)) | |
26896 | 252 ;; `$(7"A(B' is included in the pattern for subjoined consonants |
253 ;; because we treat it specially in tibetan-add-components. | |
29596
c3845ffcb423
Convert all tibetan-1-column characters
Kenichi Handa <handa@m17n.org>
parents:
29363
diff
changeset
|
254 ;; (This feature is removed by Tomabechi 2000/06/08) |
26896 | 255 (while (re-search-forward tibetan-composable-pattern nil t) |
256 (let ((from (match-beginning 0)) | |
257 (to (match-end 0)) | |
258 components) | |
;; Go back to the start of the match and seed COMPONENTS with
;; either the precomposed character or the first character.
259 (goto-char from) | |
260 (if (looking-at tibetan-precomposition-rule-regexp) | |
261 (progn | |
262 (setq components | |
263 (list (string-to-char | |
264 (cdr | |
265 (assoc (match-string 0) | |
266 tibetan-precomposition-rule-alist))))) | |
267 (goto-char (match-end 0))) | |
268 (setq components (list (char-after from))) | |
269 (forward-char 1)) | |
;; Fold each remaining character of the match into COMPONENTS.
;; Point ends at TO, where the next search starts.
270 (while (< (point) to) | |
271 (tibetan-add-components components (following-char)) | |
272 (forward-char 1)) | |
273 (compose-region from to components))))))) | |
17301 | 274 |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
;; Reverse lookup table derived from `tibetan-precomposition-rule-alist':
;; each entry (A . B) of that alist becomes (C . A), where C is the
;; first character of the string B.  The keys are characters, so the
;; table is searched with `assq'.
275 (defvar tibetan-decompose-precomposition-alist |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
276 (mapcar (function (lambda (x) (cons (string-to-char (cdr x)) (car x)))) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
277 tibetan-precomposition-rule-alist)) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
278 |
17301 | 279 ;;;###autoload |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
;; Runs the generic `decompose-region' first, then walks the narrowed
;; region one character at a time and replaces every character that has
;; an entry in `tibetan-decompose-precomposition-alist' with the string
;; stored in that entry.
;; NOTE(review): unlike `tibetan-compose-region' this function does not
;; use `save-excursion', so point is left where the loop stops.
280 (defun tibetan-decompose-region (from to) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
281 "Decompose Tibetan text in the region FROM and TO. |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
282 This is different from decompose-region because precomposed Tibetan characters |
42839 | 283 are decomposed into normal Tibetan character sequences." |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
284 (interactive "r") |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
285 (save-restriction |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
286 (narrow-to-region from to) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
287 (decompose-region from to) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
288 (goto-char from) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
;; Because the buffer is narrowed, `eobp' becomes true at the end
;; of the region even as replacements change its length.
289 (while (not (eobp)) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
290 (let* ((char (following-char)) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
291 (slot (assq char tibetan-decompose-precomposition-alist))) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
292 (if slot |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
293 (progn |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
;; Replace the character; `insert' leaves point after the
;; inserted text, so the replacement is not scanned again.
294 (delete-char 1) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
295 (insert (cdr slot))) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
296 (forward-char 1)))))) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
297 |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
298 |
26896 | 299 ;;;###autoload |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
;; Builds and returns a new string: every character of STR that has an
;; entry in `tibetan-decompose-precomposition-alist' is replaced by the
;; string stored there, and every other character is copied as is.
;; NOTE(review): the region variant calls `decompose-region' first;
;; this function makes no corresponding `decompose-string' call.
;; NOTE(review): NEW is rebuilt with `concat' on every iteration.
300 (defun tibetan-decompose-string (str) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
301 "Decompose Tibetan string STR. |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
302 This is different from decompose-string because precomposed Tibetan characters |
42839 | 303 are decomposed into normal Tibetan character sequences." |
29363
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
304 (let ((new "") |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
305 (len (length str)) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
306 (idx 0) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
307 char slot) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
308 (while (< idx len) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
309 (setq char (aref str idx) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
310 slot (assq (aref str idx) tibetan-decompose-precomposition-alist) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
311 new (concat new (if slot (cdr slot) (char-to-string char))) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
312 idx (1+ idx))) |
1ebd8db9c3dc
(tibetan-add-components): Fixes for new
Kenichi Handa <handa@m17n.org>
parents:
28906
diff
changeset
|
313 new)) |
26896 | 314 |
315 ;;;###autoload | |
;; Compose STRING when it is given, otherwise the buffer text between
;; FROM and TO.  PATTERN is accepted but not used here.  The value
;; returned is TO minus FROM in both cases.
316 (defun tibetan-composition-function (from to pattern &optional string) | |
317 (if string | |
318 (tibetan-compose-string string) | |
319 (tibetan-compose-region from to)) | |
320 (- to from)) | |
17301 | 321 |
322 ;;; | |
323 ;;; This variable is used to avoid repeated decomposition. | |
324 ;;; | |
;; Sets the default (global) value to nil; the functions below make the
;; variable buffer-local before changing it.
;; NOTE(review): no defvar for this variable is visible in this part of
;; the file -- confirm that it is declared elsewhere.
325 (setq-default tibetan-decomposed nil) | |
326 | |
327 ;;;###autoload | |
328 (defun tibetan-decompose-buffer () | |
329 "Decomposes Tibetan characters in the buffer into their components. | |
26896 | 330 See also the documentation of the function `tibetan-decompose-region'." |
17301 | 331 (interactive) |
;; Track the decomposed state per buffer.
332 (make-local-variable 'tibetan-decomposed) | |
;; Do nothing when the buffer is already marked as decomposed, so
;; that repeated calls do not decompose twice.
333 (cond ((not tibetan-decomposed) | |
334 (tibetan-decompose-region (point-min) (point-max)) | |
335 (setq tibetan-decomposed t)))) | |
336 | |
337 ;;;###autoload | |
338 (defun tibetan-compose-buffer () | |
339 "Composes Tibetan character components in the buffer. | |
340 See also the documentation of the function `tibetan-compose-region'." | |
341 (interactive) | |
;; Track the decomposed state per buffer.
342 (make-local-variable 'tibetan-decomposed) | |
;; Unlike the decompose command, this always runs, whatever the flag
;; currently says, and then clears the flag.
343 (tibetan-compose-region (point-min) (point-max)) | |
344 (setq tibetan-decomposed nil)) | |
345 | |
346 ;;;###autoload | |
;; Compose the LEN characters that start at point, keep the buffer's
;; modified flag as it was, mark the buffer as not decomposed, and
;; return the length of the region after composition.
347 (defun tibetan-post-read-conversion (len) | |
348 (save-excursion | |
349 (save-restriction | |
;; Remember the modified flag so that composing does not change it.
350 (let ((buffer-modified-p (buffer-modified-p))) | |
351 (narrow-to-region (point) (+ (point) len)) | |
352 (tibetan-compose-region (point-min) (point-max)) | |
353 (set-buffer-modified-p buffer-modified-p) | |
20107
4595a463b105
(tibetan-post-read-conversion): Return
Kenichi Handa <handa@m17n.org>
parents:
19553
diff
changeset
|
354 (make-local-variable 'tibetan-decomposed) |
4595a463b105
(tibetan-post-read-conversion): Return
Kenichi Handa <handa@m17n.org>
parents:
19553
diff
changeset
|
355 (setq tibetan-decomposed nil) |
4595a463b105
(tibetan-post-read-conversion): Return
Kenichi Handa <handa@m17n.org>
parents:
19553
diff
changeset
|
;; Size of the narrowed region, measured while still narrowed.
356 (- (point-max) (point-min)))))) |
17301 | 357 |
358 | |
359 ;;;###autoload | |
;; Copy the text to be written (the string FROM, or the region FROM..TO
;; of the current buffer) into a freshly generated buffer, decompose it
;; there unless the source buffer was already marked as decomposed, and
;; return nil.  The new buffer is left current; this function neither
;; switches back nor kills it.
360 (defun tibetan-pre-write-conversion (from to) | |
;; Read the flag before `set-buffer' below, because other functions in
;; this file make `tibetan-decomposed' buffer-local.
;; NOTE(review): `tibetan-decomposed-temp' is assigned with `setq' and
;; has no declaration visible in this part of the file; binding it in
;; the `let' below would avoid setting a global.
361 (setq tibetan-decomposed-temp tibetan-decomposed) | |
23522
0f86fe9632e0
(tibetan-pre-write-conversion): Use with-temp-buffer.
Kenichi Handa <handa@m17n.org>
parents:
22987
diff
changeset
|
362 (let ((old-buf (current-buffer))) |
23545
0d25c6f765ab
(tibetan-pre-write-conversion): Cancel previous
Kenichi Handa <handa@m17n.org>
parents:
23522
diff
changeset
|
363 (set-buffer (generate-new-buffer " *temp*")) |
0d25c6f765ab
(tibetan-pre-write-conversion): Cancel previous
Kenichi Handa <handa@m17n.org>
parents:
23522
diff
changeset
|
364 (if (stringp from) |
0d25c6f765ab
(tibetan-pre-write-conversion): Cancel previous
Kenichi Handa <handa@m17n.org>
parents:
23522
diff
changeset
|
365 (insert from) |
0d25c6f765ab
(tibetan-pre-write-conversion): Cancel previous
Kenichi Handa <handa@m17n.org>
parents:
23522
diff
changeset
|
366 (insert-buffer-substring old-buf from to)) |
0d25c6f765ab
(tibetan-pre-write-conversion): Cancel previous
Kenichi Handa <handa@m17n.org>
parents:
23522
diff
changeset
|
367 (if (not tibetan-decomposed-temp) |
0d25c6f765ab
(tibetan-pre-write-conversion): Cancel previous
Kenichi Handa <handa@m17n.org>
parents:
23522
diff
changeset
|
368 (tibetan-decompose-region (point-min) (point-max))) |
17776
ece62fdeeebb
(tibetan-pre-write-conversion): Make it work
Kenichi Handa <handa@m17n.org>
parents:
17315
diff
changeset
|
369 ;; Should return nil as annotations. |
ece62fdeeebb
(tibetan-pre-write-conversion): Make it work
Kenichi Handa <handa@m17n.org>
parents:
17315
diff
changeset
|
370 nil)) |
17301 | 371 |
45174
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
372 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
373 ;;; |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
374 ;;; Unicode-related definitions. |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
45174
diff
changeset
|
375 ;;; |
45174
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
376 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; Alist of (VOWEL-STRING . REPLACEMENT-STRING).  The first entry maps
;; to the empty string, i.e. the vowel is removed; the others map to a
;; longer sequence.  The keys here are the same characters listed in
;; `tibetan-canonicalize-for-unicode-regexp'.
377 (defvar tibetan-canonicalize-for-unicode-alist |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
378 '(("$(7"Q(B" . "") ;; remove vowel a |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
379 ("$(7"T(B" . "$(7"R"S(B") ;; decompose vowels whose use is ``discouraged'' in Unicode 3.0 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
380 ("$(7"V(B" . "$(7"R"U(B") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
381 ("$(7"W(B" . "$(7#C"a(B") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
382 ("$(7"X(B" . "$(7#C"R"a(B") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
383 ("$(7"Y(B" . "$(7#D"a(B") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
384 ("$(7"Z(B" . "$(7#D"R"a(B") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
385 ("$(7"b(B" . "$(7"R"a(B")) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
386 "Rules for canonicalizing Tibetan vowels for Unicode.") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
387 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; Character class listing the keys of
;; `tibetan-canonicalize-for-unicode-alist'; the two must be kept in
;; step, because the region function looks each match up in that alist.
388 (defvar tibetan-canonicalize-for-unicode-regexp |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
389 "[$(7"Q"T"V"W"X"Y"Z"b(B]" |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
390 "Regexp for Tibetan vowels to be canonicalized in Unicode.") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
391 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; Within the region FROM..TO, replace every match of
;; `tibetan-canonicalize-for-unicode-regexp' with the string that
;; `tibetan-canonicalize-for-unicode-alist' associates with the matched
;; text.  The restriction is restored on exit; point is not.
392 (defun tibetan-canonicalize-for-unicode-region (from to) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
393 (save-restriction |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
394 (narrow-to-region from to) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
395 (goto-char from) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
396 (while (re-search-forward tibetan-canonicalize-for-unicode-regexp nil t) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
397 (let ( |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
398 ;;(from (match-beginning 0)) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
399 ;;(to (match-end 0)) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
400 (canonical-form |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
401 (cdr (assoc (match-string 0) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
402 tibetan-canonicalize-for-unicode-alist)))) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
403 ;;(goto-char from) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
404 ;;(delete-region from to) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
405 ;;(insert canonical-form) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; NOTE(review): `replace-match' is called without the FIXEDCASE and
;; LITERAL arguments; presumably harmless for these replacement
;; strings -- confirm.
406 (replace-match canonical-form) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
407 )))) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
408 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; Defaults to t.  Read by `tibetan-pre-write-canonicalize-for-unicode'
;; to decide whether the text being written is canonicalized.
;; NOTE(review): the leading `*' in the docstring looks like the old
;; convention for marking a variable as a user option -- confirm.
409 (defvar tibetan-strict-unicode t |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
410 "*Flag to control Tibetan canonicalizing for Unicode. |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
411 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
412 If non-nil, the vowel a is removed and composite vowels are decomposed |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
413 before writing buffer in Unicode. See also |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
414 `tibetan-canonicalize-for-unicode-regexp' and |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
415 `tibetan-canonicalize-for-unicode-alist'.") |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
416 |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
417 ;;;###autoload |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; Copy the text to be written into a new " *temp*" buffer and, when
;; `tibetan-strict-unicode' is non-nil, canonicalize its Tibetan vowels
;; with `tibetan-canonicalize-for-unicode-region'.
;; FROM is either a string, which is inserted as is (TO is then not
;; used), or, together with TO, the bounds of a region in the buffer
;; that was current on entry.
;; The temp buffer is made current with `set-buffer' and is still
;; current when the function returns.  The return value is always nil.
;; NOTE(review): the temp buffer is not killed here -- presumably the
;; caller disposes of it; confirm.
418 (defun tibetan-pre-write-canonicalize-for-unicode (from to) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; Both values are captured before `set-buffer' below, i.e. while the
;; original buffer is still current.
419 (let ((old-buf (current-buffer)) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
420 (strict-unicode tibetan-strict-unicode)) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
421 (set-buffer (generate-new-buffer " *temp*")) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
422 (if (stringp from) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
423 (insert from) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
424 (insert-buffer-substring old-buf from to)) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
425 (if strict-unicode |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
;; Canonicalize the whole temp buffer, which holds only the copied text.
426 (tibetan-canonicalize-for-unicode-region (point-min) (point-max))) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
427 ;; Should return nil as annotations. |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
428 nil)) |
15af34d3b6f7
(tibetan-canonicalize-for-unicode-alist)
Kenichi Handa <handa@m17n.org>
parents:
45032
diff
changeset
|
429 |
18309
bd8b521f5218
Provide XXX-util instead of
Kenichi Handa <handa@m17n.org>
parents:
17993
diff
changeset
|
;; Announce the `tibet-util' feature.
430 (provide 'tibet-util) |
17301 | 431 |
52401 | 432 ;;; arch-tag: 7a7333e8-1584-446c-b39c-a02b9def265d |
38436
b174db545cfd
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
36685
diff
changeset
|
433 ;;; tibet-util.el ends here |