36683
|
1 ;;; lao-util.el --- utilities for Lao -*- coding: iso-2022-7bit; -*-
|
17083
|
2
|
|
3 ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
|
18377
|
4 ;; Licensed to the Free Software Foundation.
|
41890
|
5 ;; Copyright (C) 2001 Free Software Foundation, Inc.
|
17083
|
6
|
|
7 ;; Keywords: multilingual, Lao
|
|
8
|
|
9 ;; This file is part of GNU Emacs.
|
|
10
|
|
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
12 ;; it under the terms of the GNU General Public License as published by
|
|
13 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
14 ;; any later version.
|
|
15
|
|
16 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 ;; GNU General Public License for more details.
|
|
20
|
|
21 ;; You should have received a copy of the GNU General Public License
|
|
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
24 ;; Boston, MA 02111-1307, USA.
|
|
25
|
38414
|
26 ;;; Commentary:
|
|
27
|
17083
|
28 ;;; Code:
|
|
29
|
26894
|
30 ;; Setting information of Lao characters.
|
|
31
|
|
(defconst lao-category-table (make-category-table)
  "Category table in which the Lao character categories are defined.
The categories are c (consonant), s (semi-vowel), v (upper/lower
vowel) and t (tone).")

;; Register each category letter together with its description.
(dolist (category '((?c . "Lao consonant")
                    (?s . "Lao semi-vowel")
                    (?v . "Lao upper/lower vowel")
                    (?t . "Lao tone")))
  (define-category (car category) (cdr category) lao-category-table))
|
|
37
|
17083
|
38 (let ((l '((?(1!(B consonant "LETTER KOR KAI'" "CHICKEN")
|
|
39 (?(1"(B consonant "LETTER KHOR KHAI'" "EGG")
|
|
40 (?(1#(B invalid nil)
|
|
41 (?(1$(B consonant "LETTER QHOR QHWARGN" "BUFFALO")
|
|
42 (?(1%(B invalid nil)
|
|
43 (? invalid nil)
|
|
44 (?(1'(B consonant "LETTER NGOR NGUU" "SNAKE")
|
|
45 (?(1((B consonant "LETTER JOR JUA" "BUDDHIST NOVICE")
|
|
46 (?(1)(B invalid nil)
|
|
47 (?(1*(B consonant "LETTER XOR X\"ARNG" "ELEPHANT")
|
|
48 (?(1+(B invalid nil)
|
|
49 (?(1,(B invalid nil)
|
|
50 (?(1-(B consonant "LETTER YOR YUNG" "MOSQUITO")
|
|
51 (?(1.(B invalid nil)
|
|
52 (?(1.(B invalid nil)
|
|
53 (?(1.(B invalid nil)
|
|
54 (?(1.(B invalid nil)
|
|
55 (?(1.(B invalid nil)
|
|
56 (?(1.(B invalid nil)
|
|
57 (?(14(B consonant "LETTER DOR DANG" "NOSE")
|
|
58 (?(15(B consonant "LETTER TOR TAR" "EYE")
|
19271
|
59 (?(16(B consonant "LETTER THOR THUNG" "TO ASK,QUESTION")
|
|
60 (?(17(B consonant "LETTER DHOR DHARM" "FLAG")
|
17083
|
61 (?(18(B invalid nil)
|
|
62 (?(19(B consonant "LETTER NOR NOK" "BIRD")
|
|
63 (?(1:(B consonant "LETTER BOR BED" "FISHHOOK")
|
|
64 (?(1;(B consonant "LETTER POR PAR" "FISH")
|
|
65 (?(1<(B consonant "LETTER HPOR HPER\"" "BEE")
|
|
66 (?(1=(B consonant "LETTER FHOR FHAR" "WALL")
|
|
67 (?(1>(B consonant "LETTER PHOR PHUU" "MOUNTAIN")
|
|
68 (?(1?(B consonant "LETTER FOR FAI" "FIRE")
|
|
69 (?(1@(B invalid nil)
|
|
70 (?(1A(B consonant "LETTER MOR MAR\"" "HORSE")
|
|
71 (?(1B(B consonant "LETTER GNOR GNAR" "MEDICINE")
|
|
72 (?(1C(B consonant "LETTER ROR ROD" "CAR")
|
|
73 (?(1D(B invalid nil)
|
|
74 (?(1E(B consonant "LETTER LOR LIING" "MONKEY")
|
|
75 (?(1F(B invalid nil)
|
|
76 (?(1G(B consonant "LETTER WOR WII" "HAND FAN")
|
|
77 (?(1H(B invalid nil)
|
|
78 (?(1I(B invalid nil)
|
|
79 (?(1J(B consonant "LETTER SOR SEA" "TIGER")
|
|
80 (?(1K(B consonant "LETTER HHOR HHAI" "JAR")
|
|
81 (?(1L(B invalid nil)
|
|
82 (?(1M(B consonant "LETTER OR OOW" "TAKE")
|
|
83 (?(1N(B consonant "LETTER HOR HEA" "BOAT")
|
|
84 (?(1O(B special "ELLIPSIS")
|
|
85 (?(1P(B vowel-base "VOWEL SIGN SARA A")
|
|
86 (?(1Q(B vowel-upper "VOWEL SIGN MAI KAN")
|
|
87 (?(1R(B vowel-base "VOWEL SIGN SARA AR")
|
|
88 (?(1S(B vowel-base "VOWEL SIGN SARA AM")
|
49598
|
89 (?(1T(B vowel-upper "VOWEL SIGN SARA I")
|
17083
|
90 (?(1U(B vowel-upper "VOWEL SIGN SARA II")
|
|
91 (?(1V(B vowel-upper "VOWEL SIGN SARA EU")
|
|
92 (?(1W(B vowel-upper "VOWEL SIGN SARA UR")
|
|
93 (?(1X(B vowel-lower "VOWEL SIGN SARA U")
|
|
94 (?(1Y(B vowel-lower "VOWEL SIGN SARA UU")
|
|
95 (?(1Z(B invalid nil)
|
|
96 (?(1[(B vowel-upper "VOWEL SIGN MAI KONG")
|
|
97 (?(1\(B semivowel-lower "SEMIVOWEL SIGN LO")
|
|
98 (?(1](B vowel-base "SEMIVOWEL SIGN SARA IA")
|
|
99 (?(1^(B invalid nil)
|
|
100 (?(1_(B invalid nil)
|
|
101 (?(1`(B vowel-base "VOWEL SIGN SARA EE")
|
|
102 (?(1a(B vowel-base "VOWEL SIGN SARA AA")
|
|
103 (?(1b(B vowel-base "VOWEL SIGN SARA OO")
|
|
104 (?(1c(B vowel-base "VOWEL SIGN SARA EI MAI MUAN\"")
|
|
105 (?(1d(B vowel-base "VOWEL SIGN SARA AI MAI MAY")
|
|
106 (?(1e(B invalid nil)
|
49598
|
107 (?(1f(B special "KO LA (REPETITION)")
|
17083
|
108 (?(1g(B invalid nil)
|
|
109 (?(1h(B tone "TONE MAI EK")
|
|
110 (?(1i(B tone "TONE MAI THO")
|
|
111 (?(1j(B tone "TONE MAI TI")
|
|
112 (?(1k(B tone "TONE MAI JADTAWAR")
|
|
113 (?(1l(B tone "CANCELLATION MARK")
|
|
114 (?(1m(B vowel-upper "VOWEL SIGN SARA OR")
|
|
115 (?(1n(B invalid nil)
|
|
116 (?(1o(B invalid nil)
|
|
117 (?(1p(B special "DIGIT ZERO")
|
|
118 (?(1q(B special "DIGIT ONE")
|
|
119 (?(1r(B special "DIGIT TWO")
|
|
120 (?(1s(B special "DIGIT THREE")
|
|
121 (?(1t(B special "DIGIT FOUR")
|
|
122 (?(1u(B special "DIGIT FIVE")
|
|
123 (?(1v(B special "DIGIT SIX")
|
|
124 (?(1w(B special "DIGIT SEVEN")
|
|
125 (?(1x(B special "DIGIT EIGHT")
|
|
126 (?(1y(B special "DIGIT NINE")
|
|
127 (?(1z(B invalid nil)
|
|
128 (?(1{(B invalid nil)
|
|
129 (?(1|(B consonant "LETTER NHOR NHUU" "MOUSE")
|
|
130 (?(1}(B consonant "LETTER MHOR MHAR" "DOG")
|
|
131 (?(1~(B invalid nil)
|
41890
|
132 ;; Unicode equivalents
|
|
133 (?$,1D!(B consonant "LETTER KOR KAI'" "CHICKEN")
|
|
134 (?$,1D"(B consonant "LETTER KHOR KHAI'" "EGG")
|
|
135 (?$,1D$(B consonant "LETTER QHOR QHWARGN" "BUFFALO")
|
|
136 (?$,1D'(B consonant "LETTER NGOR NGUU" "SNAKE")
|
|
137 (?$,1D((B consonant "LETTER JOR JUA" "BUDDHIST NOVICE")
|
|
138 (?$,1D*(B consonant "LETTER XOR X\"ARNG" "ELEPHANT")
|
|
139 (?$,1D-(B consonant "LETTER YOR YUNG" "MOSQUITO")
|
|
140 (?$,1D4(B consonant "LETTER DOR DANG" "NOSE")
|
|
141 (?$,1D5(B consonant "LETTER TOR TAR" "EYE")
|
|
142 (?$,1D6(B consonant "LETTER THOR THUNG" "TO ASK,QUESTION")
|
|
143 (?$,1D7(B consonant "LETTER DHOR DHARM" "FLAG")
|
|
144 (?$,1D9(B consonant "LETTER NOR NOK" "BIRD")
|
|
145 (?$,1D:(B consonant "LETTER BOR BED" "FISHHOOK")
|
|
146 (?$,1D;(B consonant "LETTER POR PAR" "FISH")
|
|
147 (?$,1D<(B consonant "LETTER HPOR HPER\"" "BEE")
|
|
148 (?$,1D=(B consonant "LETTER FHOR FHAR" "WALL")
|
|
149 (?$,1D>(B consonant "LETTER PHOR PHUU" "MOUNTAIN")
|
|
150 (?$,1D?(B consonant "LETTER FOR FAI" "FIRE")
|
|
151 (?$,1DA(B consonant "LETTER MOR MAR\"" "HORSE")
|
|
152 (?$,1DB(B consonant "LETTER GNOR GNAR" "MEDICINE")
|
|
153 (?$,1DC(B consonant "LETTER ROR ROD" "CAR")
|
|
154 (?$,1DE(B consonant "LETTER LOR LIING" "MONKEY")
|
|
155 (?$,1DG(B consonant "LETTER WOR WII" "HAND FAN")
|
|
156 (?$,1DJ(B consonant "LETTER SOR SEA" "TIGER")
|
|
157 (?$,1DK(B consonant "LETTER HHOR HHAI" "JAR")
|
|
158 (?$,1DM(B consonant "LETTER OR OOW" "TAKE")
|
|
159 (?$,1DN(B consonant "LETTER HOR HEA" "BOAT")
|
|
160 (?$,1DO(B special "ELLIPSIS")
|
|
161 (?$,1DP(B vowel-base "VOWEL SIGN SARA A")
|
|
162 (?$,1DQ(B vowel-upper "VOWEL SIGN MAI KAN")
|
|
163 (?$,1DR(B vowel-base "VOWEL SIGN SARA AR")
|
|
164 (?$,1DS(B vowel-base "VOWEL SIGN SARA AM")
|
49598
|
165 (?$,1DT(B vowel-upper "VOWEL SIGN SARA I")
|
41890
|
166 (?$,1DU(B vowel-upper "VOWEL SIGN SARA II")
|
|
167 (?$,1DV(B vowel-upper "VOWEL SIGN SARA EU")
|
|
168 (?$,1DW(B vowel-upper "VOWEL SIGN SARA UR")
|
|
169 (?$,1DX(B vowel-lower "VOWEL SIGN SARA U")
|
|
170 (?$,1DY(B vowel-lower "VOWEL SIGN SARA UU")
|
|
171 (?$,1D[(B vowel-upper "VOWEL SIGN MAI KONG")
|
|
172 (?$,1D\(B semivowel-lower "SEMIVOWEL SIGN LO")
|
|
173 (?$,1D](B vowel-base "SEMIVOWEL SIGN SARA IA")
|
|
174 (?$,1D`(B vowel-base "VOWEL SIGN SARA EE")
|
|
175 (?$,1Da(B vowel-base "VOWEL SIGN SARA AA")
|
|
176 (?$,1Db(B vowel-base "VOWEL SIGN SARA OO")
|
|
177 (?$,1Dc(B vowel-base "VOWEL SIGN SARA EI MAI MUAN\"")
|
|
178 (?$,1Dd(B vowel-base "VOWEL SIGN SARA AI MAI MAY")
|
|
179 (?$,1Df(B special "KO LA (REPETITION)")
|
|
180 (?$,1Dh(B tone "TONE MAI EK")
|
|
181 (?$,1Di(B tone "TONE MAI THO")
|
|
182 (?$,1Dj(B tone "TONE MAI TI")
|
|
183 (?$,1Dk(B tone "TONE MAI JADTAWAR")
|
|
184 (?$,1Dl(B tone "CANCELLATION MARK")
|
|
185 (?$,1Dm(B vowel-upper "VOWEL SIGN SARA OR")
|
|
186 (?$,1Dp(B special "DIGIT ZERO")
|
|
187 (?$,1Dq(B special "DIGIT ONE")
|
|
188 (?$,1Dr(B special "DIGIT TWO")
|
|
189 (?$,1Ds(B special "DIGIT THREE")
|
|
190 (?$,1Dt(B special "DIGIT FOUR")
|
|
191 (?$,1Du(B special "DIGIT FIVE")
|
|
192 (?$,1Dv(B special "DIGIT SIX")
|
|
193 (?$,1Dw(B special "DIGIT SEVEN")
|
|
194 (?$,1Dx(B special "DIGIT EIGHT")
|
|
195 (?$,1Dy(B special "DIGIT NINE")
|
|
196 (?$,1D|(B consonant "LETTER NHOR NHUU" "MOUSE")
|
|
197 (?$,1D}(B consonant "LETTER MHOR MHAR" "DOG")))
|
17083
|
198 elm)
|
|
199 (while l
|
26894
|
200 (setq elm (car l) l (cdr l))
|
|
201 (let ((char (car elm))
|
|
202 (ptype (nth 1 elm)))
|
|
203 (cond ((eq ptype 'consonant)
|
|
204 (modify-category-entry char ?c lao-category-table))
|
|
205 ((memq ptype '(vowel-upper vowel-lower))
|
|
206 (modify-category-entry char ?v lao-category-table))
|
|
207 ((eq ptype 'semivowel-lower)
|
|
208 (modify-category-entry char ?s lao-category-table))
|
|
209 ((eq ptype 'tone)
|
|
210 (modify-category-entry char ?t lao-category-table)))
|
|
211 (put-char-code-property char 'phonetic-type ptype)
|
|
212 (put-char-code-property char 'name (nth 2 elm))
|
|
213 (put-char-code-property char 'meaning (nth 3 elm)))))
|
|
214
|
|
;; The general composing rules are as follows:
;;
;;                          T
;;       V        T         V                  T
;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
;;                                   v         v
;;                             T
;;        V         T          V                    T
;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CsvT -> C
;;        s         s          s         s          s
;;                                       v          v


;; where C: consonant, V: vowel upper, v: vowel lower,
;;       T: tone mark, s: semivowel lower
|
|
230
|
|
(defvar lao-composition-pattern
  "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
  "Regexp matching one Lao composite sequence.
The regexp is written in terms of the character categories c, t, v
and s.  A match is one character of category c followed by one of:
  - a character of category t;
  - a character of category v, optionally followed by one of category t;
  - a character of category s, optionally followed by either of the
    two forms above.")
|
|
234
|
|
235 ;;;###autoload
|
|
(defun lao-compose-string (str)
  "Compose each Lao composite sequence found in STR and return STR.
Sequences are located with `lao-composition-pattern' while
`lao-category-table' is the active category table, and each matched
range of STR is passed to `compose-string'."
  (with-category-table lao-category-table
    (let ((start (string-match lao-composition-pattern str 0))
          end)
      (while start
        ;; Remember where this match ends: it is both the end of the
        ;; range to compose and the place to resume searching.
        (setq end (match-end 0))
        (compose-string str start end)
        (setq start (string-match lao-composition-pattern str end)))))
  str)
|
|
243
|
|
244 ;;; LRT: Lao <-> Roman Transcription
|
|
245
|
|
246 ;; Upper vowels and tone-marks are put on the letter.
|
|
247 ;; Semi-vowel-sign-lo and lower vowels are put under the letter.
|
|
248
|
|
249 (defconst lao-transcription-consonant-alist
|
|
250 (sort '(;; single consonants
|
|
251 ("k" . "(1!(B")
|
|
252 ("kh" . "(1"(B")
|
|
253 ("qh" . "(1$(B")
|
|
254 ("ng" . "(1'(B")
|
|
255 ("j" . "(1((B")
|
|
256 ("s" . "(1J(B")
|
|
257 ("x" . "(1*(B")
|
|
258 ("y" . "(1-(B")
|
|
259 ("d" . "(14(B")
|
|
260 ("t" . "(15(B")
|
|
261 ("th" . "(16(B")
|
|
262 ("dh" . "(17(B")
|
|
263 ("n" . "(19(B")
|
|
264 ("b" . "(1:(B")
|
|
265 ("p" . "(1;(B")
|
|
266 ("hp" . "(1<(B")
|
|
267 ("fh" . "(1=(B")
|
|
268 ("ph" . "(1>(B")
|
|
269 ("f" . "(1?(B")
|
|
270 ("m" . "(1A(B")
|
|
271 ("gn" . "(1B(B")
|
|
272 ("l" . "(1E(B")
|
|
273 ("r" . "(1C(B")
|
|
274 ("v" . "(1G(B")
|
|
275 ("w" . "(1G(B")
|
|
276 ("hh" . "(1K(B")
|
|
277 ("O" . "(1M(B")
|
|
278 ("h" . "(1N(B")
|
|
279 ("nh" . "(1|(B")
|
|
280 ("mh" . "(1}(B")
|
|
281 ("lh" . ["(1K\(B"])
|
|
282 ;; double consonants
|
|
283 ("ngh" . ["(1K'(B"])
|
|
284 ("yh" . ["(1K](B"])
|
|
285 ("wh" . ["(1KG(B"])
|
|
286 ("hl" . ["(1KE(B"])
|
|
287 ("hy" . ["(1K-(B"])
|
|
288 ("hn" . ["(1K9(B"])
|
|
289 ("hm" . ["(1KA(B"])
|
|
290 )
|
|
291 (function (lambda (x y) (> (length (car x)) (length (car y)))))))
|
|
292
|
|
293 (defconst lao-transcription-semi-vowel-alist
|
|
294 '(("r" . "(1\(B")))
|
|
295
|
|
296 (defconst lao-transcription-vowel-alist
|
|
297 (sort '(("a" . "(1P(B")
|
|
298 ("ar" . "(1R(B")
|
|
299 ("i" . "(1T(B")
|
|
300 ("ii" . "(1U(B")
|
|
301 ("eu" . "(1V(B")
|
|
302 ("ur" . "(1W(B")
|
|
303 ("u" . "(1X(B")
|
|
304 ("uu" . "(1Y(B")
|
|
305 ("e" . ["(1`P(B"])
|
|
306 ("ee" . "(1`(B")
|
|
307 ("ae" . ["(1aP(B"])
|
|
308 ("aa" . "(1a(B")
|
|
309 ("o" . ["(1bP(B"])
|
|
310 ("oo" . "(1b(B")
|
|
311 ("oe" . ["(1`RP(B"])
|
|
312 ("or" . "(1m(B")
|
|
313 ("er" . ["(1`T(B"])
|
|
314 ("ir" . ["(1`U(B"])
|
|
315 ("ua" . ["(1[GP(B"])
|
|
316 ("uaa" . ["(1[G(B"])
|
|
317 ("ie" . ["(1`Q]P(B"])
|
|
318 ("ia" . ["(1`Q](B"])
|
|
319 ("ea" . ["(1`VM(B"])
|
|
320 ("eaa" . ["(1`WM(B"])
|
|
321 ("ai" . "(1d(B")
|
|
322 ("ei" . "(1c(B")
|
|
323 ("ao" . ["(1`[R(B"])
|
|
324 ("aM" . "(1S(B"))
|
|
325 (function (lambda (x y) (> (length (car x)) (length (car y)))))))
|
|
326
|
|
327 ;; Maa-sakod is put at the tail.
|
|
328 (defconst lao-transcription-maa-sakod-alist
|
|
329 '(("k" . "(1!(B")
|
|
330 ("g" . "(1'(B")
|
|
331 ("y" . "(1-(B")
|
|
332 ("d" . "(14(B")
|
|
333 ("n" . "(19(B")
|
|
334 ("b" . "(1:(B")
|
|
335 ("m" . "(1A(B")
|
|
336 ("v" . "(1G(B")
|
|
337 ("w" . "(1G(B")
|
|
338 ))
|
|
339
|
|
340 (defconst lao-transcription-tone-alist
|
|
341 '(("'" . "(1h(B")
|
|
342 ("\"" . "(1i(B")
|
|
343 ("^" . "(1j(B")
|
|
344 ("+" . "(1k(B")
|
|
345 ("~" . "(1l(B")))
|
|
346
|
|
347 (defconst lao-transcription-punctuation-alist
|
|
348 '(("\\0" . "(1p(B")
|
|
349 ("\\1" . "(1q(B")
|
|
350 ("\\2" . "(1r(B")
|
|
351 ("\\3" . "(1s(B")
|
|
352 ("\\4" . "(1t(B")
|
|
353 ("\\5" . "(1u(B")
|
|
354 ("\\6" . "(1v(B")
|
|
355 ("\\7" . "(1w(B")
|
|
356 ("\\8" . "(1x(B")
|
|
357 ("\\9" . "(1y(B")
|
|
358 ("\\\\" . "(1f(B")
|
|
359 ("\\$" . "(1O(B")))
|
|
360
|
|
;; This constant was defined twice in a row.  The first definition
;; joined the alist keys with `mapconcat' and was unconditionally
;; overridden by the `regexp-opt' based one below, so it never took
;; effect.  Its explicit groups were 1 (consonant), 2 (semi-vowel),
;; 4 (vowel), 5 (maa-sakod) and 6 (tone), which does not agree with
;; the group numbers 1/3/5/8/9 read by
;; `lao-transcribe-single-roman-syllable-to-lao'.  Only the effective
;; definition is kept.
;;
;; NOTE(review): the group numbers used by the consumer depend on how
;; many groups `regexp-opt' itself adds inside each alternative --
;; confirm against the Emacs version this file targets.
(defconst lao-transcription-pattern
  (concat
   "\\("                                ; consonant
   (regexp-opt (mapcar 'car lao-transcription-consonant-alist))
   "\\)\\("                             ; optional semi-vowel
   (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist))
   "\\)?\\(\\("                         ; optional tail, starting with a vowel
   (regexp-opt (mapcar 'car lao-transcription-vowel-alist))
   "\\)\\("                             ; ... optional maa-sakod
   (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist))
   "\\)?\\("                            ; ... optional tone
   (regexp-opt (mapcar 'car lao-transcription-tone-alist))
   "\\)?\\)?\\|"                        ; or else one punctuation/digit key
   (regexp-opt (mapcar 'car lao-transcription-punctuation-alist))
   )
  "Regexp of Roman transcription pattern for one Lao syllable.
It matches either a consonant, an optional semi-vowel and an optional
tail made of a vowel, an optional maa-sakod and an optional tone, or
one of the keys of `lao-transcription-punctuation-alist'.  The
alternatives are taken from the keys of the `lao-transcription-*-alist'
constants.")
|
|
396
|
|
397 (defconst lao-vowel-reordering-rule
|
|
398 '(("(1P(B" (0 ?(1P(B) (0 ?(1Q(B))
|
|
399 ("(1R(B" (0 ?(1R(B))
|
|
400 ("(1T(B" (0 ?(1U(B))
|
|
401 ("(1U(B" (0 ?(1U(B))
|
|
402 ("(1V(B" (0 ?(1V(B))
|
|
403 ("(1W(B" (0 ?(1W(B))
|
|
404 ("(1X(B" (0 ?(1X(B))
|
|
405 ("(1Y(B" (0 ?(1Y(B))
|
|
406 ("(1`P(B" (?(1`(B 0 ?(1P(B) (?(1`(B 0 ?(1Q(B))
|
|
407 ("(1`(B" (?(1`(B 0))
|
|
408 ("(1aP(B" (?(1a(B 0 ?(1P(B) (?(1a(B 0 ?(1Q(B))
|
|
409 ("(1a(B" (?(1a(B 0))
|
|
410 ("(1bP(B" (?(1b(B 0 ?(1P(B) (0 ?(1[(B) (?(1-(B ?(1b(B 0 ?(1Q(B) (?(1G(B ?(1b(B 0 ?(1Q(B))
|
|
411 ("(1b(B" (?(1b(B 0))
|
|
412 ("(1`RP(B" (?(1`(B 0 ?(1R(B ?(1P(B) (0 ?(1Q(B ?(1M(B))
|
|
413 ("(1m(B" (0 ?(1m(B) (0 ?(1M(B))
|
|
414 ("(1`T(B" (?(1`(B 0 ?(1T(B))
|
|
415 ("(1`U(B" (?(1`(B 0 ?(1U(B))
|
|
416 ("(1[GP(B" (0 ?(1[(B ?(1G(B ?(1P(B) (0 ?(1Q(B ?(1G(B))
|
|
417 ("(1[G(B" (0 ?(1[(B ?(1G(B) (0 ?(1G(B))
|
|
418 ("(1`Q]P(B" (?(1`(B 0 ?(1Q(B ?(1](B ?(1P(B) (0 ?(1Q(B ?(1](B))
|
|
419 ("(1`Q](B" (?(1`(B 0 ?(1Q(B ?(1](B) (0 ?(1](B))
|
|
420 ("(1`VM(B" (?(1`(B 0 ?(1V(B ?(1M(B))
|
|
421 ("(1`WM(B" (?(1`(B 0 ?(1W(B ?(1M(B))
|
|
422 ("(1d(B" (?(1d(B 0))
|
|
423 ("(1c(B" (?(1c(B 0))
|
|
424 ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B))
|
41890
|
425 ("(1S(B" (0 ?(1S(B))
|
|
426
|
|
427 ;; Unicode equivalents
|
|
428 ("$,1DP(B" (0 ?$,1DP(B) (0 ?$,1DQ(B))
|
|
429 ("$,1DR(B" (0 ?$,1DR(B))
|
|
430 ("$,1DT(B" (0 ?$,1DU(B))
|
|
431 ("$,1DU(B" (0 ?$,1DU(B))
|
|
432 ("$,1DV(B" (0 ?$,1DV(B))
|
|
433 ("$,1DW(B" (0 ?$,1DW(B))
|
|
434 ("$,1DX(B" (0 ?$,1DX(B))
|
|
435 ("$,1DY(B" (0 ?$,1DY(B))
|
|
436 ("$,1D`DP(B" (?$,1D`(B 0 ?$,1DP(B) (?$,1D`(B 0 ?$,1DQ(B))
|
|
437 ("$,1D`(B" (?$,1D`(B 0))
|
|
438 ("$,1DaDP(B" (?$,1Da(B 0 ?$,1DP(B) (?$,1Da(B 0 ?$,1DQ(B))
|
|
439 ("$,1Da(B" (?$,1Da(B 0))
|
|
440 ("$,1DbDP(B" (?$,1Db(B 0 ?$,1DP(B) (0 ?$,1D[(B) (?$,1D-(B ?$,1Db(B 0 ?$,1DQ(B) (?$,1DG(B ?$,1Db(B 0 ?$,1DQ(B))
|
|
441 ("$,1Db(B" (?$,1Db(B 0))
|
|
442 ("$,1D`DRDP(B" (?$,1D`(B 0 ?$,1DR(B ?$,1DP(B) (0 ?$,1DQ(B ?$,1DM(B))
|
|
443 ("$,1Dm(B" (0 ?$,1Dm(B) (0 ?$,1DM(B))
|
|
444 ("$,1D`DT(B" (?$,1D`(B 0 ?$,1DT(B))
|
|
445 ("$,1D`DU(B" (?$,1D`(B 0 ?$,1DU(B))
|
|
446 ("$,1D[DGDP(B" (0 ?$,1D[(B ?$,1DG(B ?$,1DP(B) (0 ?$,1DQ(B ?$,1DG(B))
|
|
447 ("$,1D[DG(B" (0 ?$,1D[(B ?$,1DG(B) (0 ?$,1DG(B))
|
|
448 ("$,1D`DQD]DP(B" (?$,1D`(B 0 ?$,1DQ(B ?$,1D](B ?$,1DP(B) (0 ?$,1DQ(B ?$,1D](B))
|
|
449 ("$,1D`DQD](B" (?$,1D`(B 0 ?$,1DQ(B ?$,1D](B) (0 ?$,1D](B))
|
|
450 ("$,1D`DVDM(B" (?$,1D`(B 0 ?$,1DV(B ?$,1DM(B))
|
|
451 ("$,1D`DWDM(B" (?$,1D`(B 0 ?$,1DW(B ?$,1DM(B))
|
|
452 ("$,1Dd(B" (?$,1Dd(B 0))
|
|
453 ("$,1Dc(B" (?$,1Dc(B 0))
|
|
454 ("$,1D`D[DR(B" (?$,1D`(B 0 ?$,1D[(B ?$,1DR(B))
|
|
455 ("$,1DS(B" (0 ?$,1DS(B)))
|
26894
|
456 "Alist of Lao vowel string vs the corresponding re-ordering rule.
|
|
457 Each element has this form:
|
|
458 (VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...)
|
|
459
|
|
460 VOWEL is a vowel string (e.g. \"(1`Q]P(B\").
|
|
461
|
|
462 NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
|
|
463 consonant. It is a list vowel characters or 0. The element 0
|
|
464 indicate the place to embed a consonant.
|
|
465
|
|
466 Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
|
|
467 following a consonant and preceding a maa-sakod character. If it is
|
|
468 nil, NO-MAA-SAKOD-RULE is used. The maa-sakod character is always
|
|
469 appended at the tail.
|
|
470
|
|
471 For instance, rule `(\"(1`WM(B\" (?(1`(B 0 ?(1W(B ?(1M(B))' tells that this vowel
|
|
472 string following a consonant `(1!(B' should be re-ordered as \"(1`!WM(B\".
|
|
473
|
|
474 Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod
|
|
475 character MAA-SAKOD-n.")
|
|
476
|
|
477 ;;;###autoload
|
|
(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
  "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
Only the first syllable is transcribed.
The value has the form: (START END LAO-STRING), where
START and END are the beginning and end positions of the Roman Lao syllable,
LAO-STRING is the Lao character transcription of it.
The value is nil if no syllable is found.

Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
syllable.  In that case, FROM and TO are indexes to STR."
  ;; Step 1: locate the first syllable.  Afterwards FROM is its start
  ;; position, or nil when there is none.
  (cond
   ((null str)
    ;; Search the current buffer; point is restored afterwards.
    (save-excursion
      (goto-char from)
      (setq to (re-search-forward lao-transcription-pattern to t))
      (setq from (and to (match-beginning 0)))))
   ((setq from (string-match lao-transcription-pattern str from))
    ;; A match starting at or beyond TO does not count.
    (if (< from to)
        (setq to (match-end 0))
      (setq from nil))))
  ;; Step 2: build the Lao string from the matched pieces.
  (when from
    ;; NOTE(review): these group numbers presuppose the group layout of
    ;; `lao-transcription-pattern' -- confirm when changing the pattern.
    (let ((roman-consonant (match-string 1 str))
          (roman-semivowel (match-string 3 str))
          (roman-vowel (match-string 5 str))
          (roman-final (match-string 8 str))
          (roman-tone (match-string 9 str))
          tone-pos)
      (setq to (match-end 0))
      (if (null roman-consonant)
          ;; No consonant group: the punctuation alternative matched.
          (setq str (cdr (assoc (match-string 0 str)
                                lao-transcription-punctuation-alist)))
        (let ((onset (cdr (assoc roman-consonant
                                 lao-transcription-consonant-alist))))
          ;; Some alist values are wrapped in a one-element vector.
          (when (vectorp onset)
            (setq onset (aref onset 0)))
          (if (null roman-semivowel)
              (setq str onset)
            ;; The semivowel goes right after the first consonant
            ;; character.
            (let ((glide (cdr (assoc roman-semivowel
                                     lao-transcription-semi-vowel-alist))))
              (setq str (if (= (length onset) 1)
                            (concat onset glide)
                          (concat (substring onset 0 1) glide
                                  (substring onset 1)))))))
        (when roman-vowel
          (let ((lao-vowel (cdr (assoc roman-vowel
                                       lao-transcription-vowel-alist)))
                (cluster str)
                entry rule lao-final)
            (when (vectorp lao-vowel)
              (setq lao-vowel (aref lao-vowel 0)))
            (setq entry (assoc lao-vowel lao-vowel-reordering-rule))
            ;; Pick the re-ordering rule: without maa-sakod the first
            ;; rule; with maa-sakod a rule keyed by that character,
            ;; else the second rule, else the first.
            (if (null roman-final)
                (setq rule (nth 1 entry))
              (setq lao-final
                    (cdr (assoc roman-final
                                lao-transcription-maa-sakod-alist))
                    rule
                    (or (cdr (assq (aref lao-final 0) (nthcdr 2 entry)))
                        (nth 2 entry)
                        (nth 1 entry))))
            (unless rule
              (error "Lao vowel %S has no re-ordering rule" lao-vowel))
            ;; Expand the rule: 0 stands for the consonant cluster,
            ;; anything else is a vowel character.
            (setq str "")
            (dolist (item rule)
              (if (= item 0)
                  (setq str (concat str cluster)
                        tone-pos (length str))
                (setq str (concat str (list item)))))
            (unless tone-pos
              (error "Lao vowel %S has malformed re-ordering rule"
                     roman-vowel))
            ;; Move the tone position past any upper or lower vowel
            ;; that directly follows the consonant cluster.
            (let ((len (length str)))
              (while (and (< tone-pos len)
                          (memq (get-char-code-property (aref str tone-pos)
                                                        'phonetic-type)
                                '(vowel-lower vowel-upper)))
                (setq tone-pos (1+ tone-pos))))
            ;; Maa-sakod is appended at the tail.
            (when lao-final
              (setq str (concat str lao-final)))
            (when roman-tone
              (setq str (concat (substring str 0 tone-pos)
                                (cdr (assoc roman-tone
                                            lao-transcription-tone-alist))
                                (substring str tone-pos)))))))
      (list from to (lao-compose-string str)))))
|
|
565
|
|
566 ;;;###autoload
|
|
(defun lao-transcribe-roman-to-lao-string (str)
  "Transcribe Romanized Lao string STR to Lao character string.
STR is scanned syllable by syllable with
`lao-transcribe-single-roman-syllable-to-lao'.  Text that is not part
of a transcribed syllable is copied to the result unchanged."
  (let ((pos 0)
        (limit (length str))
        (pieces nil)
        syllable)
    (while (setq syllable
                 (lao-transcribe-single-roman-syllable-to-lao pos limit str))
      ;; Keep the text skipped before this syllable (possibly empty),
      ;; then the syllable's Lao transcription.
      (setq pieces (cons (nth 2 syllable)
                         (cons (substring str pos (car syllable)) pieces))
            pos (nth 1 syllable)))
    ;; Whatever follows the last syllable is kept as is.
    (apply 'concat (nreverse (cons (substring str pos limit) pieces)))))
|
|
584
|
|
585 ;;;###autoload
|
41890
|
(defun lao-post-read-conversion (len)
  "Compose Lao text in the LEN characters following point.
The work is done by `lao-compose-region'.  Return LEN."
  (let ((start (point)))
    (lao-compose-region start (+ start len)))
  len)
|
|
589
|
|
590 ;;;###autoload
|
26894
|
(defun lao-composition-function (from to pattern &optional string)
  "Compose Lao text in the region FROM and TO.
The text matches the regular expression PATTERN; PATTERN itself is not
used by this function.
Optional 4th argument STRING, if non-nil, is a string containing text
to compose.

The return value is number of composed characters, or nil if the
range FROM..TO holds fewer than two characters and nothing is
composed."
  (when (< (1+ from) to)
    (cond (string (compose-string string from to))
          (t (compose-region from to)))
    (- to from)))
|
17083
|
604
|
28791
|
605 ;;;###autoload
|
|
(defun lao-compose-region (from to)
  "Compose Lao composite sequences in the region between FROM and TO.
When called interactively, operate on the current region.
Sequences are found with `lao-composition-pattern' while
`lao-category-table' is the active category table.  Point is not
preserved: the search starts at the beginning of the region and no
`save-excursion' is used."
  (interactive "r")
  (with-category-table lao-category-table
    (save-restriction
      ;; Narrowing bounds the search to the requested region.
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward lao-composition-pattern nil t)
        (compose-region (match-beginning 0) (match-end 0))))))
|
|
614
|
17083
|
615 ;;
|
18309
|
616 (provide 'lao-util)
|
17083
|
617
|
|
618 ;;; lao-util.el ends here
|