Mercurial > emacs
diff lisp/language/lao-util.el @ 26894:e0a13ff5901d
Mostly rewritten.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 15 Dec 1999 00:47:53 +0000 |
parents | 708271862495 |
children | bdf7581eb093 |
line wrap: on
line diff
--- a/lisp/language/lao-util.el Wed Dec 15 00:47:31 1999 +0000 +++ b/lisp/language/lao-util.el Wed Dec 15 00:47:53 1999 +0000 @@ -30,6 +30,14 @@ (interactive) (set-language-environment "Lao")) +;; Setting information of Thai characters. + +(defconst lao-category-table (make-category-table)) +(define-category ?c "Lao consonant" lao-category-table) +(define-category ?s "Lao semi-vowel" lao-category-table) +(define-category ?v "Lao upper/lower vowel" lao-category-table) +(define-category ?t "Lao tone" lao-category-table) + (let ((l '((?(1!(B consonant "LETTER KOR KAI'" "CHICKEN") (?(1"(B consonant "LETTER KHOR KHAI'" "EGG") (?(1#(B invalid nil) @@ -127,11 +135,375 @@ )) elm) (while l - (setq elm (car l)) - (put-char-code-property (car elm) 'phonetic-type (car (cdr elm))) - (put-char-code-property (car elm) 'name (nth 2 elm)) - (put-char-code-property (car elm) 'meaning (nth 3 elm)) - (setq l (cdr l)))) + (setq elm (car l) l (cdr l)) + (let ((char (car elm)) + (ptype (nth 1 elm))) + (cond ((eq ptype 'consonant) + (modify-category-entry char ?c lao-category-table)) + ((memq ptype '(vowel-upper vowel-lower)) + (modify-category-entry char ?v lao-category-table)) + ((eq ptype 'semivowel-lower) + (modify-category-entry char ?s lao-category-table)) + ((eq ptype 'tone) + (modify-category-entry char ?t lao-category-table))) + (put-char-code-property char 'phonetic-type ptype) + (put-char-code-property char 'name (nth 2 elm)) + (put-char-code-property char 'meaning (nth 3 elm))))) + +;; The general composing rules are as follows: +;; +;; T +;; V T V T +;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C +;; v v +;; T +;; V T V T +;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CvT -> C +;; s s s s s +;; v v + + +;; where C: consonant, V: vowel upper, v: vowel lower, +;; T: tone mark, s: semivowel lower + +(defvar lao-composition-pattern + "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)" + "Regular expression matching a Lao composite sequence.") + +;;;###autoload +(defun lao-compose-string (str) + (with-category-table lao-category-table + (let ((idx 0)) + (while (setq idx (string-match lao-composition-pattern str idx)) + (compose-string str idx (match-end 0)) + (setq idx (match-end 0)))) + str)) + +;;; LRT: Lao <-> Roman Transcription + +;; Upper vowels and tone-marks are put on the letter. +;; Semi-vowel-sign-lo and lower vowels are put under the letter. + +(defconst lao-transcription-consonant-alist + (sort '(;; single consonants + ("k" . "(1!(B") + ("kh" . "(1"(B") + ("qh" . "(1$(B") + ("ng" . "(1'(B") + ("j" . "(1((B") + ("s" . "(1J(B") + ("x" . "(1*(B") + ("y" . "(1-(B") + ("d" . "(14(B") + ("t" . "(15(B") + ("th" . "(16(B") + ("dh" . "(17(B") + ("n" . "(19(B") + ("b" . "(1:(B") + ("p" . "(1;(B") + ("hp" . "(1<(B") + ("fh" . "(1=(B") + ("ph" . "(1>(B") + ("f" . "(1?(B") + ("m" . "(1A(B") + ("gn" . "(1B(B") + ("l" . "(1E(B") + ("r" . "(1C(B") + ("v" . "(1G(B") + ("w" . "(1G(B") + ("hh" . "(1K(B") + ("O" . "(1M(B") + ("h" . "(1N(B") + ("nh" . "(1|(B") + ("mh" . "(1}(B") + ("lh" . ["(1K\(B"]) + ;; double consonants + ("ngh" . ["(1K'(B"]) + ("yh" . ["(1K](B"]) + ("wh" . ["(1KG(B"]) + ("hl" . ["(1KE(B"]) + ("hy" . ["(1K-(B"]) + ("hn" . ["(1K9(B"]) + ("hm" . ["(1KA(B"]) + ) + (function (lambda (x y) (> (length (car x)) (length (car y))))))) + +(defconst lao-transcription-semi-vowel-alist + '(("r" . "(1\(B"))) + +(defconst lao-transcription-vowel-alist + (sort '(("a" . "(1P(B") + ("ar" . "(1R(B") + ("i" . "(1T(B") + ("ii" . "(1U(B") + ("eu" . "(1V(B") + ("ur" . "(1W(B") + ("u" . "(1X(B") + ("uu" . "(1Y(B") + ("e" . ["(1`P(B"]) + ("ee" . "(1`(B") + ("ae" . ["(1aP(B"]) + ("aa" . "(1a(B") + ("o" . ["(1bP(B"]) + ("oo" . "(1b(B") + ("oe" . ["(1`RP(B"]) + ("or" . "(1m(B") + ("er" . ["(1`T(B"]) + ("ir" . ["(1`U(B"]) + ("ua" . ["(1[GP(B"]) + ("uaa" . ["(1[G(B"]) + ("ie" . ["(1`Q]P(B"]) + ("ia" . ["(1`Q](B"]) + ("ea" . ["(1`VM(B"]) + ("eaa" . ["(1`WM(B"]) + ("ai" . "(1d(B") + ("ei" . "(1c(B") + ("ao" . ["(1`[R(B"]) + ("aM" . "(1S(B")) + (function (lambda (x y) (> (length (car x)) (length (car y))))))) + +;; Maa-sakod is put at the tail. +(defconst lao-transcription-maa-sakod-alist + '(("k" . "(1!(B") + ("g" . "(1'(B") + ("y" . "(1-(B") + ("d" . "(14(B") + ("n" . "(19(B") + ("b" . "(1:(B") + ("m" . "(1A(B") + ("v" . "(1G(B") + ("w" . "(1G(B") + )) + +(defconst lao-transcription-tone-alist + '(("'" . "(1h(B") + ("\"" . "(1i(B") + ("^" . "(1j(B") + ("+" . "(1k(B") + ("~" . "(1l(B"))) + +(defconst lao-transcription-punctuation-alist + '(("\\0" . "(1p(B") + ("\\1" . "(1q(B") + ("\\2" . "(1r(B") + ("\\3" . "(1s(B") + ("\\4" . "(1t(B") + ("\\5" . "(1u(B") + ("\\6" . "(1v(B") + ("\\7" . "(1w(B") + ("\\8" . "(1x(B") + ("\\9" . "(1y(B") + ("\\\\" . "(1f(B") + ("\\$" . "(1O(B"))) + +(defconst lao-transcription-pattern + (concat + "\\(" + (mapconcat 'car lao-transcription-consonant-alist "\\|") + "\\)\\(" + (mapconcat 'car lao-transcription-semi-vowel-alist "\\|") + "\\)?\\(\\(" + (mapconcat 'car lao-transcription-vowel-alist "\\|") + "\\)\\(" + (mapconcat 'car lao-transcription-maa-sakod-alist "\\|") + "\\)?\\(" + (mapconcat (lambda (x) (regexp-quote (car x))) + lao-transcription-tone-alist "\\|") + "\\)?\\)?\\|" + (mapconcat (lambda (x) (regexp-quote (car x))) + lao-transcription-punctuation-alist "\\|") + ) + "Regexp of Roman transcription pattern for one Lao syllable.") + +(defconst lao-transcription-pattern + (concat + "\\(" + (regexp-opt (mapcar 'car lao-transcription-consonant-alist)) + "\\)\\(" + (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist)) + "\\)?\\(\\(" + (regexp-opt (mapcar 'car lao-transcription-vowel-alist)) + "\\)\\(" + (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist)) + "\\)?\\(" + (regexp-opt (mapcar 'car lao-transcription-tone-alist)) + "\\)?\\)?\\|" + (regexp-opt (mapcar 'car lao-transcription-punctuation-alist)) + ) + "Regexp of Roman transcription pattern for one Lao syllable.") + +(defconst lao-vowel-reordering-rule + '(("(1P(B" (0 ?(1P(B) (0 ?(1Q(B)) + ("(1R(B" (0 ?(1R(B)) + ("(1T(B" (0 ?(1U(B)) + ("(1U(B" (0 ?(1U(B)) + ("(1V(B" (0 ?(1V(B)) + ("(1W(B" (0 ?(1W(B)) + ("(1X(B" (0 ?(1X(B)) + ("(1Y(B" (0 ?(1Y(B)) + ("(1`P(B" (?(1`(B 0 ?(1P(B) (?(1`(B 0 ?(1Q(B)) + ("(1`(B" (?(1`(B 0)) + ("(1aP(B" (?(1a(B 0 ?(1P(B) (?(1a(B 0 ?(1Q(B)) + ("(1a(B" (?(1a(B 0)) + ("(1bP(B" (?(1b(B 0 ?(1P(B) (0 ?(1[(B) (?(1-(B ?(1b(B 0 ?(1Q(B) (?(1G(B ?(1b(B 0 ?(1Q(B)) + ("(1b(B" (?(1b(B 0)) + ("(1`RP(B" (?(1`(B 0 ?(1R(B ?(1P(B) (0 ?(1Q(B ?(1M(B)) + ("(1m(B" (0 ?(1m(B) (0 ?(1M(B)) + ("(1`T(B" (?(1`(B 0 ?(1T(B)) + ("(1`U(B" (?(1`(B 0 ?(1U(B)) + ("(1[GP(B" (0 ?(1[(B ?(1G(B ?(1P(B) (0 ?(1Q(B ?(1G(B)) + ("(1[G(B" (0 ?(1[(B ?(1G(B) (0 ?(1G(B)) + ("(1`Q]P(B" (?(1`(B 0 ?(1Q(B ?(1](B ?(1P(B) (0 ?(1Q(B ?(1](B)) + ("(1`Q](B" (?(1`(B 0 ?(1Q(B ?(1](B) (0 ?(1](B)) + ("(1`VM(B" (?(1`(B 0 ?(1V(B ?(1M(B)) + ("(1`WM(B" (?(1`(B 0 ?(1W(B ?(1M(B)) + ("(1d(B" (?(1d(B 0)) + ("(1c(B" (?(1c(B 0)) + ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B)) + ("(1S(B" (0 ?(1S(B))) + "Alist of Lao vowel string vs the corresponding re-ordering rule. +Each element has this form: + (VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...) + +VOWEL is a vowel string (e.g. \"(1`Q]P(B\"). + +NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a +consonant. It is a list vowel characters or 0. The element 0 +indicate the place to embed a consonant. + +Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL +follwoing a consonant and preceding a maa-sakod character. If it is +nil, NO-MAA-SAKOD-RULE is used. The maa-sakod character is alwasy +appended at the tail. + +For instance, rule `(\"(1`WM(B\" (?(1`(B t ?(1W(B ?(1M(B))' tells that this vowel +string following a consonant `(1!(B' should be re-ordered as \"(1`!WM(B\". + +Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod +character MAA-SAKOD-n.") + +;;;###autoload +(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str) + "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string. +Only the first syllable is transcribed. +The value has the form: (START END LAO-STRING), where +START and END are the beggining and end positions of the Roman Lao syllable, +LAO-STRING is the Lao character transcription of it. + +Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao +syllable. In that case, FROM and TO are indexes to STR." + (if str + (if (setq from (string-match lao-transcription-pattern str from)) + (progn + (if (>= from to) + (setq from nil) + (setq to (match-end 0))))) + (save-excursion + (goto-char from) + (if (setq to (re-search-forward lao-transcription-pattern to t)) + (setq from (match-beginning 0)) + (setq from nil)))) + (if from + (let* ((consonant (match-string 1 str)) + (semivowel (match-string 3 str)) + (vowel (match-string 5 str)) + (maa-sakod (match-string 8 str)) + (tone (match-string 9 str)) + lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone + clen cidx) + (setq to (match-end 0)) + (if (not consonant) + (setq str (cdr (assoc (match-string 0 str) + lao-transcription-punctuation-alist))) + (setq lao-consonant + (cdr (assoc consonant lao-transcription-consonant-alist))) + (if (vectorp lao-consonant) + (setq lao-consonant (aref lao-consonant 0))) + (setq clen (length lao-consonant)) + (if semivowel + ;; Include semivowel in STR. + (setq lao-semivowel + (cdr (assoc semivowel lao-transcription-semi-vowel-alist)) + str (if (= clen 1) + (concat lao-consonant lao-semivowel) + (concat (substring lao-consonant 0 1) lao-semivowel + (substring lao-consonant 1)))) + (setq str lao-consonant)) + (if vowel + (let (rule) + (setq lao-vowel + (cdr (assoc vowel lao-transcription-vowel-alist))) + (if (vectorp lao-vowel) + (setq lao-vowel (aref lao-vowel 0))) + (setq rule (assoc lao-vowel lao-vowel-reordering-rule)) + (if (null maa-sakod) + (setq rule (nth 1 rule)) + (setq lao-maa-sakod + (cdr (assoc maa-sakod lao-transcription-maa-sakod-alist)) + rule + (or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule))) + (nth 2 rule) + (nth 1 rule)))) + (or rule + (error "Lao vowel %S has no re-ordering rule" lao-vowel)) + (setq lao-consonant str str "") + (while rule + (if (= (car rule) 0) + (setq str (concat str lao-consonant) + cidx (length str)) + (setq str (concat str (list (car rule))))) + (setq rule (cdr rule))) + (or cidx + (error "Lao vowel %S has malformed re-ordering rule" vowel)) + ;; Set CIDX to after upper or lower vowel if any. + (let ((len (length str))) + (while (and (< cidx len) + (memq (get-char-code-property (aref str cidx) + 'phonetic-type) + '(vowel-lower vowel-upper))) + (setq cidx (1+ cidx)))) + (if lao-maa-sakod + (setq str (concat str lao-maa-sakod))) + (if tone + (setq lao-tone + (cdr (assoc tone lao-transcription-tone-alist)) + str (concat (substring str 0 cidx) lao-tone + (substring str cidx))))))) + (list from to (lao-compose-string str))))) + +;;;###autoload +(defun lao-transcribe-roman-to-lao-string (str) + "Transcribe Romanized Lao string STR to Lao character string." + (let ((from 0) + (to (length str)) + (lao-str "") + val) + (while (setq val (lao-transcribe-single-roman-syllable-to-lao from to str)) + (let ((start (car val)) + (end (nth 1 val)) + (lao (nth 2 val))) + (if (> start from) + (setq lao-str (concat lao-str (substring str from start) lao)) + (setq lao-str (concat lao-str lao))) + (setq from end))) + (if (< from to) + (concat lao-str (substring str from to)) + lao-str))) + +;;;###autoload +(defun lao-composition-function (from to pattern &optional string) + "Compose Lao text in the region FROM and TO. +The text matches the regular expression PATTERN. +Optional 4th argument STRING, if non-nil, is a string containing text +to compose. + +The return value is number of composed characters." + (if (< (1+ from) to) + (prog1 (- to from) + (if string + (compose-string from to) + (compose-region from to)) + (- to from)))) ;; (provide 'lao-util)