diff lisp/language/lao-util.el @ 26894:e0a13ff5901d

Mostly rewritten.
author Kenichi Handa <handa@m17n.org>
date Wed, 15 Dec 1999 00:47:53 +0000
parents 708271862495
children bdf7581eb093
line wrap: on
line diff
--- a/lisp/language/lao-util.el	Wed Dec 15 00:47:31 1999 +0000
+++ b/lisp/language/lao-util.el	Wed Dec 15 00:47:53 1999 +0000
@@ -30,6 +30,14 @@
   (interactive)
   (set-language-environment "Lao"))
 
+;; Setting information of Thai characters.
+
+(defconst lao-category-table (make-category-table))
+(define-category ?c "Lao consonant" lao-category-table)
+(define-category ?s "Lao semi-vowel" lao-category-table)
+(define-category ?v "Lao upper/lower vowel" lao-category-table)
+(define-category ?t "Lao tone" lao-category-table)
+
 (let ((l '((?(1!(B consonant "LETTER KOR  KAI'" "CHICKEN")
 	   (?(1"(B consonant "LETTER KHOR KHAI'" "EGG")
 	   (?(1#(B invalid nil)
@@ -127,11 +135,375 @@
 	   ))
       elm)
   (while l
-    (setq elm (car l))
-    (put-char-code-property (car elm) 'phonetic-type (car (cdr elm)))
-    (put-char-code-property (car elm) 'name (nth 2 elm))
-    (put-char-code-property (car elm) 'meaning (nth 3 elm))
-    (setq l (cdr l))))
+    (setq elm (car l) l (cdr l))
+    (let ((char (car elm))
+	  (ptype (nth 1 elm)))
+      (cond ((eq ptype 'consonant)
+	     (modify-category-entry char ?c lao-category-table))
+	    ((memq ptype '(vowel-upper vowel-lower))
+	     (modify-category-entry char ?v lao-category-table))
+	    ((eq ptype 'semivowel-lower)
+	     (modify-category-entry char ?s lao-category-table))
+	    ((eq ptype 'tone)
+	     (modify-category-entry char ?t lao-category-table)))
+      (put-char-code-property char 'phonetic-type ptype)
+      (put-char-code-property char 'name (nth 2 elm))
+      (put-char-code-property char 'meaning (nth 3 elm)))))
+
+;; The general composing rules are as follows:
+;;
+;;                          T
+;;       V        T         V                  T
+;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
+;;                                   v         v
+;;                             T
+;;        V         T          V                   T
+;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CvT -> C
+;;        s         s          s         s         s
+;;                                       v         v
+
+
+;; where C: consonant, V: vowel upper, v: vowel lower,
+;;       T: tone mark, s: semivowel lower
+
+(defvar lao-composition-pattern
+  "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
+  "Regular expression matching a Lao composite sequence.")
+
+;;;###autoload
+(defun lao-compose-string (str)
+  (with-category-table lao-category-table
+   (let ((idx 0))
+     (while (setq idx (string-match lao-composition-pattern str idx))
+       (compose-string str idx (match-end 0))
+       (setq idx (match-end 0))))
+   str))
+
+;;; LRT: Lao <-> Roman Transcription
+
+;; Upper vowels and tone-marks are put on the letter.
+;; Semi-vowel-sign-lo and lower vowels are put under the letter.
+
+(defconst lao-transcription-consonant-alist
+  (sort '(;; single consonants
+	  ("k" . "(1!(B")
+	  ("kh" . "(1"(B")
+	  ("qh" . "(1$(B")
+	  ("ng" . "(1'(B")
+	  ("j" . "(1((B")
+	  ("s" . "(1J(B")
+	  ("x" . "(1*(B")
+	  ("y" . "(1-(B")
+	  ("d" . "(14(B")
+	  ("t" . "(15(B")
+	  ("th" . "(16(B")
+	  ("dh" . "(17(B")
+	  ("n" . "(19(B")
+	  ("b" . "(1:(B")
+	  ("p" . "(1;(B")
+	  ("hp" . "(1<(B")
+	  ("fh" . "(1=(B")
+	  ("ph" . "(1>(B")
+	  ("f" . "(1?(B")
+	  ("m" . "(1A(B")
+	  ("gn" . "(1B(B")
+	  ("l" . "(1E(B")
+	  ("r" . "(1C(B")
+	  ("v" . "(1G(B")
+	  ("w" . "(1G(B")
+	  ("hh" . "(1K(B")
+	  ("O" . "(1M(B")
+	  ("h" . "(1N(B")
+	  ("nh" . "(1|(B")
+	  ("mh" . "(1}(B")
+	  ("lh" . ["(1K\(B"])
+	  ;; double consonants
+	  ("ngh" . ["(1K'(B"])
+	  ("yh" . ["(1K](B"])
+	  ("wh" . ["(1KG(B"])
+	  ("hl" . ["(1KE(B"])
+	  ("hy" . ["(1K-(B"])
+	  ("hn" . ["(1K9(B"])
+	  ("hm" . ["(1KA(B"])
+	  )
+	(function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+(defconst lao-transcription-semi-vowel-alist
+  '(("r" . "(1\(B")))
+
+(defconst lao-transcription-vowel-alist
+  (sort '(("a" . "(1P(B")
+	  ("ar" . "(1R(B")
+	  ("i" . "(1T(B")
+	  ("ii" . "(1U(B")
+	  ("eu" . "(1V(B")
+	  ("ur" . "(1W(B")
+	  ("u" . "(1X(B")
+	  ("uu" . "(1Y(B")
+	  ("e" . ["(1`P(B"])
+	  ("ee" . "(1`(B")
+	  ("ae" . ["(1aP(B"])
+	  ("aa" . "(1a(B")
+	  ("o" . ["(1bP(B"])
+	  ("oo" . "(1b(B")
+	  ("oe" . ["(1`RP(B"])
+	  ("or" . "(1m(B")
+	  ("er" . ["(1`T(B"])
+	  ("ir" . ["(1`U(B"])
+	  ("ua" . ["(1[GP(B"])
+	  ("uaa" . ["(1[G(B"])
+	  ("ie" . ["(1`Q]P(B"])
+	  ("ia" . ["(1`Q](B"])
+	  ("ea" . ["(1`VM(B"])
+	  ("eaa" . ["(1`WM(B"])
+	  ("ai" . "(1d(B")
+	  ("ei" . "(1c(B")
+	  ("ao" . ["(1`[R(B"])
+	  ("aM" . "(1S(B"))
+	(function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+;; Maa-sakod is put at the tail.
+(defconst lao-transcription-maa-sakod-alist
+  '(("k" . "(1!(B")
+    ("g" . "(1'(B")
+    ("y" . "(1-(B")
+    ("d" . "(14(B")
+    ("n" . "(19(B")
+    ("b" . "(1:(B")
+    ("m" . "(1A(B")
+    ("v" . "(1G(B")
+    ("w" . "(1G(B")
+    ))
+
+(defconst lao-transcription-tone-alist
+  '(("'" . "(1h(B")
+    ("\"" . "(1i(B")
+    ("^" . "(1j(B")
+    ("+" . "(1k(B")
+    ("~" . "(1l(B")))
+
+(defconst lao-transcription-punctuation-alist
+  '(("\\0" . "(1p(B")
+    ("\\1" . "(1q(B")
+    ("\\2" . "(1r(B")
+    ("\\3" . "(1s(B")
+    ("\\4" . "(1t(B")
+    ("\\5" . "(1u(B")
+    ("\\6" . "(1v(B")
+    ("\\7" . "(1w(B")
+    ("\\8" . "(1x(B")
+    ("\\9" . "(1y(B")
+    ("\\\\" . "(1f(B")
+    ("\\$" . "(1O(B")))
+
+(defconst lao-transcription-pattern
+  (concat
+   "\\("
+   (mapconcat 'car lao-transcription-consonant-alist "\\|")
+   "\\)\\("
+   (mapconcat 'car lao-transcription-semi-vowel-alist "\\|")
+   "\\)?\\(\\("
+   (mapconcat 'car lao-transcription-vowel-alist "\\|")
+   "\\)\\("
+   (mapconcat 'car lao-transcription-maa-sakod-alist "\\|")
+   "\\)?\\("
+   (mapconcat (lambda (x) (regexp-quote (car x)))
+	      lao-transcription-tone-alist "\\|")
+   "\\)?\\)?\\|"
+   (mapconcat (lambda (x) (regexp-quote (car x)))
+	      lao-transcription-punctuation-alist "\\|")
+   )
+  "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-transcription-pattern
+  (concat
+   "\\("
+   (regexp-opt (mapcar 'car lao-transcription-consonant-alist))
+   "\\)\\("
+   (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist))
+   "\\)?\\(\\("
+   (regexp-opt (mapcar 'car lao-transcription-vowel-alist))
+   "\\)\\("
+   (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist))
+   "\\)?\\("
+   (regexp-opt (mapcar 'car lao-transcription-tone-alist))
+   "\\)?\\)?\\|"
+   (regexp-opt (mapcar 'car lao-transcription-punctuation-alist))
+   )
+  "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-vowel-reordering-rule
+  '(("(1P(B" (0 ?(1P(B) (0 ?(1Q(B))
+    ("(1R(B" (0 ?(1R(B))
+    ("(1T(B" (0 ?(1U(B))
+    ("(1U(B" (0 ?(1U(B))
+    ("(1V(B" (0 ?(1V(B))
+    ("(1W(B" (0 ?(1W(B))
+    ("(1X(B" (0 ?(1X(B))
+    ("(1Y(B" (0 ?(1Y(B))
+    ("(1`P(B" (?(1`(B 0 ?(1P(B) (?(1`(B 0 ?(1Q(B))
+    ("(1`(B" (?(1`(B 0))
+    ("(1aP(B" (?(1a(B 0 ?(1P(B) (?(1a(B 0 ?(1Q(B))
+    ("(1a(B" (?(1a(B 0))
+    ("(1bP(B" (?(1b(B 0 ?(1P(B) (0 ?(1[(B) (?(1-(B ?(1b(B 0 ?(1Q(B) (?(1G(B ?(1b(B 0 ?(1Q(B))
+    ("(1b(B" (?(1b(B 0))
+    ("(1`RP(B" (?(1`(B 0 ?(1R(B ?(1P(B) (0 ?(1Q(B ?(1M(B))
+    ("(1m(B" (0 ?(1m(B) (0 ?(1M(B))
+    ("(1`T(B" (?(1`(B 0 ?(1T(B))
+    ("(1`U(B" (?(1`(B 0 ?(1U(B))
+    ("(1[GP(B" (0 ?(1[(B ?(1G(B ?(1P(B) (0 ?(1Q(B ?(1G(B))
+    ("(1[G(B" (0 ?(1[(B ?(1G(B) (0 ?(1G(B))
+    ("(1`Q]P(B" (?(1`(B 0 ?(1Q(B ?(1](B ?(1P(B) (0 ?(1Q(B ?(1](B))
+    ("(1`Q](B" (?(1`(B 0 ?(1Q(B ?(1](B) (0 ?(1](B))
+    ("(1`VM(B" (?(1`(B 0 ?(1V(B ?(1M(B))
+    ("(1`WM(B" (?(1`(B 0 ?(1W(B ?(1M(B))
+    ("(1d(B" (?(1d(B 0))
+    ("(1c(B" (?(1c(B 0))
+    ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B))
+    ("(1S(B" (0 ?(1S(B)))
+  "Alist of Lao vowel string vs the corresponding re-ordering rule.
+Each element has this form:
+	(VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...)
+
+VOWEL is a vowel string (e.g. \"(1`Q]P(B\").
+
+NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
+consonant.  It is a list vowel characters or 0.  The element 0
+indicate the place to embed a consonant.
+
+Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
+follwoing a consonant and preceding a maa-sakod character.  If it is
+nil, NO-MAA-SAKOD-RULE is used.  The maa-sakod character is alwasy
+appended at the tail.
+
+For instance, rule `(\"(1`WM(B\" (?(1`(B t ?(1W(B ?(1M(B))' tells that this vowel
+string following a consonant `(1!(B' should be re-ordered as \"(1`!WM(B\".
+
+Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod
+character MAA-SAKOD-n.")
+
+;;;###autoload
+(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
+  "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
+Only the first syllable is transcribed.
+The value has the form: (START END LAO-STRING), where
+START and END are the beggining and end positions of the Roman Lao syllable,
+LAO-STRING is the Lao character transcription of it.
+
+Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
+syllable.  In that case, FROM and TO are indexes to STR."
+  (if str
+      (if (setq from (string-match lao-transcription-pattern str from))
+	  (progn
+	    (if (>= from to)
+		(setq from nil)
+	      (setq to (match-end 0)))))
+    (save-excursion
+      (goto-char from)
+      (if (setq to (re-search-forward lao-transcription-pattern to t))
+	  (setq from (match-beginning 0))
+	(setq from nil))))
+  (if from
+      (let* ((consonant (match-string 1 str))
+	     (semivowel (match-string 3 str))
+	     (vowel (match-string 5 str))
+	     (maa-sakod (match-string 8 str))
+	     (tone (match-string 9 str))
+	     lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone
+	     clen cidx)
+	(setq to (match-end 0))
+	(if (not consonant)
+	    (setq str (cdr (assoc (match-string 0 str)
+				  lao-transcription-punctuation-alist)))
+	  (setq lao-consonant
+		(cdr (assoc consonant lao-transcription-consonant-alist)))
+	  (if (vectorp lao-consonant)
+	      (setq lao-consonant (aref lao-consonant 0)))
+	  (setq clen (length lao-consonant))
+	  (if semivowel
+	      ;; Include semivowel in STR.
+	      (setq lao-semivowel
+		    (cdr (assoc semivowel lao-transcription-semi-vowel-alist))
+		    str (if (= clen 1)
+			    (concat lao-consonant lao-semivowel)
+			  (concat (substring lao-consonant 0 1) lao-semivowel
+				  (substring lao-consonant 1))))
+	    (setq str lao-consonant))
+	  (if vowel
+	      (let (rule)
+		(setq lao-vowel
+		      (cdr (assoc vowel lao-transcription-vowel-alist)))
+		(if (vectorp lao-vowel)
+		    (setq lao-vowel (aref lao-vowel 0)))
+		(setq rule (assoc lao-vowel lao-vowel-reordering-rule))
+		(if (null maa-sakod)
+		    (setq rule (nth 1 rule))
+		  (setq lao-maa-sakod
+			(cdr (assoc maa-sakod lao-transcription-maa-sakod-alist))
+			rule
+			(or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule)))
+			    (nth 2 rule)
+			    (nth 1 rule))))
+		(or rule
+		    (error "Lao vowel %S has no re-ordering rule" lao-vowel))
+		(setq lao-consonant str str "")
+		(while rule
+		  (if (= (car rule) 0)
+		      (setq str (concat str lao-consonant)
+			    cidx (length str))
+		    (setq str (concat str (list (car rule)))))
+		  (setq rule (cdr rule)))
+		(or cidx
+		    (error "Lao vowel %S has malformed re-ordering rule" vowel))
+		;; Set CIDX to after upper or lower vowel if any.
+		(let ((len (length str)))
+		  (while (and (< cidx len)
+			      (memq (get-char-code-property (aref str cidx)
+							    'phonetic-type)
+				    '(vowel-lower vowel-upper)))
+		    (setq cidx (1+ cidx))))
+		(if lao-maa-sakod
+		    (setq str (concat str lao-maa-sakod)))
+		(if tone
+		    (setq lao-tone
+			  (cdr (assoc tone lao-transcription-tone-alist))
+			  str (concat (substring str 0 cidx) lao-tone
+				      (substring str cidx)))))))
+	(list from to (lao-compose-string str)))))
+
+;;;###autoload
+(defun lao-transcribe-roman-to-lao-string (str)
+  "Transcribe Romanized Lao string STR to Lao character string."
+  (let ((from 0)
+	(to (length str))
+	(lao-str "")
+	val)
+    (while (setq val (lao-transcribe-single-roman-syllable-to-lao from to str))
+      (let ((start (car val))
+	    (end (nth 1 val))
+	    (lao (nth 2 val)))
+	(if (> start from)
+	    (setq lao-str (concat lao-str (substring str from start) lao))
+	  (setq lao-str (concat lao-str lao)))
+	(setq from end)))
+    (if (< from to)
+	(concat lao-str (substring str from to))
+      lao-str)))
+
+;;;###autoload
+(defun lao-composition-function (from to pattern &optional string)
+  "Compose Lao text in the region FROM and TO.
+The text matches the regular expression PATTERN.
+Optional 4th argument STRING, if non-nil, is a string containing text
+to compose.
+
+The return value is number of composed characters."
+  (if (< (1+ from) to)
+      (prog1 (- to from)
+	(if string
+	    (compose-string from to)
+	  (compose-region from to))
+	(- to from))))
 
 ;;
 (provide 'lao-util)