changeset 26894:e0a13ff5901d

Mostly rewritten.
author Kenichi Handa <handa@m17n.org>
date Wed, 15 Dec 1999 00:47:53 +0000
parents 78d4a8d767d5
children 5562243fdd2b
files lisp/language/devan-util.el lisp/language/lao-util.el
diffstat 2 files changed, 533 insertions(+), 183 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/language/devan-util.el	Wed Dec 15 00:47:31 1999 +0000
+++ b/lisp/language/devan-util.el	Wed Dec 15 00:47:53 1999 +0000
@@ -49,58 +49,64 @@
 ;;; Basic functions.
 
 ;;;###autoload
-(defun indian-to-devanagari (ch)
-  "Convert IS 13194 characters to Devanagari basic characters."
-  (let ((charcodes (split-char ch)))
+(defun indian-to-devanagari (char)
+  "Convert IS 13194 character CHAR to Devanagari basic characters.
+If CHAR is not IS 13194, return CHAR as is."
+  (let ((charcodes (split-char char)))
     (if (eq (car charcodes) 'indian-is13194)
 	(make-char 'indian-2-column ?\x21 (nth 1 charcodes))
-      ch)))
+      char)))
 
 ;;;###autoload
-(defun devanagari-to-indian (ch)
-  "Convert Devanagari basic characters to IS 13194 characters."
-  (let* ((charcodes (split-char ch))
-	 (charset (car charcodes))
-	 (code-h  (car (cdr charcodes))))
+(defun devanagari-to-indian (char)
+  "Convert Devanagari basic character CHAR to IS 13194 characters.
+If CHAR is not Devanagari basic character, return CHAR as is."
+  (let ((charcodes (split-char char)))
     (if (and (eq (car charcodes) 'indian-2-column)
 	     (= (nth 1 charcodes) ?\x21))
 	(make-char 'indian-is13194 (nth 2 charcodes))
-      ch)))
+      char)))
 
 ;;;###autoload
 (defun indian-to-devanagari-region (from to)
-  "Convert IS 13194 characters in region to Devanagari basic characters."
+  "Convert IS 13194 characters in region to Devanagari basic characters.
+When called from a program, expects two arguments,
+positions (integers or markers) specifying the region."
   (interactive "r")
-  (save-restriction 
-    (narrow-to-region from to)
-    (goto-char (point-min))
-;   (while (re-search-forward "\\cd" nil t)
-    (while (re-search-forward "." nil t)
-      (let* ((devanagari-char (indian-to-devanagari (preceding-char))))
-	(delete-char -1)
-	(insert devanagari-char)))))
+  (save-excursion
+    (goto-char from)
+    (while (< (point) to)
+      (let ((char (following-char)))
+	(if (eq (char-charset char) 'indian-is13194)
+	    (progn
+	      (delete-char 1)
+	      (insert (indian-to-devanagari char)))
+	  (forward-char 1))))))
 
 ;;;###autoload
 (defun devanagari-to-indian-region (from to)
-  "Convert Devanagari basic characters in region to Indian characters."
+  "Convert Devanagari basic characters in region to Indian characters.
+When called from a program, expects two arguments,
+positions (integers or markers) specifying the region."
   (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (goto-char (point-min))
-;   (while (re-search-forward "\\cD" nil t) ; Devanagari Character Code.
-    (while (re-search-forward "." nil t) 
-      (let* ((indian-char (devanagari-to-indian (preceding-char))))
-	(delete-char -1)
-	(insert indian-char)))))
+  (save-excursion
+    (goto-char from)
+    (while (< (point) to)
+      (let ((char (following-char)))
+	(if (eq (char-charset char) 'indian-2-column)
+	    (progn
+	      (delete-char -1)
+	      (insert (devanagari-to-indian char)))
+	  (forward-char 1))))))
 
 ;;;###autoload
-(defun indian-to-devanagari-string (str)
-  "Convert Indian String to Devanagari Basic Character String."
-  (let* ((len (length str))
+(defun indian-to-devanagari-string (string)
+  "Convert Indian characters in STRING to Devanagari Basic characters."
+  (let* ((len (length string))
 	 (i 0)
 	 (vec (make-vector len 0)))
     (while (< i len)
-      (aset vec i (indian-to-devanagari (aref str i)))
+      (aset vec i (indian-to-devanagari (aref string i)))
       (setq i (1+ i)))
     (concat vec)))
 
@@ -256,12 +262,12 @@
 ;; Finally, convert 2-column glyphs to 1-column glyph
 ;; if such a glyph exist.
 ;;
-;; => $(6![(B (ml.mr) $(6!X(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B
+;; => $(6!X(B (ml.mr) $(6![(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B
 ;;
 ;; Compose the glyph.
 ;;
-;; => 2$(6!X@![(B1/2$(6!D@"FP!\(B1
-;; => 2$(6!X@![(B12$(6!D@"FP!\(B1
+;; => 4$(6!Xt%![0!X![1(B/4$(6!Dt%"Fv#!\0!D"F!\1(B
+;; => 4$(6!Xt%![0!X![14!Dt%"Fv#!\0!D"F!\1(B
 ;;
 
 ;;
@@ -269,7 +275,7 @@
 ;; 
 ;;
 ;; IMPORTANT:  
-;;        There may be many rules which you many want to be suppressed.
+;;        There may be many rules that you many want to suppress.
 ;;        In that case, please comment out that rule.
 ;;
 ;;        RULES WILL BE EVALUATED FROM FIRST TO LAST.
@@ -277,7 +283,7 @@
 ;;
 ;; TO DO: 
 ;;        Prepare multiple specific list of rules for each languages
-;;        which adopts Devanagari script.
+;;        that adopt Devanagari script.
 ;;
 
 (defconst devanagari-char-to-glyph-rules
@@ -558,20 +564,18 @@
 ;; glyphs-to-characters conversion.
 ;;
 
-(defun max-match-len (regexp-str)
-  "Return the possible length of matched string of given regexp.
-Only [...] pattern of regexp is recognized.
-The last character of inside of [....] is used for its length."
-  (let ((dest-str regexp-str))
-    (while (string-match "\\[\\([^\]]\\)+\\]" dest-str)
-      (setq dest-str 
-	    (concat (substring dest-str 0 (match-beginning 0))
-		    (substring dest-str (match-beginning 1) (match-end 1))
-		    (substring dest-str (match-end 0)))))
-    (length dest-str)))
+(defun max-match-len (regexp)
+  "Return the maximum length of text that can match the pattern REGEXP.
+Only [...] pattern of regexp is recognized."
+  (let ((len 0)
+	(index 0))
+    (while (string-match "\\[\\([^\]]\\)+\\]" regexp index)
+      (setq len (+ len (- (match-beginning 0) index) 1)
+	    index (match-end 0)))
+    len))
 
-;; Return t iff LIST1 and LIST2 has a same member.
-(defun rule-intersection (list1 list2)
+;; Return t iff at least one member appears in both LIST1 and LIST2.
+(defun intersecting-p (list1 list2)
   (let ((found nil))
     (while (and list1 (not found))
       (if (memq (car list1) list2)
@@ -579,28 +583,32 @@
 	(setq list1 (cdr list1))))
     found))
 
-(defun string-conversion-by-rule (src-str symbol &rest specs)
-  "Convert string SRC-STR to a new string according to
-the rules described in the each character's SYMBOL property.  The
-rules are described in the forms of '((regexp str <specs>) ...), and
-the character sequence in the string which matches to 'regexp' are
-replaced with str.  If SPECS are not specified, only rules with no
-<specs> would be applied.  If SPECS are specified, then rules with no
-<specs> specified and rules with <spec> matches with SPECS would be
-applied.  Rules are tested in the order of the list, thus more
-specific rules should be placed in front of less important rules.  No
-composite character is supported, thus such must be converted by
-decompose-char before applying to this function.  If rule is given in
-the forms of regexp '...\\(...\\)...', then inside the parenthesis is
-the subject of the match.  Otherwise, the entire expression is the
-subject of the match."
+(defun string-conversion-by-rule (source symbol &rest specs)
+  "Convert string SOURCE by rules stored in SYMBOL property of each character.
+The remaining arguments forms a list SPECS that restricts applicable rules.
+
+The rules has the form ((REGEXP STR RULE-SPEC ...) ...).
+Each character sequence in STRING that matches REGEXP is
+replaced by STR.
+
+If SPECS is nil, only rules with no RULE-SPECs is applied.  Otherwise
+rules with no RULE-SPECS and rules that have at least one member of
+SPECS in RULE-SPECs is applied.
+
+Rules are tested in the order of the list, thus more specific rules
+should be placed in front of less specific rules.
+
+If rule is given in the forms of regexp '...\\(...\\)...', a character
+sequence that matches the pattern inside of the parenthesis is the
+subject of the match.  Otherwise, the entire expression is the subject
+of the match."
   (let ((pos 0) 
 	(dst-str ""))
-    (while (< pos (length src-str))
+    (while (< pos (length source))
       (let ((found nil)
 	    (rules (get-char-code-property 
 		    (string-to-char 
-		     (substring src-str pos)) symbol)))
+		     (substring source pos)) symbol)))
 	(while rules
 	  (let* ((rule (car rules))
 		 (regexp (car rule))
@@ -608,7 +616,7 @@
 		 (rule-specs (cdr (cdr rule)))
 		 search-pos)
 	    (if (not (or (null rule-specs)
-			 (rule-intersection specs rule-specs)))
+			 (intersecting-p specs rule-specs)))
 		(setq rules (cdr rules))
 	      (if (null (string-match "\\\\(.+\\\\)" regexp))
 		  (progn
@@ -619,7 +627,7 @@
 						    (string-match "^[^\\\\]*" regexp)
 						    (match-end 0))))))
 	      (if (< search-pos 0) (setq search-pos 0))
-	      (if (string-match regexp src-str search-pos)
+	      (if (string-match regexp source search-pos)
 		  (if (= (match-beginning 1) pos)
 		      (progn
 			(setq dst-str (concat dst-str replace-str))
@@ -631,7 +639,7 @@
 		(setq rules (cdr rules))))))
 	;; proceed to next position
 	(if (not found)
-	    (setq dst-str (concat dst-str (substring src-str pos (1+ pos)))
+	    (setq dst-str (concat dst-str (substring source pos (1+ pos)))
 		  pos (1+ pos)))))
     dst-str))
 
@@ -641,12 +649,12 @@
 ;;
 
 ;;;###autoload
-(defun char-to-glyph-devanagari (src-str &rest langs)
-  "Convert Devanagari characters in the string to Devanagari glyphs.  
+(defun char-to-glyph-devanagari (string &rest langs)
+  "Convert Devanagari characters in STRING to Devanagari glyphs.  
 Ligatures and special rules are processed."
   (apply 
    'string-conversion-by-rule 
-   (append (list src-str 'char-to-glyph) langs)))
+   (append (list string 'char-to-glyph) langs)))
 
 ;; Example:
 ;;(char-to-glyph-devanagari "$(5!X![!F!h!D!\(B") => "$(5!X!["F!D!\(B"
@@ -656,7 +664,7 @@
 ;; Phase 2: Compose Glyphs to form One Glyph.
 ;;
 
-;; Each list consist of glyph, application-priority and application-direction.
+;; Each list consists of glyph, application-priority and application-direction.
 ;;
 ;; Glyphs will be ordered from low priority number to high priority number.
 ;; If application-priority is omitted, it is assumed to be 0.
@@ -1044,21 +1052,22 @@
 ;; Determine composition priority and rule of the array of Glyphs.
 ;; Sort the glyphs with their priority.
 
-(defun devanagari-reorder-glyphs-for-composition (glyph-alist)
-  (let* ((pos 0)
-	 (ordered-glyphs '()))
-    (while (< pos (length glyph-alist))
-      (let* ((glyph (aref glyph-alist pos)))
+(defun devanagari-reorder-glyphs-for-composition (string start end)
+  (let ((pos start)
+	(ordered-glyphs nil))
+    (while (< pos end)
+      (let ((glyph (aref string pos)))
 	(setq pos (1+ pos))
 	(setq ordered-glyphs 
-	      (append ordered-glyphs (list (assq glyph devanagari-composition-rules))))))
+	      (append ordered-glyphs
+		      (list (assq glyph devanagari-composition-rules))))))
     (sort ordered-glyphs '(lambda (x y) (< (car (cdr x)) (car (cdr y)))))))
 
-;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "2$(6!XP"5@![(B1"
+! ;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "4$(6!Xv#"5t%![0!X"5![1(B"
 
 (defun devanagari-compose-to-one-glyph (devanagari-string)
   (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition
-			(string-to-vector devanagari-string)))
+			devanagari-string 0 (length devanagari-string)))
 	 ;; List of glyphs to be composed.
 	 (cmp-glyph-list (list (car (car o-glyph-list)))) 
 	 (o-glyph-list (cdr o-glyph-list)))
@@ -1077,11 +1086,31 @@
     (if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list))
       (apply 'compose-chars cmp-glyph-list))))
 
+(defun devanagari-composition-component (string &optional start end)
+  (or start (setq start 0))
+  (or end (setq end (length string)))
+  (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition
+			string start end))
+	 ;; List of glyphs to be composed.
+	 (cmp-glyph-list (list (car (car o-glyph-list)))))
+    (setq o-glyph-list (cdr o-glyph-list))
+    (while o-glyph-list
+      (let* ((o-glyph (car o-glyph-list))
+	     (glyph (if (< 2 (length o-glyph))
+			;; default composition
+			(list (car (cdr (cdr o-glyph))) (car o-glyph))
+		      ;; composition with a specified rule
+		      (list '(mr . ml) (car o-glyph)))))
+	(setq o-glyph-list (cdr o-glyph-list))
+	(setq cmp-glyph-list (append cmp-glyph-list glyph))))
+    ;; Convert glyphs to 1-column width if possible.
+    (devanagari-wide-to-narrow cmp-glyph-list)))
+
 ;; Utility function for Phase 2.5
-;; Check whether given glyph is a Devanagari vertical modifier or not.
+
+;; Check whether GLYPH is a Devanagari vertical modifier or not.
 ;; If it is a vertical modifier, whether it should be 1-column shape or not
 ;; depends on previous non-vertical modifier.
-   ; return nil if it is not vertical modifier.
 (defun devanagari-vertical-modifier-p (glyph)
   (string-match (char-to-string glyph)
 		"[$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M(B]"))
@@ -1092,12 +1121,13 @@
 		"[$(5![(B]"))
 
 (defun devanagari-wide-to-narrow-char (char)
-  "Return the corresponding narrow character if it exists."
+  "Convert Devanagari character CHAR to the corresponding narrow character.
+If there's no corresponding narrow character, return CHAR as is."
   (let ((narrow (cdr (assq char devanagari-1-column-char))))
-    (if narrow narrow char)))
+    (or narrow char)))
 
 ;;
-;;    Phase 2.5  Convert Appropriate Character to 1-column shape.
+;;    Phase 2.5  Convert appropriate character to 1-column shape.
 ;;
 ;; This is temporary and should be removed out when Emacs supports 
 ;; variable width characters.
@@ -1121,7 +1151,8 @@
     (cond ((null src-list) '())
 	  ; not glyph code
 	  ((not (numberp glyph)) 
-	   (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph)))
+	   (cons glyph
+		 (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph)))
 	  ; glyphs to be processed regardless of the value of "2-col-glyph"
 	  ((devanagari-non-vertical-modifier-p glyph)
 	   (cons (devanagari-wide-to-narrow-char glyph)
@@ -1132,7 +1163,8 @@
 	       (cons glyph
 		     (devanagari-wide-to-narrow-iter (cdr src-list) t))
 	       (cons (devanagari-wide-to-narrow-char glyph)
-		     (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))))
+		     (devanagari-wide-to-narrow-iter (cdr src-list)
+						     2-col-glyph))))
 	  ; normal glyph
 	  (t
 	   (if (cdr (assq glyph devanagari-1-column-char))
@@ -1147,65 +1179,18 @@
 ;; 
 
 ;;
-;; Decomposition of composite font.
+;; Decomposition of composite sequence.
 ;;
 
-(defun devanagari-normalize-narrow-glyph (charlist)
-  (let ((wide-char (car (rassoc (car charlist) devanagari-1-column-char))))
-    (if (null charlist) nil
-      (cons (if (null wide-char) (car charlist) wide-char)
-	    (devanagari-normalize-narrow-glyph (cdr charlist))))))
-
-(defvar devanagari-decomposition-rules
-  '(
-    (?$(5"p(B -10)
-    )
-  )
-
-(defun devanagari-reorder-glyphs-for-decomposition (glyphlist)
-  "This function re-orders glyph list for decomposition."
-  (sort glyphlist 
-	'(lambda (x y) 
-	   (let ((xx (nth 1 (assoc x devanagari-decomposition-rules)))
-		 (yy (nth 1 (assoc y devanagari-decomposition-rules))))
-	     (if (null xx) (setq xx 0))
-	     (if (null yy) (setq yy 0))
-	     (< xx yy)))))
-
-(defun devanagari-decompose-char (glyph)
-  "This function decomposes one Devanagari composite glyph to 
-   basic Devanagari characters as a string."
-  (let ((glyphlist 
-	 (if (eq (car (split-char glyph)) 'composition) 
-	     (string-to-list (decompose-composite-char glyph))
-	   (list glyph))))
-    (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist))
-    (setq glyphlist (devanagari-reorder-glyphs-for-decomposition glyphlist))
-    (string-conversion-by-rule 
-     (mapconcat 'char-to-string glyphlist "") 'glyph-to-char)))
-
 ;;;###autoload
 (defun devanagari-decompose-string (str)
-  "Decompose Devanagari glyph string STR to basic Devanagari character string."
-  (let ((len (length str))
-	(i 0)
-	(dst ""))
-    (while (< i len)
-      (setq dst (concat dst (devanagari-decompose-char (aref str i)))
-	    i (1+ i)))
-    dst))
+  "Decompose Devanagari string STR"
+  (decompose-string (copy-sequence str)))
 
 ;;;###autoload
 (defun devanagari-decompose-region (from to)
   (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (goto-char (point-min))
-    (while (re-search-forward "." nil t)
-      (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
-	     (decmps (devanagari-decompose-string (buffer-substring match-b match-e))))
-	(delete-char -1)
-	(insert decmps)))))
+  (decompose-region from to))
 
 ;;;
 ;;; Composition
@@ -1213,37 +1198,34 @@
 
 ;;;###autoload
 (defun devanagari-compose-string (str &rest langs)
-  (let ((len (length str))
-	(src (devanagari-decompose-string str)) (dst "") rest match-b match-e)
-    (while (string-match devanagari-composite-glyph-unit src)
-      (setq match-b (match-beginning 0) match-e (match-end 0))
-      (setq dst 
-	    (concat dst 
-		    (substring src 0 match-b)
-		    (devanagari-compose-to-one-glyph 
-		     (apply 
-		      'char-to-glyph-devanagari
-		      (cons (substring src match-b match-e)
-			    langs)))))
-      (setq src (substring src match-e)))
-    (setq dst (concat dst src))
-    dst))
+  (setq str (copy-sequence str))
+  (let ((idx 0)
+	rest match-b match-e)
+    (while (string-match devanagari-composite-glyph-unit str idx)
+      (let* ((match-b (match-beginning 0))
+	     (match-e (match-end 0))
+	     (cmps (devanagari-composition-component
+		    (apply 
+		     'char-to-glyph-devanagari
+		     (cons (substring str match-b match-e) langs)))))
+	(compose-string str match-b match-e cmps)
+	(setq idx match-e))))
+  str)
 
 ;;;###autoload
 (defun devanagari-compose-region (from to &rest langs)
   (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (goto-char (point-min))
-    (while (re-search-forward devanagari-composite-glyph-unit nil t)
-      (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
-	     (cmps (devanagari-compose-to-one-glyph
-		    (apply 
-		     'char-to-glyph-devanagari
-		     (cons (buffer-substring match-b match-e)
-			   langs)))))
-	(delete-region match-b match-e)
-	(insert cmps)))))
+  (save-excursion
+    (save-restriction
+      (narrow-to-region from to)
+      (goto-char (point-min))
+      (while (re-search-forward devanagari-composite-glyph-unit nil t)
+	(let* ((match-b (match-beginning 0)) (match-e (match-end 0))
+	       (cmps (devanagari-composition-component
+		      (apply 
+		       'char-to-glyph-devanagari
+		       (cons (buffer-substring match-b match-e) langs)))))
+	  (compose-region match-b match-e cmps))))))
 
 ;; For pre-write and post-read conversion
 
@@ -1260,20 +1242,18 @@
 
 ;;;###autoload
 (defun in-is13194-devanagari-post-read-conversion (len)
-  (let ((pos (point))
-	(buffer-modified-p (buffer-modified-p)))
-    (prog1
-	(devanagari-compose-from-is13194-region pos (+ pos len))
-      (set-buffer-modified-p buffer-modified-p))))
+  (let ((pos (point)))
+    (devanagari-compose-from-is13194-region pos (+ pos len))))
 
 ;;;###autoload
 (defun devanagari-decompose-to-is13194-region (from to)
   "Decompose Devanagari characters in the region to IS 13194 characters."
   (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (devanagari-decompose-region (point-min) (point-max))
-    (devanagari-to-indian-region (point-min) (point-max))))
+  (save-excursion
+    (save-restriction
+      (narrow-to-region from to)
+      (devanagari-decompose-region (point-min) (point-max))
+      (devanagari-to-indian-region (point-min) (point-max)))))
 
 ;;;###autoload
 (defun in-is13194-devanagari-pre-write-conversion (from to)
@@ -1304,8 +1284,6 @@
     (indian-decode-itrans-region (point-min) (point-max))
     (devanagari-compose-from-is13194-region (point-min) (point-max))))
 
-;; Test comment.
-
 ;;
 (provide 'devan-util)
 
--- a/lisp/language/lao-util.el	Wed Dec 15 00:47:31 1999 +0000
+++ b/lisp/language/lao-util.el	Wed Dec 15 00:47:53 1999 +0000
@@ -30,6 +30,14 @@
   (interactive)
   (set-language-environment "Lao"))
 
+;; Setting information of Thai characters.
+
+(defconst lao-category-table (make-category-table))
+(define-category ?c "Lao consonant" lao-category-table)
+(define-category ?s "Lao semi-vowel" lao-category-table)
+(define-category ?v "Lao upper/lower vowel" lao-category-table)
+(define-category ?t "Lao tone" lao-category-table)
+
 (let ((l '((?(1!(B consonant "LETTER KOR  KAI'" "CHICKEN")
 	   (?(1"(B consonant "LETTER KHOR KHAI'" "EGG")
 	   (?(1#(B invalid nil)
@@ -127,11 +135,375 @@
 	   ))
       elm)
   (while l
-    (setq elm (car l))
-    (put-char-code-property (car elm) 'phonetic-type (car (cdr elm)))
-    (put-char-code-property (car elm) 'name (nth 2 elm))
-    (put-char-code-property (car elm) 'meaning (nth 3 elm))
-    (setq l (cdr l))))
+    (setq elm (car l) l (cdr l))
+    (let ((char (car elm))
+	  (ptype (nth 1 elm)))
+      (cond ((eq ptype 'consonant)
+	     (modify-category-entry char ?c lao-category-table))
+	    ((memq ptype '(vowel-upper vowel-lower))
+	     (modify-category-entry char ?v lao-category-table))
+	    ((eq ptype 'semivowel-lower)
+	     (modify-category-entry char ?s lao-category-table))
+	    ((eq ptype 'tone)
+	     (modify-category-entry char ?t lao-category-table)))
+      (put-char-code-property char 'phonetic-type ptype)
+      (put-char-code-property char 'name (nth 2 elm))
+      (put-char-code-property char 'meaning (nth 3 elm)))))
+
+;; The general composing rules are as follows:
+;;
+;;                          T
+;;       V        T         V                  T
+;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
+;;                                   v         v
+;;                             T
+;;        V         T          V                   T
+;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CvT -> C
+;;        s         s          s         s         s
+;;                                       v         v
+
+
+;; where C: consonant, V: vowel upper, v: vowel lower,
+;;       T: tone mark, s: semivowel lower
+
+(defvar lao-composition-pattern
+  "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
+  "Regular expression matching a Lao composite sequence.")
+
+;;;###autoload
+(defun lao-compose-string (str)
+  (with-category-table lao-category-table
+   (let ((idx 0))
+     (while (setq idx (string-match lao-composition-pattern str idx))
+       (compose-string str idx (match-end 0))
+       (setq idx (match-end 0))))
+   str))
+
+;;; LRT: Lao <-> Roman Transcription
+
+;; Upper vowels and tone-marks are put on the letter.
+;; Semi-vowel-sign-lo and lower vowels are put under the letter.
+
+(defconst lao-transcription-consonant-alist
+  (sort '(;; single consonants
+	  ("k" . "(1!(B")
+	  ("kh" . "(1"(B")
+	  ("qh" . "(1$(B")
+	  ("ng" . "(1'(B")
+	  ("j" . "(1((B")
+	  ("s" . "(1J(B")
+	  ("x" . "(1*(B")
+	  ("y" . "(1-(B")
+	  ("d" . "(14(B")
+	  ("t" . "(15(B")
+	  ("th" . "(16(B")
+	  ("dh" . "(17(B")
+	  ("n" . "(19(B")
+	  ("b" . "(1:(B")
+	  ("p" . "(1;(B")
+	  ("hp" . "(1<(B")
+	  ("fh" . "(1=(B")
+	  ("ph" . "(1>(B")
+	  ("f" . "(1?(B")
+	  ("m" . "(1A(B")
+	  ("gn" . "(1B(B")
+	  ("l" . "(1E(B")
+	  ("r" . "(1C(B")
+	  ("v" . "(1G(B")
+	  ("w" . "(1G(B")
+	  ("hh" . "(1K(B")
+	  ("O" . "(1M(B")
+	  ("h" . "(1N(B")
+	  ("nh" . "(1|(B")
+	  ("mh" . "(1}(B")
+	  ("lh" . ["(1K\(B"])
+	  ;; double consonants
+	  ("ngh" . ["(1K'(B"])
+	  ("yh" . ["(1K](B"])
+	  ("wh" . ["(1KG(B"])
+	  ("hl" . ["(1KE(B"])
+	  ("hy" . ["(1K-(B"])
+	  ("hn" . ["(1K9(B"])
+	  ("hm" . ["(1KA(B"])
+	  )
+	(function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+(defconst lao-transcription-semi-vowel-alist
+  '(("r" . "(1\(B")))
+
+(defconst lao-transcription-vowel-alist
+  (sort '(("a" . "(1P(B")
+	  ("ar" . "(1R(B")
+	  ("i" . "(1T(B")
+	  ("ii" . "(1U(B")
+	  ("eu" . "(1V(B")
+	  ("ur" . "(1W(B")
+	  ("u" . "(1X(B")
+	  ("uu" . "(1Y(B")
+	  ("e" . ["(1`P(B"])
+	  ("ee" . "(1`(B")
+	  ("ae" . ["(1aP(B"])
+	  ("aa" . "(1a(B")
+	  ("o" . ["(1bP(B"])
+	  ("oo" . "(1b(B")
+	  ("oe" . ["(1`RP(B"])
+	  ("or" . "(1m(B")
+	  ("er" . ["(1`T(B"])
+	  ("ir" . ["(1`U(B"])
+	  ("ua" . ["(1[GP(B"])
+	  ("uaa" . ["(1[G(B"])
+	  ("ie" . ["(1`Q]P(B"])
+	  ("ia" . ["(1`Q](B"])
+	  ("ea" . ["(1`VM(B"])
+	  ("eaa" . ["(1`WM(B"])
+	  ("ai" . "(1d(B")
+	  ("ei" . "(1c(B")
+	  ("ao" . ["(1`[R(B"])
+	  ("aM" . "(1S(B"))
+	(function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+;; Maa-sakod is put at the tail.
+(defconst lao-transcription-maa-sakod-alist
+  '(("k" . "(1!(B")
+    ("g" . "(1'(B")
+    ("y" . "(1-(B")
+    ("d" . "(14(B")
+    ("n" . "(19(B")
+    ("b" . "(1:(B")
+    ("m" . "(1A(B")
+    ("v" . "(1G(B")
+    ("w" . "(1G(B")
+    ))
+
+(defconst lao-transcription-tone-alist
+  '(("'" . "(1h(B")
+    ("\"" . "(1i(B")
+    ("^" . "(1j(B")
+    ("+" . "(1k(B")
+    ("~" . "(1l(B")))
+
+(defconst lao-transcription-punctuation-alist
+  '(("\\0" . "(1p(B")
+    ("\\1" . "(1q(B")
+    ("\\2" . "(1r(B")
+    ("\\3" . "(1s(B")
+    ("\\4" . "(1t(B")
+    ("\\5" . "(1u(B")
+    ("\\6" . "(1v(B")
+    ("\\7" . "(1w(B")
+    ("\\8" . "(1x(B")
+    ("\\9" . "(1y(B")
+    ("\\\\" . "(1f(B")
+    ("\\$" . "(1O(B")))
+
+(defconst lao-transcription-pattern
+  (concat
+   "\\("
+   (mapconcat 'car lao-transcription-consonant-alist "\\|")
+   "\\)\\("
+   (mapconcat 'car lao-transcription-semi-vowel-alist "\\|")
+   "\\)?\\(\\("
+   (mapconcat 'car lao-transcription-vowel-alist "\\|")
+   "\\)\\("
+   (mapconcat 'car lao-transcription-maa-sakod-alist "\\|")
+   "\\)?\\("
+   (mapconcat (lambda (x) (regexp-quote (car x)))
+	      lao-transcription-tone-alist "\\|")
+   "\\)?\\)?\\|"
+   (mapconcat (lambda (x) (regexp-quote (car x)))
+	      lao-transcription-punctuation-alist "\\|")
+   )
+  "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-transcription-pattern
+  (concat
+   "\\("
+   (regexp-opt (mapcar 'car lao-transcription-consonant-alist))
+   "\\)\\("
+   (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist))
+   "\\)?\\(\\("
+   (regexp-opt (mapcar 'car lao-transcription-vowel-alist))
+   "\\)\\("
+   (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist))
+   "\\)?\\("
+   (regexp-opt (mapcar 'car lao-transcription-tone-alist))
+   "\\)?\\)?\\|"
+   (regexp-opt (mapcar 'car lao-transcription-punctuation-alist))
+   )
+  "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-vowel-reordering-rule
+  '(("(1P(B" (0 ?(1P(B) (0 ?(1Q(B))
+    ("(1R(B" (0 ?(1R(B))
+    ("(1T(B" (0 ?(1U(B))
+    ("(1U(B" (0 ?(1U(B))
+    ("(1V(B" (0 ?(1V(B))
+    ("(1W(B" (0 ?(1W(B))
+    ("(1X(B" (0 ?(1X(B))
+    ("(1Y(B" (0 ?(1Y(B))
+    ("(1`P(B" (?(1`(B 0 ?(1P(B) (?(1`(B 0 ?(1Q(B))
+    ("(1`(B" (?(1`(B 0))
+    ("(1aP(B" (?(1a(B 0 ?(1P(B) (?(1a(B 0 ?(1Q(B))
+    ("(1a(B" (?(1a(B 0))
+    ("(1bP(B" (?(1b(B 0 ?(1P(B) (0 ?(1[(B) (?(1-(B ?(1b(B 0 ?(1Q(B) (?(1G(B ?(1b(B 0 ?(1Q(B))
+    ("(1b(B" (?(1b(B 0))
+    ("(1`RP(B" (?(1`(B 0 ?(1R(B ?(1P(B) (0 ?(1Q(B ?(1M(B))
+    ("(1m(B" (0 ?(1m(B) (0 ?(1M(B))
+    ("(1`T(B" (?(1`(B 0 ?(1T(B))
+    ("(1`U(B" (?(1`(B 0 ?(1U(B))
+    ("(1[GP(B" (0 ?(1[(B ?(1G(B ?(1P(B) (0 ?(1Q(B ?(1G(B))
+    ("(1[G(B" (0 ?(1[(B ?(1G(B) (0 ?(1G(B))
+    ("(1`Q]P(B" (?(1`(B 0 ?(1Q(B ?(1](B ?(1P(B) (0 ?(1Q(B ?(1](B))
+    ("(1`Q](B" (?(1`(B 0 ?(1Q(B ?(1](B) (0 ?(1](B))
+    ("(1`VM(B" (?(1`(B 0 ?(1V(B ?(1M(B))
+    ("(1`WM(B" (?(1`(B 0 ?(1W(B ?(1M(B))
+    ("(1d(B" (?(1d(B 0))
+    ("(1c(B" (?(1c(B 0))
+    ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B))
+    ("(1S(B" (0 ?(1S(B)))
+  "Alist of Lao vowel string vs the corresponding re-ordering rule.
+Each element has this form:
+	(VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...)
+
+VOWEL is a vowel string (e.g. \"(1`Q]P(B\").
+
+NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
+consonant.  It is a list vowel characters or 0.  The element 0
+indicate the place to embed a consonant.
+
+Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
+follwoing a consonant and preceding a maa-sakod character.  If it is
+nil, NO-MAA-SAKOD-RULE is used.  The maa-sakod character is alwasy
+appended at the tail.
+
+For instance, rule `(\"(1`WM(B\" (?(1`(B t ?(1W(B ?(1M(B))' tells that this vowel
+string following a consonant `(1!(B' should be re-ordered as \"(1`!WM(B\".
+
+Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod
+character MAA-SAKOD-n.")
+
+;;;###autoload
+(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
+  "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
+Only the first syllable is transcribed.
+The value has the form: (START END LAO-STRING), where
+START and END are the beggining and end positions of the Roman Lao syllable,
+LAO-STRING is the Lao character transcription of it.
+
+Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
+syllable.  In that case, FROM and TO are indexes to STR."
+  (if str
+      (if (setq from (string-match lao-transcription-pattern str from))
+	  (progn
+	    (if (>= from to)
+		(setq from nil)
+	      (setq to (match-end 0)))))
+    (save-excursion
+      (goto-char from)
+      (if (setq to (re-search-forward lao-transcription-pattern to t))
+	  (setq from (match-beginning 0))
+	(setq from nil))))
+  (if from
+      (let* ((consonant (match-string 1 str))
+	     (semivowel (match-string 3 str))
+	     (vowel (match-string 5 str))
+	     (maa-sakod (match-string 8 str))
+	     (tone (match-string 9 str))
+	     lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone
+	     clen cidx)
+	(setq to (match-end 0))
+	(if (not consonant)
+	    (setq str (cdr (assoc (match-string 0 str)
+				  lao-transcription-punctuation-alist)))
+	  (setq lao-consonant
+		(cdr (assoc consonant lao-transcription-consonant-alist)))
+	  (if (vectorp lao-consonant)
+	      (setq lao-consonant (aref lao-consonant 0)))
+	  (setq clen (length lao-consonant))
+	  (if semivowel
+	      ;; Include semivowel in STR.
+	      (setq lao-semivowel
+		    (cdr (assoc semivowel lao-transcription-semi-vowel-alist))
+		    str (if (= clen 1)
+			    (concat lao-consonant lao-semivowel)
+			  (concat (substring lao-consonant 0 1) lao-semivowel
+				  (substring lao-consonant 1))))
+	    (setq str lao-consonant))
+	  (if vowel
+	      (let (rule)
+		(setq lao-vowel
+		      (cdr (assoc vowel lao-transcription-vowel-alist)))
+		(if (vectorp lao-vowel)
+		    (setq lao-vowel (aref lao-vowel 0)))
+		(setq rule (assoc lao-vowel lao-vowel-reordering-rule))
+		(if (null maa-sakod)
+		    (setq rule (nth 1 rule))
+		  (setq lao-maa-sakod
+			(cdr (assoc maa-sakod lao-transcription-maa-sakod-alist))
+			rule
+			(or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule)))
+			    (nth 2 rule)
+			    (nth 1 rule))))
+		(or rule
+		    (error "Lao vowel %S has no re-ordering rule" lao-vowel))
+		(setq lao-consonant str str "")
+		(while rule
+		  (if (= (car rule) 0)
+		      (setq str (concat str lao-consonant)
+			    cidx (length str))
+		    (setq str (concat str (list (car rule)))))
+		  (setq rule (cdr rule)))
+		(or cidx
+		    (error "Lao vowel %S has malformed re-ordering rule" vowel))
+		;; Set CIDX to after upper or lower vowel if any.
+		(let ((len (length str)))
+		  (while (and (< cidx len)
+			      (memq (get-char-code-property (aref str cidx)
+							    'phonetic-type)
+				    '(vowel-lower vowel-upper)))
+		    (setq cidx (1+ cidx))))
+		(if lao-maa-sakod
+		    (setq str (concat str lao-maa-sakod)))
+		(if tone
+		    (setq lao-tone
+			  (cdr (assoc tone lao-transcription-tone-alist))
+			  str (concat (substring str 0 cidx) lao-tone
+				      (substring str cidx)))))))
+	(list from to (lao-compose-string str)))))
+
+;;;###autoload
+(defun lao-transcribe-roman-to-lao-string (str)
+  "Transcribe Romanized Lao string STR to Lao character string."
+  (let ((from 0)
+	(to (length str))
+	(lao-str "")
+	val)
+    (while (setq val (lao-transcribe-single-roman-syllable-to-lao from to str))
+      (let ((start (car val))
+	    (end (nth 1 val))
+	    (lao (nth 2 val)))
+	(if (> start from)
+	    (setq lao-str (concat lao-str (substring str from start) lao))
+	  (setq lao-str (concat lao-str lao)))
+	(setq from end)))
+    (if (< from to)
+	(concat lao-str (substring str from to))
+      lao-str)))
+
+;;;###autoload
+(defun lao-composition-function (from to pattern &optional string)
+  "Compose Lao text in the region FROM and TO.
+The text matches the regular expression PATTERN.
+Optional 4th argument STRING, if non-nil, is a string containing text
+to compose.
+
+The return value is number of composed characters."
+  (if (< (1+ from) to)
+      (prog1 (- to from)
+	(if string
+	    (compose-string from to)
+	  (compose-region from to))
+	(- to from))))
 
 ;;
 (provide 'lao-util)