Mercurial > emacs
changeset 26894:e0a13ff5901d
Mostly rewritten.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 15 Dec 1999 00:47:53 +0000 |
parents | 78d4a8d767d5 |
children | 5562243fdd2b |
files | lisp/language/devan-util.el lisp/language/lao-util.el |
diffstat | 2 files changed, 533 insertions(+), 183 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/language/devan-util.el Wed Dec 15 00:47:31 1999 +0000 +++ b/lisp/language/devan-util.el Wed Dec 15 00:47:53 1999 +0000 @@ -49,58 +49,64 @@ ;;; Basic functions. ;;;###autoload -(defun indian-to-devanagari (ch) - "Convert IS 13194 characters to Devanagari basic characters." - (let ((charcodes (split-char ch))) +(defun indian-to-devanagari (char) + "Convert IS 13194 character CHAR to Devanagari basic characters. +If CHAR is not IS 13194, return CHAR as is." + (let ((charcodes (split-char char))) (if (eq (car charcodes) 'indian-is13194) (make-char 'indian-2-column ?\x21 (nth 1 charcodes)) - ch))) + char))) ;;;###autoload -(defun devanagari-to-indian (ch) - "Convert Devanagari basic characters to IS 13194 characters." - (let* ((charcodes (split-char ch)) - (charset (car charcodes)) - (code-h (car (cdr charcodes)))) +(defun devanagari-to-indian (char) + "Convert Devanagari basic character CHAR to IS 13194 characters. +If CHAR is not Devanagari basic character, return CHAR as is." + (let ((charcodes (split-char char))) (if (and (eq (car charcodes) 'indian-2-column) (= (nth 1 charcodes) ?\x21)) (make-char 'indian-is13194 (nth 2 charcodes)) - ch))) + char))) ;;;###autoload (defun indian-to-devanagari-region (from to) - "Convert IS 13194 characters in region to Devanagari basic characters." + "Convert IS 13194 characters in region to Devanagari basic characters. +When called from a program, expects two arguments, +positions (integers or markers) specifying the region." (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) -; (while (re-search-forward "\\cd" nil t) - (while (re-search-forward "." nil t) - (let* ((devanagari-char (indian-to-devanagari (preceding-char)))) - (delete-char -1) - (insert devanagari-char))))) + (save-excursion + (goto-char from) + (while (< (point) to) + (let ((char (following-char))) + (if (eq (char-charset char) 'indian-is13194) + (progn + (delete-char 1) + (insert (indian-to-devanagari char))) + (forward-char 1)))))) ;;;###autoload (defun devanagari-to-indian-region (from to) - "Convert Devanagari basic characters in region to Indian characters." + "Convert Devanagari basic characters in region to Indian characters. +When called from a program, expects two arguments, +positions (integers or markers) specifying the region." (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) -; (while (re-search-forward "\\cD" nil t) ; Devanagari Character Code. - (while (re-search-forward "." nil t) - (let* ((indian-char (devanagari-to-indian (preceding-char)))) - (delete-char -1) - (insert indian-char))))) + (save-excursion + (goto-char from) + (while (< (point) to) + (let ((char (following-char))) + (if (eq (char-charset char) 'indian-2-column) + (progn + (delete-char -1) + (insert (devanagari-to-indian char))) + (forward-char 1)))))) ;;;###autoload -(defun indian-to-devanagari-string (str) - "Convert Indian String to Devanagari Basic Character String." - (let* ((len (length str)) +(defun indian-to-devanagari-string (string) + "Convert Indian characters in STRING to Devanagari Basic characters." + (let* ((len (length string)) (i 0) (vec (make-vector len 0))) (while (< i len) - (aset vec i (indian-to-devanagari (aref str i))) + (aset vec i (indian-to-devanagari (aref string i))) (setq i (1+ i))) (concat vec))) @@ -256,12 +262,12 @@ ;; Finally, convert 2-column glyphs to 1-column glyph ;; if such a glyph exist. ;; -;; => $(6![(B (ml.mr) $(6!X(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B +;; => $(6!X(B (ml.mr) $(6![(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B ;; ;; Compose the glyph. ;; -;; => 2$(6!X@![(B1/2$(6!D@"FP!\(B1 -;; => 2$(6!X@![(B12$(6!D@"FP!\(B1 +;; => 4$(6!Xt%![0!X![1(B/4$(6!Dt%"Fv#!\0!D"F!\1(B +;; => 4$(6!Xt%![0!X![14!Dt%"Fv#!\0!D"F!\1(B ;; ;; @@ -269,7 +275,7 @@ ;; ;; ;; IMPORTANT: -;; There may be many rules which you many want to be suppressed. +;; There may be many rules that you many want to suppress. ;; In that case, please comment out that rule. ;; ;; RULES WILL BE EVALUATED FROM FIRST TO LAST. @@ -277,7 +283,7 @@ ;; ;; TO DO: ;; Prepare multiple specific list of rules for each languages -;; which adopts Devanagari script. +;; that adopt Devanagari script. ;; (defconst devanagari-char-to-glyph-rules @@ -558,20 +564,18 @@ ;; glyphs-to-characters conversion. ;; -(defun max-match-len (regexp-str) - "Return the possible length of matched string of given regexp. -Only [...] pattern of regexp is recognized. -The last character of inside of [....] is used for its length." - (let ((dest-str regexp-str)) - (while (string-match "\\[\\([^\]]\\)+\\]" dest-str) - (setq dest-str - (concat (substring dest-str 0 (match-beginning 0)) - (substring dest-str (match-beginning 1) (match-end 1)) - (substring dest-str (match-end 0))))) - (length dest-str))) +(defun max-match-len (regexp) + "Return the maximum length of text that can match the pattern REGEXP. +Only [...] pattern of regexp is recognized." + (let ((len 0) + (index 0)) + (while (string-match "\\[\\([^\]]\\)+\\]" regexp index) + (setq len (+ len (- (match-beginning 0) index) 1) + index (match-end 0))) + len)) -;; Return t iff LIST1 and LIST2 has a same member. -(defun rule-intersection (list1 list2) +;; Return t iff at least one member appears in both LIST1 and LIST2. +(defun intersecting-p (list1 list2) (let ((found nil)) (while (and list1 (not found)) (if (memq (car list1) list2) @@ -579,28 +583,32 @@ (setq list1 (cdr list1)))) found)) -(defun string-conversion-by-rule (src-str symbol &rest specs) - "Convert string SRC-STR to a new string according to -the rules described in the each character's SYMBOL property. The -rules are described in the forms of '((regexp str <specs>) ...), and -the character sequence in the string which matches to 'regexp' are -replaced with str. If SPECS are not specified, only rules with no -<specs> would be applied. If SPECS are specified, then rules with no -<specs> specified and rules with <spec> matches with SPECS would be -applied. Rules are tested in the order of the list, thus more -specific rules should be placed in front of less important rules. No -composite character is supported, thus such must be converted by -decompose-char before applying to this function. If rule is given in -the forms of regexp '...\\(...\\)...', then inside the parenthesis is -the subject of the match. Otherwise, the entire expression is the -subject of the match." +(defun string-conversion-by-rule (source symbol &rest specs) + "Convert string SOURCE by rules stored in SYMBOL property of each character. +The remaining arguments forms a list SPECS that restricts applicable rules. + +The rules has the form ((REGEXP STR RULE-SPEC ...) ...). +Each character sequence in STRING that matches REGEXP is +replaced by STR. + +If SPECS is nil, only rules with no RULE-SPECs is applied. Otherwise +rules with no RULE-SPECS and rules that have at least one member of +SPECS in RULE-SPECs is applied. + +Rules are tested in the order of the list, thus more specific rules +should be placed in front of less specific rules. + +If rule is given in the forms of regexp '...\\(...\\)...', a character +sequence that matches the pattern inside of the parenthesis is the +subject of the match. Otherwise, the entire expression is the subject +of the match." (let ((pos 0) (dst-str "")) - (while (< pos (length src-str)) + (while (< pos (length source)) (let ((found nil) (rules (get-char-code-property (string-to-char - (substring src-str pos)) symbol))) + (substring source pos)) symbol))) (while rules (let* ((rule (car rules)) (regexp (car rule)) @@ -608,7 +616,7 @@ (rule-specs (cdr (cdr rule))) search-pos) (if (not (or (null rule-specs) - (rule-intersection specs rule-specs))) + (intersecting-p specs rule-specs))) (setq rules (cdr rules)) (if (null (string-match "\\\\(.+\\\\)" regexp)) (progn @@ -619,7 +627,7 @@ (string-match "^[^\\\\]*" regexp) (match-end 0)))))) (if (< search-pos 0) (setq search-pos 0)) - (if (string-match regexp src-str search-pos) + (if (string-match regexp source search-pos) (if (= (match-beginning 1) pos) (progn (setq dst-str (concat dst-str replace-str)) @@ -631,7 +639,7 @@ (setq rules (cdr rules)))))) ;; proceed to next position (if (not found) - (setq dst-str (concat dst-str (substring src-str pos (1+ pos))) + (setq dst-str (concat dst-str (substring source pos (1+ pos))) pos (1+ pos))))) dst-str)) @@ -641,12 +649,12 @@ ;; ;;;###autoload -(defun char-to-glyph-devanagari (src-str &rest langs) - "Convert Devanagari characters in the string to Devanagari glyphs. +(defun char-to-glyph-devanagari (string &rest langs) + "Convert Devanagari characters in STRING to Devanagari glyphs. Ligatures and special rules are processed." (apply 'string-conversion-by-rule - (append (list src-str 'char-to-glyph) langs))) + (append (list string 'char-to-glyph) langs))) ;; Example: ;;(char-to-glyph-devanagari "$(5!X![!F!h!D!\(B") => "$(5!X!["F!D!\(B" @@ -656,7 +664,7 @@ ;; Phase 2: Compose Glyphs to form One Glyph. ;; -;; Each list consist of glyph, application-priority and application-direction. +;; Each list consists of glyph, application-priority and application-direction. ;; ;; Glyphs will be ordered from low priority number to high priority number. ;; If application-priority is omitted, it is assumed to be 0. @@ -1044,21 +1052,22 @@ ;; Determine composition priority and rule of the array of Glyphs. ;; Sort the glyphs with their priority. -(defun devanagari-reorder-glyphs-for-composition (glyph-alist) - (let* ((pos 0) - (ordered-glyphs '())) - (while (< pos (length glyph-alist)) - (let* ((glyph (aref glyph-alist pos))) +(defun devanagari-reorder-glyphs-for-composition (string start end) + (let ((pos start) + (ordered-glyphs nil)) + (while (< pos end) + (let ((glyph (aref string pos))) (setq pos (1+ pos)) (setq ordered-glyphs - (append ordered-glyphs (list (assq glyph devanagari-composition-rules)))))) + (append ordered-glyphs + (list (assq glyph devanagari-composition-rules)))))) (sort ordered-glyphs '(lambda (x y) (< (car (cdr x)) (car (cdr y))))))) -;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "2$(6!XP"5@![(B1" +! ;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "4$(6!Xv#"5t%![0!X"5![1(B" (defun devanagari-compose-to-one-glyph (devanagari-string) (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition - (string-to-vector devanagari-string))) + devanagari-string 0 (length devanagari-string))) ;; List of glyphs to be composed. (cmp-glyph-list (list (car (car o-glyph-list)))) (o-glyph-list (cdr o-glyph-list))) @@ -1077,11 +1086,31 @@ (if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list)) (apply 'compose-chars cmp-glyph-list)))) +(defun devanagari-composition-component (string &optional start end) + (or start (setq start 0)) + (or end (setq end (length string))) + (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition + string start end)) + ;; List of glyphs to be composed. + (cmp-glyph-list (list (car (car o-glyph-list))))) + (setq o-glyph-list (cdr o-glyph-list)) + (while o-glyph-list + (let* ((o-glyph (car o-glyph-list)) + (glyph (if (< 2 (length o-glyph)) + ;; default composition + (list (car (cdr (cdr o-glyph))) (car o-glyph)) + ;; composition with a specified rule + (list '(mr . ml) (car o-glyph))))) + (setq o-glyph-list (cdr o-glyph-list)) + (setq cmp-glyph-list (append cmp-glyph-list glyph)))) + ;; Convert glyphs to 1-column width if possible. + (devanagari-wide-to-narrow cmp-glyph-list))) + ;; Utility function for Phase 2.5 -;; Check whether given glyph is a Devanagari vertical modifier or not. + +;; Check whether GLYPH is a Devanagari vertical modifier or not. ;; If it is a vertical modifier, whether it should be 1-column shape or not ;; depends on previous non-vertical modifier. - ; return nil if it is not vertical modifier. (defun devanagari-vertical-modifier-p (glyph) (string-match (char-to-string glyph) "[$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M(B]")) @@ -1092,12 +1121,13 @@ "[$(5![(B]")) (defun devanagari-wide-to-narrow-char (char) - "Return the corresponding narrow character if it exists." + "Convert Devanagari character CHAR to the corresponding narrow character. +If there's no corresponding narrow character, return CHAR as is." (let ((narrow (cdr (assq char devanagari-1-column-char)))) - (if narrow narrow char))) + (or narrow char))) ;; -;; Phase 2.5 Convert Appropriate Character to 1-column shape. +;; Phase 2.5 Convert appropriate character to 1-column shape. ;; ;; This is temporary and should be removed out when Emacs supports ;; variable width characters. @@ -1121,7 +1151,8 @@ (cond ((null src-list) '()) ; not glyph code ((not (numberp glyph)) - (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))) + (cons glyph + (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))) ; glyphs to be processed regardless of the value of "2-col-glyph" ((devanagari-non-vertical-modifier-p glyph) (cons (devanagari-wide-to-narrow-char glyph) @@ -1132,7 +1163,8 @@ (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) t)) (cons (devanagari-wide-to-narrow-char glyph) - (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph)))) + (devanagari-wide-to-narrow-iter (cdr src-list) + 2-col-glyph)))) ; normal glyph (t (if (cdr (assq glyph devanagari-1-column-char)) @@ -1147,65 +1179,18 @@ ;; ;; -;; Decomposition of composite font. +;; Decomposition of composite sequence. ;; -(defun devanagari-normalize-narrow-glyph (charlist) - (let ((wide-char (car (rassoc (car charlist) devanagari-1-column-char)))) - (if (null charlist) nil - (cons (if (null wide-char) (car charlist) wide-char) - (devanagari-normalize-narrow-glyph (cdr charlist)))))) - -(defvar devanagari-decomposition-rules - '( - (?$(5"p(B -10) - ) - ) - -(defun devanagari-reorder-glyphs-for-decomposition (glyphlist) - "This function re-orders glyph list for decomposition." - (sort glyphlist - '(lambda (x y) - (let ((xx (nth 1 (assoc x devanagari-decomposition-rules))) - (yy (nth 1 (assoc y devanagari-decomposition-rules)))) - (if (null xx) (setq xx 0)) - (if (null yy) (setq yy 0)) - (< xx yy))))) - -(defun devanagari-decompose-char (glyph) - "This function decomposes one Devanagari composite glyph to - basic Devanagari characters as a string." - (let ((glyphlist - (if (eq (car (split-char glyph)) 'composition) - (string-to-list (decompose-composite-char glyph)) - (list glyph)))) - (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist)) - (setq glyphlist (devanagari-reorder-glyphs-for-decomposition glyphlist)) - (string-conversion-by-rule - (mapconcat 'char-to-string glyphlist "") 'glyph-to-char))) - ;;;###autoload (defun devanagari-decompose-string (str) - "Decompose Devanagari glyph string STR to basic Devanagari character string." - (let ((len (length str)) - (i 0) - (dst "")) - (while (< i len) - (setq dst (concat dst (devanagari-decompose-char (aref str i))) - i (1+ i))) - dst)) + "Decompose Devanagari string STR" + (decompose-string (copy-sequence str))) ;;;###autoload (defun devanagari-decompose-region (from to) (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward "." nil t) - (let* ((match-b (match-beginning 0)) (match-e (match-end 0)) - (decmps (devanagari-decompose-string (buffer-substring match-b match-e)))) - (delete-char -1) - (insert decmps))))) + (decompose-region from to)) ;;; ;;; Composition @@ -1213,37 +1198,34 @@ ;;;###autoload (defun devanagari-compose-string (str &rest langs) - (let ((len (length str)) - (src (devanagari-decompose-string str)) (dst "") rest match-b match-e) - (while (string-match devanagari-composite-glyph-unit src) - (setq match-b (match-beginning 0) match-e (match-end 0)) - (setq dst - (concat dst - (substring src 0 match-b) - (devanagari-compose-to-one-glyph - (apply - 'char-to-glyph-devanagari - (cons (substring src match-b match-e) - langs))))) - (setq src (substring src match-e))) - (setq dst (concat dst src)) - dst)) + (setq str (copy-sequence str)) + (let ((idx 0) + rest match-b match-e) + (while (string-match devanagari-composite-glyph-unit str idx) + (let* ((match-b (match-beginning 0)) + (match-e (match-end 0)) + (cmps (devanagari-composition-component + (apply + 'char-to-glyph-devanagari + (cons (substring str match-b match-e) langs))))) + (compose-string str match-b match-e cmps) + (setq idx match-e)))) + str) ;;;###autoload (defun devanagari-compose-region (from to &rest langs) (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward devanagari-composite-glyph-unit nil t) - (let* ((match-b (match-beginning 0)) (match-e (match-end 0)) - (cmps (devanagari-compose-to-one-glyph - (apply - 'char-to-glyph-devanagari - (cons (buffer-substring match-b match-e) - langs))))) - (delete-region match-b match-e) - (insert cmps))))) + (save-excursion + (save-restriction + (narrow-to-region from to) + (goto-char (point-min)) + (while (re-search-forward devanagari-composite-glyph-unit nil t) + (let* ((match-b (match-beginning 0)) (match-e (match-end 0)) + (cmps (devanagari-composition-component + (apply + 'char-to-glyph-devanagari + (cons (buffer-substring match-b match-e) langs))))) + (compose-region match-b match-e cmps)))))) ;; For pre-write and post-read conversion @@ -1260,20 +1242,18 @@ ;;;###autoload (defun in-is13194-devanagari-post-read-conversion (len) - (let ((pos (point)) - (buffer-modified-p (buffer-modified-p))) - (prog1 - (devanagari-compose-from-is13194-region pos (+ pos len)) - (set-buffer-modified-p buffer-modified-p)))) + (let ((pos (point))) + (devanagari-compose-from-is13194-region pos (+ pos len)))) ;;;###autoload (defun devanagari-decompose-to-is13194-region (from to) "Decompose Devanagari characters in the region to IS 13194 characters." (interactive "r") - (save-restriction - (narrow-to-region from to) - (devanagari-decompose-region (point-min) (point-max)) - (devanagari-to-indian-region (point-min) (point-max)))) + (save-excursion + (save-restriction + (narrow-to-region from to) + (devanagari-decompose-region (point-min) (point-max)) + (devanagari-to-indian-region (point-min) (point-max))))) ;;;###autoload (defun in-is13194-devanagari-pre-write-conversion (from to) @@ -1304,8 +1284,6 @@ (indian-decode-itrans-region (point-min) (point-max)) (devanagari-compose-from-is13194-region (point-min) (point-max)))) -;; Test comment. - ;; (provide 'devan-util)
--- a/lisp/language/lao-util.el Wed Dec 15 00:47:31 1999 +0000 +++ b/lisp/language/lao-util.el Wed Dec 15 00:47:53 1999 +0000 @@ -30,6 +30,14 @@ (interactive) (set-language-environment "Lao")) +;; Setting information of Thai characters. + +(defconst lao-category-table (make-category-table)) +(define-category ?c "Lao consonant" lao-category-table) +(define-category ?s "Lao semi-vowel" lao-category-table) +(define-category ?v "Lao upper/lower vowel" lao-category-table) +(define-category ?t "Lao tone" lao-category-table) + (let ((l '((?(1!(B consonant "LETTER KOR KAI'" "CHICKEN") (?(1"(B consonant "LETTER KHOR KHAI'" "EGG") (?(1#(B invalid nil) @@ -127,11 +135,375 @@ )) elm) (while l - (setq elm (car l)) - (put-char-code-property (car elm) 'phonetic-type (car (cdr elm))) - (put-char-code-property (car elm) 'name (nth 2 elm)) - (put-char-code-property (car elm) 'meaning (nth 3 elm)) - (setq l (cdr l)))) + (setq elm (car l) l (cdr l)) + (let ((char (car elm)) + (ptype (nth 1 elm))) + (cond ((eq ptype 'consonant) + (modify-category-entry char ?c lao-category-table)) + ((memq ptype '(vowel-upper vowel-lower)) + (modify-category-entry char ?v lao-category-table)) + ((eq ptype 'semivowel-lower) + (modify-category-entry char ?s lao-category-table)) + ((eq ptype 'tone) + (modify-category-entry char ?t lao-category-table))) + (put-char-code-property char 'phonetic-type ptype) + (put-char-code-property char 'name (nth 2 elm)) + (put-char-code-property char 'meaning (nth 3 elm))))) + +;; The general composing rules are as follows: +;; +;; T +;; V T V T +;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C +;; v v +;; T +;; V T V T +;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CvT -> C +;; s s s s s +;; v v + + +;; where C: consonant, V: vowel upper, v: vowel lower, +;; T: tone mark, s: semivowel lower + +(defvar lao-composition-pattern + "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)" + "Regular expression matching a Lao composite sequence.") + +;;;###autoload +(defun lao-compose-string (str) + (with-category-table lao-category-table + (let ((idx 0)) + (while (setq idx (string-match lao-composition-pattern str idx)) + (compose-string str idx (match-end 0)) + (setq idx (match-end 0)))) + str)) + +;;; LRT: Lao <-> Roman Transcription + +;; Upper vowels and tone-marks are put on the letter. +;; Semi-vowel-sign-lo and lower vowels are put under the letter. + +(defconst lao-transcription-consonant-alist + (sort '(;; single consonants + ("k" . "(1!(B") + ("kh" . "(1"(B") + ("qh" . "(1$(B") + ("ng" . "(1'(B") + ("j" . "(1((B") + ("s" . "(1J(B") + ("x" . "(1*(B") + ("y" . "(1-(B") + ("d" . "(14(B") + ("t" . "(15(B") + ("th" . "(16(B") + ("dh" . "(17(B") + ("n" . "(19(B") + ("b" . "(1:(B") + ("p" . "(1;(B") + ("hp" . "(1<(B") + ("fh" . "(1=(B") + ("ph" . "(1>(B") + ("f" . "(1?(B") + ("m" . "(1A(B") + ("gn" . "(1B(B") + ("l" . "(1E(B") + ("r" . "(1C(B") + ("v" . "(1G(B") + ("w" . "(1G(B") + ("hh" . "(1K(B") + ("O" . "(1M(B") + ("h" . "(1N(B") + ("nh" . "(1|(B") + ("mh" . "(1}(B") + ("lh" . ["(1K\(B"]) + ;; double consonants + ("ngh" . ["(1K'(B"]) + ("yh" . ["(1K](B"]) + ("wh" . ["(1KG(B"]) + ("hl" . ["(1KE(B"]) + ("hy" . ["(1K-(B"]) + ("hn" . ["(1K9(B"]) + ("hm" . ["(1KA(B"]) + ) + (function (lambda (x y) (> (length (car x)) (length (car y))))))) + +(defconst lao-transcription-semi-vowel-alist + '(("r" . "(1\(B"))) + +(defconst lao-transcription-vowel-alist + (sort '(("a" . "(1P(B") + ("ar" . "(1R(B") + ("i" . "(1T(B") + ("ii" . "(1U(B") + ("eu" . "(1V(B") + ("ur" . "(1W(B") + ("u" . "(1X(B") + ("uu" . "(1Y(B") + ("e" . ["(1`P(B"]) + ("ee" . "(1`(B") + ("ae" . ["(1aP(B"]) + ("aa" . "(1a(B") + ("o" . ["(1bP(B"]) + ("oo" . "(1b(B") + ("oe" . ["(1`RP(B"]) + ("or" . "(1m(B") + ("er" . ["(1`T(B"]) + ("ir" . ["(1`U(B"]) + ("ua" . ["(1[GP(B"]) + ("uaa" . ["(1[G(B"]) + ("ie" . ["(1`Q]P(B"]) + ("ia" . ["(1`Q](B"]) + ("ea" . ["(1`VM(B"]) + ("eaa" . ["(1`WM(B"]) + ("ai" . "(1d(B") + ("ei" . "(1c(B") + ("ao" . ["(1`[R(B"]) + ("aM" . "(1S(B")) + (function (lambda (x y) (> (length (car x)) (length (car y))))))) + +;; Maa-sakod is put at the tail. +(defconst lao-transcription-maa-sakod-alist + '(("k" . "(1!(B") + ("g" . "(1'(B") + ("y" . "(1-(B") + ("d" . "(14(B") + ("n" . "(19(B") + ("b" . "(1:(B") + ("m" . "(1A(B") + ("v" . "(1G(B") + ("w" . "(1G(B") + )) + +(defconst lao-transcription-tone-alist + '(("'" . "(1h(B") + ("\"" . "(1i(B") + ("^" . "(1j(B") + ("+" . "(1k(B") + ("~" . "(1l(B"))) + +(defconst lao-transcription-punctuation-alist + '(("\\0" . "(1p(B") + ("\\1" . "(1q(B") + ("\\2" . "(1r(B") + ("\\3" . "(1s(B") + ("\\4" . "(1t(B") + ("\\5" . "(1u(B") + ("\\6" . "(1v(B") + ("\\7" . "(1w(B") + ("\\8" . "(1x(B") + ("\\9" . "(1y(B") + ("\\\\" . "(1f(B") + ("\\$" . "(1O(B"))) + +(defconst lao-transcription-pattern + (concat + "\\(" + (mapconcat 'car lao-transcription-consonant-alist "\\|") + "\\)\\(" + (mapconcat 'car lao-transcription-semi-vowel-alist "\\|") + "\\)?\\(\\(" + (mapconcat 'car lao-transcription-vowel-alist "\\|") + "\\)\\(" + (mapconcat 'car lao-transcription-maa-sakod-alist "\\|") + "\\)?\\(" + (mapconcat (lambda (x) (regexp-quote (car x))) + lao-transcription-tone-alist "\\|") + "\\)?\\)?\\|" + (mapconcat (lambda (x) (regexp-quote (car x))) + lao-transcription-punctuation-alist "\\|") + ) + "Regexp of Roman transcription pattern for one Lao syllable.") + +(defconst lao-transcription-pattern + (concat + "\\(" + (regexp-opt (mapcar 'car lao-transcription-consonant-alist)) + "\\)\\(" + (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist)) + "\\)?\\(\\(" + (regexp-opt (mapcar 'car lao-transcription-vowel-alist)) + "\\)\\(" + (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist)) + "\\)?\\(" + (regexp-opt (mapcar 'car lao-transcription-tone-alist)) + "\\)?\\)?\\|" + (regexp-opt (mapcar 'car lao-transcription-punctuation-alist)) + ) + "Regexp of Roman transcription pattern for one Lao syllable.") + +(defconst lao-vowel-reordering-rule + '(("(1P(B" (0 ?(1P(B) (0 ?(1Q(B)) + ("(1R(B" (0 ?(1R(B)) + ("(1T(B" (0 ?(1U(B)) + ("(1U(B" (0 ?(1U(B)) + ("(1V(B" (0 ?(1V(B)) + ("(1W(B" (0 ?(1W(B)) + ("(1X(B" (0 ?(1X(B)) + ("(1Y(B" (0 ?(1Y(B)) + ("(1`P(B" (?(1`(B 0 ?(1P(B) (?(1`(B 0 ?(1Q(B)) + ("(1`(B" (?(1`(B 0)) + ("(1aP(B" (?(1a(B 0 ?(1P(B) (?(1a(B 0 ?(1Q(B)) + ("(1a(B" (?(1a(B 0)) + ("(1bP(B" (?(1b(B 0 ?(1P(B) (0 ?(1[(B) (?(1-(B ?(1b(B 0 ?(1Q(B) (?(1G(B ?(1b(B 0 ?(1Q(B)) + ("(1b(B" (?(1b(B 0)) + ("(1`RP(B" (?(1`(B 0 ?(1R(B ?(1P(B) (0 ?(1Q(B ?(1M(B)) + ("(1m(B" (0 ?(1m(B) (0 ?(1M(B)) + ("(1`T(B" (?(1`(B 0 ?(1T(B)) + ("(1`U(B" (?(1`(B 0 ?(1U(B)) + ("(1[GP(B" (0 ?(1[(B ?(1G(B ?(1P(B) (0 ?(1Q(B ?(1G(B)) + ("(1[G(B" (0 ?(1[(B ?(1G(B) (0 ?(1G(B)) + ("(1`Q]P(B" (?(1`(B 0 ?(1Q(B ?(1](B ?(1P(B) (0 ?(1Q(B ?(1](B)) + ("(1`Q](B" (?(1`(B 0 ?(1Q(B ?(1](B) (0 ?(1](B)) + ("(1`VM(B" (?(1`(B 0 ?(1V(B ?(1M(B)) + ("(1`WM(B" (?(1`(B 0 ?(1W(B ?(1M(B)) + ("(1d(B" (?(1d(B 0)) + ("(1c(B" (?(1c(B 0)) + ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B)) + ("(1S(B" (0 ?(1S(B))) + "Alist of Lao vowel string vs the corresponding re-ordering rule. +Each element has this form: + (VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...) + +VOWEL is a vowel string (e.g. \"(1`Q]P(B\"). + +NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a +consonant. It is a list vowel characters or 0. The element 0 +indicate the place to embed a consonant. + +Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL +follwoing a consonant and preceding a maa-sakod character. If it is +nil, NO-MAA-SAKOD-RULE is used. The maa-sakod character is alwasy +appended at the tail. + +For instance, rule `(\"(1`WM(B\" (?(1`(B t ?(1W(B ?(1M(B))' tells that this vowel +string following a consonant `(1!(B' should be re-ordered as \"(1`!WM(B\". + +Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod +character MAA-SAKOD-n.") + +;;;###autoload +(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str) + "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string. +Only the first syllable is transcribed. +The value has the form: (START END LAO-STRING), where +START and END are the beggining and end positions of the Roman Lao syllable, +LAO-STRING is the Lao character transcription of it. + +Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao +syllable. In that case, FROM and TO are indexes to STR." + (if str + (if (setq from (string-match lao-transcription-pattern str from)) + (progn + (if (>= from to) + (setq from nil) + (setq to (match-end 0))))) + (save-excursion + (goto-char from) + (if (setq to (re-search-forward lao-transcription-pattern to t)) + (setq from (match-beginning 0)) + (setq from nil)))) + (if from + (let* ((consonant (match-string 1 str)) + (semivowel (match-string 3 str)) + (vowel (match-string 5 str)) + (maa-sakod (match-string 8 str)) + (tone (match-string 9 str)) + lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone + clen cidx) + (setq to (match-end 0)) + (if (not consonant) + (setq str (cdr (assoc (match-string 0 str) + lao-transcription-punctuation-alist))) + (setq lao-consonant + (cdr (assoc consonant lao-transcription-consonant-alist))) + (if (vectorp lao-consonant) + (setq lao-consonant (aref lao-consonant 0))) + (setq clen (length lao-consonant)) + (if semivowel + ;; Include semivowel in STR. + (setq lao-semivowel + (cdr (assoc semivowel lao-transcription-semi-vowel-alist)) + str (if (= clen 1) + (concat lao-consonant lao-semivowel) + (concat (substring lao-consonant 0 1) lao-semivowel + (substring lao-consonant 1)))) + (setq str lao-consonant)) + (if vowel + (let (rule) + (setq lao-vowel + (cdr (assoc vowel lao-transcription-vowel-alist))) + (if (vectorp lao-vowel) + (setq lao-vowel (aref lao-vowel 0))) + (setq rule (assoc lao-vowel lao-vowel-reordering-rule)) + (if (null maa-sakod) + (setq rule (nth 1 rule)) + (setq lao-maa-sakod + (cdr (assoc maa-sakod lao-transcription-maa-sakod-alist)) + rule + (or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule))) + (nth 2 rule) + (nth 1 rule)))) + (or rule + (error "Lao vowel %S has no re-ordering rule" lao-vowel)) + (setq lao-consonant str str "") + (while rule + (if (= (car rule) 0) + (setq str (concat str lao-consonant) + cidx (length str)) + (setq str (concat str (list (car rule))))) + (setq rule (cdr rule))) + (or cidx + (error "Lao vowel %S has malformed re-ordering rule" vowel)) + ;; Set CIDX to after upper or lower vowel if any. + (let ((len (length str))) + (while (and (< cidx len) + (memq (get-char-code-property (aref str cidx) + 'phonetic-type) + '(vowel-lower vowel-upper))) + (setq cidx (1+ cidx)))) + (if lao-maa-sakod + (setq str (concat str lao-maa-sakod))) + (if tone + (setq lao-tone + (cdr (assoc tone lao-transcription-tone-alist)) + str (concat (substring str 0 cidx) lao-tone + (substring str cidx))))))) + (list from to (lao-compose-string str))))) + +;;;###autoload +(defun lao-transcribe-roman-to-lao-string (str) + "Transcribe Romanized Lao string STR to Lao character string." + (let ((from 0) + (to (length str)) + (lao-str "") + val) + (while (setq val (lao-transcribe-single-roman-syllable-to-lao from to str)) + (let ((start (car val)) + (end (nth 1 val)) + (lao (nth 2 val))) + (if (> start from) + (setq lao-str (concat lao-str (substring str from start) lao)) + (setq lao-str (concat lao-str lao))) + (setq from end))) + (if (< from to) + (concat lao-str (substring str from to)) + lao-str))) + +;;;###autoload +(defun lao-composition-function (from to pattern &optional string) + "Compose Lao text in the region FROM and TO. +The text matches the regular expression PATTERN. +Optional 4th argument STRING, if non-nil, is a string containing text +to compose. + +The return value is number of composed characters." + (if (< (1+ from) to) + (prog1 (- to from) + (if string + (compose-string from to) + (compose-region from to)) + (- to from)))) ;; (provide 'lao-util)