Mercurial > emacs
diff lisp/language/devan-util.el @ 26894:e0a13ff5901d
Mostly rewritten.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 15 Dec 1999 00:47:53 +0000 |
parents | 6402c89c30c3 |
children | 60eb71a9f901 |
line wrap: on
line diff
--- a/lisp/language/devan-util.el Wed Dec 15 00:47:31 1999 +0000 +++ b/lisp/language/devan-util.el Wed Dec 15 00:47:53 1999 +0000 @@ -49,58 +49,64 @@ ;;; Basic functions. ;;;###autoload -(defun indian-to-devanagari (ch) - "Convert IS 13194 characters to Devanagari basic characters." - (let ((charcodes (split-char ch))) +(defun indian-to-devanagari (char) + "Convert IS 13194 character CHAR to Devanagari basic characters. +If CHAR is not IS 13194, return CHAR as is." + (let ((charcodes (split-char char))) (if (eq (car charcodes) 'indian-is13194) (make-char 'indian-2-column ?\x21 (nth 1 charcodes)) - ch))) + char))) ;;;###autoload -(defun devanagari-to-indian (ch) - "Convert Devanagari basic characters to IS 13194 characters." - (let* ((charcodes (split-char ch)) - (charset (car charcodes)) - (code-h (car (cdr charcodes)))) +(defun devanagari-to-indian (char) + "Convert Devanagari basic character CHAR to IS 13194 characters. +If CHAR is not Devanagari basic character, return CHAR as is." + (let ((charcodes (split-char char))) (if (and (eq (car charcodes) 'indian-2-column) (= (nth 1 charcodes) ?\x21)) (make-char 'indian-is13194 (nth 2 charcodes)) - ch))) + char))) ;;;###autoload (defun indian-to-devanagari-region (from to) - "Convert IS 13194 characters in region to Devanagari basic characters." + "Convert IS 13194 characters in region to Devanagari basic characters. +When called from a program, expects two arguments, +positions (integers or markers) specifying the region." (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) -; (while (re-search-forward "\\cd" nil t) - (while (re-search-forward "." nil t) - (let* ((devanagari-char (indian-to-devanagari (preceding-char)))) - (delete-char -1) - (insert devanagari-char))))) + (save-excursion + (goto-char from) + (while (< (point) to) + (let ((char (following-char))) + (if (eq (char-charset char) 'indian-is13194) + (progn + (delete-char 1) + (insert (indian-to-devanagari char))) + (forward-char 1)))))) ;;;###autoload (defun devanagari-to-indian-region (from to) - "Convert Devanagari basic characters in region to Indian characters." + "Convert Devanagari basic characters in region to Indian characters. +When called from a program, expects two arguments, +positions (integers or markers) specifying the region." (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) -; (while (re-search-forward "\\cD" nil t) ; Devanagari Character Code. - (while (re-search-forward "." nil t) - (let* ((indian-char (devanagari-to-indian (preceding-char)))) - (delete-char -1) - (insert indian-char))))) + (save-excursion + (goto-char from) + (while (< (point) to) + (let ((char (following-char))) + (if (eq (char-charset char) 'indian-2-column) + (progn + (delete-char -1) + (insert (devanagari-to-indian char))) + (forward-char 1)))))) ;;;###autoload -(defun indian-to-devanagari-string (str) - "Convert Indian String to Devanagari Basic Character String." - (let* ((len (length str)) +(defun indian-to-devanagari-string (string) + "Convert Indian characters in STRING to Devanagari Basic characters." + (let* ((len (length string)) (i 0) (vec (make-vector len 0))) (while (< i len) - (aset vec i (indian-to-devanagari (aref str i))) + (aset vec i (indian-to-devanagari (aref string i))) (setq i (1+ i))) (concat vec))) @@ -256,12 +262,12 @@ ;; Finally, convert 2-column glyphs to 1-column glyph ;; if such a glyph exist. ;; -;; => $(6![(B (ml.mr) $(6!X(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B +;; => $(6!X(B (ml.mr) $(6![(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B ;; ;; Compose the glyph. ;; -;; => 2$(6!X@![(B1/2$(6!D@"FP!\(B1 -;; => 2$(6!X@![(B12$(6!D@"FP!\(B1 +;; => 4$(6!Xt%![0!X![1(B/4$(6!Dt%"Fv#!\0!D"F!\1(B +;; => 4$(6!Xt%![0!X![14!Dt%"Fv#!\0!D"F!\1(B ;; ;; @@ -269,7 +275,7 @@ ;; ;; ;; IMPORTANT: -;; There may be many rules which you many want to be suppressed. +;; There may be many rules that you many want to suppress. ;; In that case, please comment out that rule. ;; ;; RULES WILL BE EVALUATED FROM FIRST TO LAST. @@ -277,7 +283,7 @@ ;; ;; TO DO: ;; Prepare multiple specific list of rules for each languages -;; which adopts Devanagari script. +;; that adopt Devanagari script. ;; (defconst devanagari-char-to-glyph-rules @@ -558,20 +564,18 @@ ;; glyphs-to-characters conversion. ;; -(defun max-match-len (regexp-str) - "Return the possible length of matched string of given regexp. -Only [...] pattern of regexp is recognized. -The last character of inside of [....] is used for its length." - (let ((dest-str regexp-str)) - (while (string-match "\\[\\([^\]]\\)+\\]" dest-str) - (setq dest-str - (concat (substring dest-str 0 (match-beginning 0)) - (substring dest-str (match-beginning 1) (match-end 1)) - (substring dest-str (match-end 0))))) - (length dest-str))) +(defun max-match-len (regexp) + "Return the maximum length of text that can match the pattern REGEXP. +Only [...] pattern of regexp is recognized." + (let ((len 0) + (index 0)) + (while (string-match "\\[\\([^\]]\\)+\\]" regexp index) + (setq len (+ len (- (match-beginning 0) index) 1) + index (match-end 0))) + len)) -;; Return t iff LIST1 and LIST2 has a same member. -(defun rule-intersection (list1 list2) +;; Return t iff at least one member appears in both LIST1 and LIST2. +(defun intersecting-p (list1 list2) (let ((found nil)) (while (and list1 (not found)) (if (memq (car list1) list2) @@ -579,28 +583,32 @@ (setq list1 (cdr list1)))) found)) -(defun string-conversion-by-rule (src-str symbol &rest specs) - "Convert string SRC-STR to a new string according to -the rules described in the each character's SYMBOL property. The -rules are described in the forms of '((regexp str <specs>) ...), and -the character sequence in the string which matches to 'regexp' are -replaced with str. If SPECS are not specified, only rules with no -<specs> would be applied. If SPECS are specified, then rules with no -<specs> specified and rules with <spec> matches with SPECS would be -applied. Rules are tested in the order of the list, thus more -specific rules should be placed in front of less important rules. No -composite character is supported, thus such must be converted by -decompose-char before applying to this function. If rule is given in -the forms of regexp '...\\(...\\)...', then inside the parenthesis is -the subject of the match. Otherwise, the entire expression is the -subject of the match." +(defun string-conversion-by-rule (source symbol &rest specs) + "Convert string SOURCE by rules stored in SYMBOL property of each character. +The remaining arguments forms a list SPECS that restricts applicable rules. + +The rules has the form ((REGEXP STR RULE-SPEC ...) ...). +Each character sequence in STRING that matches REGEXP is +replaced by STR. + +If SPECS is nil, only rules with no RULE-SPECs is applied. Otherwise +rules with no RULE-SPECS and rules that have at least one member of +SPECS in RULE-SPECs is applied. + +Rules are tested in the order of the list, thus more specific rules +should be placed in front of less specific rules. + +If rule is given in the forms of regexp '...\\(...\\)...', a character +sequence that matches the pattern inside of the parenthesis is the +subject of the match. Otherwise, the entire expression is the subject +of the match." (let ((pos 0) (dst-str "")) - (while (< pos (length src-str)) + (while (< pos (length source)) (let ((found nil) (rules (get-char-code-property (string-to-char - (substring src-str pos)) symbol))) + (substring source pos)) symbol))) (while rules (let* ((rule (car rules)) (regexp (car rule)) @@ -608,7 +616,7 @@ (rule-specs (cdr (cdr rule))) search-pos) (if (not (or (null rule-specs) - (rule-intersection specs rule-specs))) + (intersecting-p specs rule-specs))) (setq rules (cdr rules)) (if (null (string-match "\\\\(.+\\\\)" regexp)) (progn @@ -619,7 +627,7 @@ (string-match "^[^\\\\]*" regexp) (match-end 0)))))) (if (< search-pos 0) (setq search-pos 0)) - (if (string-match regexp src-str search-pos) + (if (string-match regexp source search-pos) (if (= (match-beginning 1) pos) (progn (setq dst-str (concat dst-str replace-str)) @@ -631,7 +639,7 @@ (setq rules (cdr rules)))))) ;; proceed to next position (if (not found) - (setq dst-str (concat dst-str (substring src-str pos (1+ pos))) + (setq dst-str (concat dst-str (substring source pos (1+ pos))) pos (1+ pos))))) dst-str)) @@ -641,12 +649,12 @@ ;; ;;;###autoload -(defun char-to-glyph-devanagari (src-str &rest langs) - "Convert Devanagari characters in the string to Devanagari glyphs. +(defun char-to-glyph-devanagari (string &rest langs) + "Convert Devanagari characters in STRING to Devanagari glyphs. Ligatures and special rules are processed." (apply 'string-conversion-by-rule - (append (list src-str 'char-to-glyph) langs))) + (append (list string 'char-to-glyph) langs))) ;; Example: ;;(char-to-glyph-devanagari "$(5!X![!F!h!D!\(B") => "$(5!X!["F!D!\(B" @@ -656,7 +664,7 @@ ;; Phase 2: Compose Glyphs to form One Glyph. ;; -;; Each list consist of glyph, application-priority and application-direction. +;; Each list consists of glyph, application-priority and application-direction. ;; ;; Glyphs will be ordered from low priority number to high priority number. ;; If application-priority is omitted, it is assumed to be 0. @@ -1044,21 +1052,22 @@ ;; Determine composition priority and rule of the array of Glyphs. ;; Sort the glyphs with their priority. -(defun devanagari-reorder-glyphs-for-composition (glyph-alist) - (let* ((pos 0) - (ordered-glyphs '())) - (while (< pos (length glyph-alist)) - (let* ((glyph (aref glyph-alist pos))) +(defun devanagari-reorder-glyphs-for-composition (string start end) + (let ((pos start) + (ordered-glyphs nil)) + (while (< pos end) + (let ((glyph (aref string pos))) (setq pos (1+ pos)) (setq ordered-glyphs - (append ordered-glyphs (list (assq glyph devanagari-composition-rules)))))) + (append ordered-glyphs + (list (assq glyph devanagari-composition-rules)))))) (sort ordered-glyphs '(lambda (x y) (< (car (cdr x)) (car (cdr y))))))) -;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "2$(6!XP"5@![(B1" +! ;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "4$(6!Xv#"5t%![0!X"5![1(B" (defun devanagari-compose-to-one-glyph (devanagari-string) (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition - (string-to-vector devanagari-string))) + devanagari-string 0 (length devanagari-string))) ;; List of glyphs to be composed. (cmp-glyph-list (list (car (car o-glyph-list)))) (o-glyph-list (cdr o-glyph-list))) @@ -1077,11 +1086,31 @@ (if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list)) (apply 'compose-chars cmp-glyph-list)))) +(defun devanagari-composition-component (string &optional start end) + (or start (setq start 0)) + (or end (setq end (length string))) + (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition + string start end)) + ;; List of glyphs to be composed. + (cmp-glyph-list (list (car (car o-glyph-list))))) + (setq o-glyph-list (cdr o-glyph-list)) + (while o-glyph-list + (let* ((o-glyph (car o-glyph-list)) + (glyph (if (< 2 (length o-glyph)) + ;; default composition + (list (car (cdr (cdr o-glyph))) (car o-glyph)) + ;; composition with a specified rule + (list '(mr . ml) (car o-glyph))))) + (setq o-glyph-list (cdr o-glyph-list)) + (setq cmp-glyph-list (append cmp-glyph-list glyph)))) + ;; Convert glyphs to 1-column width if possible. + (devanagari-wide-to-narrow cmp-glyph-list))) + ;; Utility function for Phase 2.5 -;; Check whether given glyph is a Devanagari vertical modifier or not. + +;; Check whether GLYPH is a Devanagari vertical modifier or not. ;; If it is a vertical modifier, whether it should be 1-column shape or not ;; depends on previous non-vertical modifier. - ; return nil if it is not vertical modifier. (defun devanagari-vertical-modifier-p (glyph) (string-match (char-to-string glyph) "[$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M(B]")) @@ -1092,12 +1121,13 @@ "[$(5![(B]")) (defun devanagari-wide-to-narrow-char (char) - "Return the corresponding narrow character if it exists." + "Convert Devanagari character CHAR to the corresponding narrow character. +If there's no corresponding narrow character, return CHAR as is." (let ((narrow (cdr (assq char devanagari-1-column-char)))) - (if narrow narrow char))) + (or narrow char))) ;; -;; Phase 2.5 Convert Appropriate Character to 1-column shape. +;; Phase 2.5 Convert appropriate character to 1-column shape. ;; ;; This is temporary and should be removed out when Emacs supports ;; variable width characters. @@ -1121,7 +1151,8 @@ (cond ((null src-list) '()) ; not glyph code ((not (numberp glyph)) - (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))) + (cons glyph + (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))) ; glyphs to be processed regardless of the value of "2-col-glyph" ((devanagari-non-vertical-modifier-p glyph) (cons (devanagari-wide-to-narrow-char glyph) @@ -1132,7 +1163,8 @@ (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) t)) (cons (devanagari-wide-to-narrow-char glyph) - (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph)))) + (devanagari-wide-to-narrow-iter (cdr src-list) + 2-col-glyph)))) ; normal glyph (t (if (cdr (assq glyph devanagari-1-column-char)) @@ -1147,65 +1179,18 @@ ;; ;; -;; Decomposition of composite font. +;; Decomposition of composite sequence. ;; -(defun devanagari-normalize-narrow-glyph (charlist) - (let ((wide-char (car (rassoc (car charlist) devanagari-1-column-char)))) - (if (null charlist) nil - (cons (if (null wide-char) (car charlist) wide-char) - (devanagari-normalize-narrow-glyph (cdr charlist)))))) - -(defvar devanagari-decomposition-rules - '( - (?$(5"p(B -10) - ) - ) - -(defun devanagari-reorder-glyphs-for-decomposition (glyphlist) - "This function re-orders glyph list for decomposition." - (sort glyphlist - '(lambda (x y) - (let ((xx (nth 1 (assoc x devanagari-decomposition-rules))) - (yy (nth 1 (assoc y devanagari-decomposition-rules)))) - (if (null xx) (setq xx 0)) - (if (null yy) (setq yy 0)) - (< xx yy))))) - -(defun devanagari-decompose-char (glyph) - "This function decomposes one Devanagari composite glyph to - basic Devanagari characters as a string." - (let ((glyphlist - (if (eq (car (split-char glyph)) 'composition) - (string-to-list (decompose-composite-char glyph)) - (list glyph)))) - (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist)) - (setq glyphlist (devanagari-reorder-glyphs-for-decomposition glyphlist)) - (string-conversion-by-rule - (mapconcat 'char-to-string glyphlist "") 'glyph-to-char))) - ;;;###autoload (defun devanagari-decompose-string (str) - "Decompose Devanagari glyph string STR to basic Devanagari character string." - (let ((len (length str)) - (i 0) - (dst "")) - (while (< i len) - (setq dst (concat dst (devanagari-decompose-char (aref str i))) - i (1+ i))) - dst)) + "Decompose Devanagari string STR" + (decompose-string (copy-sequence str))) ;;;###autoload (defun devanagari-decompose-region (from to) (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward "." nil t) - (let* ((match-b (match-beginning 0)) (match-e (match-end 0)) - (decmps (devanagari-decompose-string (buffer-substring match-b match-e)))) - (delete-char -1) - (insert decmps))))) + (decompose-region from to)) ;;; ;;; Composition @@ -1213,37 +1198,34 @@ ;;;###autoload (defun devanagari-compose-string (str &rest langs) - (let ((len (length str)) - (src (devanagari-decompose-string str)) (dst "") rest match-b match-e) - (while (string-match devanagari-composite-glyph-unit src) - (setq match-b (match-beginning 0) match-e (match-end 0)) - (setq dst - (concat dst - (substring src 0 match-b) - (devanagari-compose-to-one-glyph - (apply - 'char-to-glyph-devanagari - (cons (substring src match-b match-e) - langs))))) - (setq src (substring src match-e))) - (setq dst (concat dst src)) - dst)) + (setq str (copy-sequence str)) + (let ((idx 0) + rest match-b match-e) + (while (string-match devanagari-composite-glyph-unit str idx) + (let* ((match-b (match-beginning 0)) + (match-e (match-end 0)) + (cmps (devanagari-composition-component + (apply + 'char-to-glyph-devanagari + (cons (substring str match-b match-e) langs))))) + (compose-string str match-b match-e cmps) + (setq idx match-e)))) + str) ;;;###autoload (defun devanagari-compose-region (from to &rest langs) (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward devanagari-composite-glyph-unit nil t) - (let* ((match-b (match-beginning 0)) (match-e (match-end 0)) - (cmps (devanagari-compose-to-one-glyph - (apply - 'char-to-glyph-devanagari - (cons (buffer-substring match-b match-e) - langs))))) - (delete-region match-b match-e) - (insert cmps))))) + (save-excursion + (save-restriction + (narrow-to-region from to) + (goto-char (point-min)) + (while (re-search-forward devanagari-composite-glyph-unit nil t) + (let* ((match-b (match-beginning 0)) (match-e (match-end 0)) + (cmps (devanagari-composition-component + (apply + 'char-to-glyph-devanagari + (cons (buffer-substring match-b match-e) langs))))) + (compose-region match-b match-e cmps)))))) ;; For pre-write and post-read conversion @@ -1260,20 +1242,18 @@ ;;;###autoload (defun in-is13194-devanagari-post-read-conversion (len) - (let ((pos (point)) - (buffer-modified-p (buffer-modified-p))) - (prog1 - (devanagari-compose-from-is13194-region pos (+ pos len)) - (set-buffer-modified-p buffer-modified-p)))) + (let ((pos (point))) + (devanagari-compose-from-is13194-region pos (+ pos len)))) ;;;###autoload (defun devanagari-decompose-to-is13194-region (from to) "Decompose Devanagari characters in the region to IS 13194 characters." (interactive "r") - (save-restriction - (narrow-to-region from to) - (devanagari-decompose-region (point-min) (point-max)) - (devanagari-to-indian-region (point-min) (point-max)))) + (save-excursion + (save-restriction + (narrow-to-region from to) + (devanagari-decompose-region (point-min) (point-max)) + (devanagari-to-indian-region (point-min) (point-max))))) ;;;###autoload (defun in-is13194-devanagari-pre-write-conversion (from to) @@ -1304,8 +1284,6 @@ (indian-decode-itrans-region (point-min) (point-max)) (devanagari-compose-from-is13194-region (point-min) (point-max)))) -;; Test comment. - ;; (provide 'devan-util)