comparison lisp/language/devan-util.el @ 89483:2f877ed80fa6

*** empty log message ***
author Kenichi Handa <handa@m17n.org>
date Mon, 08 Sep 2003 12:53:41 +0000
parents 375f2633d815 ea8374ccb41f
children 68c22ea6027c
comparison
equal deleted inserted replaced
88123:375f2633d815 89483:2f877ed80fa6
1 ;;; devan-util.el --- Support for composing Devanagari characters 1 ;;; devan-util.el --- Support for composing Devanagari characters -*-coding: iso-2022-7bit;-*-
2 2
3 ;; Copyright (C) 2001 Free Software Foundation, Inc. 3 ;; Copyright (C) 2001 Free Software Foundation, Inc.
4 4
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> 5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org>
6 ;; Keywords: multilingual, Devanagari 6 ;; Keywords: multilingual, Devanagari
28 28
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and 29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and
30 ;; composition of Devanagari script characters. 30 ;; composition of Devanagari script characters.
31 31
32 ;;; Code: 32 ;;; Code:
33
34 ;;;###autoload
35 33
36 ;; Devanagari Composable Pattern 34 ;; Devanagari Composable Pattern
37 ;; C .. Consonants 35 ;; C .. Consonants
38 ;; V .. Vowel 36 ;; V .. Vowel
39 ;; H .. Halant 37 ;; H .. Halant
49 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? 47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)?
50 48
51 (defconst devanagari-consonant 49 (defconst devanagari-consonant
52 "[$,15U(B-$,15y68(B-$,16?(B]") 50 "[$,15U(B-$,15y68(B-$,16?(B]")
53 51
52 ;;("$,16B(B" . nil)
53 ;;("$,16A(B" . nil)
54 ;;("$,16C(B" . nil)
55
56
54 (defconst devanagari-composable-pattern 57 (defconst devanagari-composable-pattern
55 (concat 58 (concat
56 "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|[$,15C6D(B]" 59 "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|[$,15C6D(B]"
57 "\\|\\(" 60 "\\|\\("
58 "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?" 61 "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?"
59 "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?" 62 "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?"
60 "\\)") 63 "\\)")
61 "Regexp matching a composable sequence of Devanagari characters.") 64 "Regexp matching a composable sequence of Devanagari characters.")
62 65
66 (dolist (range '((#x0903 . #x0903)
67 (#x0905 . #x0939)
68 (#x0958 . #x0961)))
69 (set-char-table-range indian-composable-pattern range
70 devanagari-composable-pattern))
71
72 ;;;###autoload
63 (defun devanagari-compose-region (from to) 73 (defun devanagari-compose-region (from to)
64 (interactive "r") 74 (interactive "r")
65 (save-excursion 75 (save-excursion
66 (save-restriction 76 (save-restriction
67 (narrow-to-region from to) 77 (narrow-to-region from to)
68 (goto-char (point-min)) 78 (goto-char (point-min))
69 (while (re-search-forward devanagari-composable-pattern nil t) 79 (while (re-search-forward devanagari-composable-pattern nil t)
70 (devanagari-compose-syllable-region (match-beginning 0) 80 (devanagari-compose-syllable-region (match-beginning 0)
71 (match-end 0)))))) 81 (match-end 0))))))
72 (defun devanagari-compose-string (string) 82 (defun devanagari-compose-string (string)
73 (with-temp-buffer 83 (with-temp-buffer
74 (insert (decompose-string string)) 84 (insert (decompose-string string))
75 (devanagari-compose-region (point-min) (point-max)) 85 (devanagari-compose-region (point-min) (point-max))
76 (buffer-string))) 86 (buffer-string)))
81 (let ((buffer-modified-p (buffer-modified-p))) 91 (let ((buffer-modified-p (buffer-modified-p)))
82 (narrow-to-region (point) (+ (point) len)) 92 (narrow-to-region (point) (+ (point) len))
83 (devanagari-compose-region (point-min) (point-max)) 93 (devanagari-compose-region (point-min) (point-max))
84 (set-buffer-modified-p buffer-modified-p) 94 (set-buffer-modified-p buffer-modified-p)
85 (- (point-max) (point-min)))))) 95 (- (point-max) (point-min))))))
86
87 (defun devanagari-range (from to)
88 "Make the list of the integers of range FROM to TO."
89 (let (result)
90 (while (<= from to) (setq result (cons to result) to (1- to))) result))
91 96
92 (defun devanagari-regexp-of-hashtbl-keys (hashtbl) 97 (defun devanagari-regexp-of-hashtbl-keys (hashtbl)
93 "Return a regular expression that matches all keys in hashtable HASHTBL." 98 "Return a regular expression that matches all keys in hashtable HASHTBL."
94 (let ((max-specpdl-size 1000)) 99 (let ((max-specpdl-size 1000))
95 (regexp-opt 100 (regexp-opt
97 (let (dummy) 102 (let (dummy)
98 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) 103 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl)
99 dummy) 104 dummy)
100 (function (lambda (x y) (> (length x) (length y)))))))) 105 (function (lambda (x y) (> (length x) (length y))))))))
101 106
102 (defun devanagari-composition-function (from to pattern &optional string) 107 ;;;###autoload
103 "Compose Devanagari characters in REGION, or STRING if specified. 108 (defun devanagari-composition-function (pos &optional string)
104 Assume that the REGION or STRING must fully match the composable 109 "Compose Devanagari characters after the position POS.
105 PATTERN regexp." 110 If STRING is not nil, it is a string, and POS is an index to the string.
106 (if string (devanagari-compose-syllable-string string) 111 In this case, compose characters after POS of the string."
107 (devanagari-compose-syllable-region from to)) 112 (if string
108 (- to from)) 113 ;; Not yet implemented.
109 114 nil
110 ;; Register a function to compose Devanagari characters. 115 (goto-char pos)
111 (mapc 116 (if (looking-at devanagari-composable-pattern)
112 (function (lambda (ucs) 117 (prog1 (match-end 0)
113 (aset composition-function-table (decode-char 'ucs ucs) 118 (devanagari-compose-syllable-region pos (match-end 0))))))
114 (list (cons devanagari-composable-pattern
115 'devanagari-composition-function)))))
116 (nconc '(#x0903) (devanagari-range #x0905 #x0939) (devanagari-range #x0958 #x0961)))
117 119
118 ;; Notes on conversion steps. 120 ;; Notes on conversion steps.
119 121
120 ;; 1. chars to glyphs 122 ;; 1. chars to glyphs
121 ;; 123 ;;
488 dev-glyph-glyph-2) 490 dev-glyph-glyph-2)
489 hash)) 491 hash))
490 (defvar dev-glyph-glyph-2-regexp 492 (defvar dev-glyph-glyph-2-regexp
491 (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)) 493 (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash))
492 494
493
494 (defun dev-charseq (from &optional to) 495 (defun dev-charseq (from &optional to)
495 (if (null to) (setq to from)) 496 (if (null to) (setq to from))
496 (mapcar (function (lambda (x) (indian-glyph-char x 'devanagari))) 497 (number-sequence (decode-char 'devanagari-cdac from)
497 (devanagari-range from to))) 498 (decode-char 'devanagari-cdac to)))
498 499
499 (defvar dev-glyph-cvn 500 (defvar dev-glyph-cvn
500 (append 501 (append
501 (dev-charseq #x2b) 502 (dev-charseq #x2b)
502 (dev-charseq #x3c #xc1) 503 (dev-charseq #x3c #xc1)
562 (buffer-string))) 563 (buffer-string)))
563 564
564 (defun devanagari-compose-syllable-region (from to) 565 (defun devanagari-compose-syllable-region (from to)
565 "Compose devanagari syllable in region FROM to TO." 566 "Compose devanagari syllable in region FROM to TO."
566 (let ((glyph-str nil) (cons-num 0) glyph-str-list 567 (let ((glyph-str nil) (cons-num 0) glyph-str-list
567 (last-halant nil) (preceding-r nil) (last-modifier nil) 568 (last-halant nil) (preceding-r nil) (last-modifier nil)
568 (last-char (char-before to)) match-str 569 (last-char (char-before to)) match-str
569 glyph-block split-pos) 570 glyph-block split-pos)
570 (save-excursion 571 (save-excursion
571 (save-restriction 572 (save-restriction
572 ;;; *** char-to-glyph conversion *** 573 ;;; *** char-to-glyph conversion ***
573 ;; Special rule 1. -- Last halant must be preserved. 574 ;; Special rule 1. -- Last halant must be preserved.
574 (if (eq last-char ?$,16-(B) 575 (if (eq last-char ?$,16-(B)
575 (progn 576 (progn
576 (setq last-halant t) 577 (setq last-halant t)
577 (narrow-to-region from (1- to))) 578 (narrow-to-region from (1- to)))
578 (narrow-to-region from to) 579 (narrow-to-region from to)
579 ;; note if the last char is modifier. 580 ;; note if the last char is modifier.
580 (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) 581 (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B))
581 (setq last-modifier t))) 582 (setq last-modifier t)))
582 (goto-char (point-min)) 583 (goto-char (point-min))
583 ;; Special rule 2. -- preceding "r halant" must be modifier. 584 ;; Special rule 2. -- preceding "r halant" must be modifier.
584 (when (looking-at "$,15p6-(B.") 585 (when (looking-at "$,15p6-(B.")
585 (setq preceding-r t) 586 (setq preceding-r t)
586 (goto-char (+ 2 (point)))) 587 (goto-char (+ 2 (point))))
587 ;; translate the rest characters into glyphs 588 ;; translate the rest characters into glyphs
588 (while (re-search-forward dev-char-glyph-regexp nil t) 589 (while (not (eobp))
589 (setq match-str (match-string 0)) 590 (if (looking-at dev-char-glyph-regexp)
590 (setq glyph-str 591 (let ((end (match-end 0)))
591 (concat glyph-str 592 (setq match-str (match-string 0)
592 (gethash match-str dev-char-glyph-hash))) 593 glyph-str
593 ;; count the number of consonant-glyhs. 594 (concat glyph-str
594 (if (string-match devanagari-consonant match-str) 595 (gethash match-str dev-char-glyph-hash)))
595 (setq cons-num (1+ cons-num)))) 596 ;; count the number of consonant-glyhs.
596 ;; preceding-r must be attached before the anuswar if exists. 597 (if (string-match devanagari-consonant match-str)
597 (if preceding-r 598 (setq cons-num (1+ cons-num)))
598 (if last-modifier 599 (goto-char end))
599 (setq glyph-str (concat (substring glyph-str 0 -1) 600 (setq glyph-str (concat glyph-str (string (following-char))))
600 "$,4"'(B" (substring glyph-str -1))) 601 (forward-char 1)))
601 (setq glyph-str (concat glyph-str "$,4"'(B")))) 602 ;; preceding-r must be attached before the anuswar if exists.
602 (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) 603 (if preceding-r
603 ;;; *** glyph-to-glyph conversion *** 604 (if last-modifier
604 (when (string-match dev-glyph-glyph-regexp glyph-str) 605 (setq glyph-str (concat (substring glyph-str 0 -1)
605 (setq glyph-str 606 "$,4"'(B" (substring glyph-str -1)))
606 (replace-match (gethash (match-string 0 glyph-str) 607 (setq glyph-str (concat glyph-str "$,4"'(B"))))
607 dev-glyph-glyph-hash) 608 (if last-halant (setq glyph-str (concat glyph-str "$,4""(B")))
608 nil t glyph-str)) 609 ;;; *** glyph-to-glyph conversion ***
609 (if (and (> cons-num 1) 610 (when (string-match dev-glyph-glyph-regexp glyph-str)
610 (string-match dev-glyph-glyph-2-regexp glyph-str)) 611 (setq glyph-str
611 (setq glyph-str 612 (replace-match (gethash (match-string 0 glyph-str)
612 (replace-match (gethash (match-string 0 glyph-str) 613 dev-glyph-glyph-hash)
613 dev-glyph-glyph-2-hash) 614 nil t glyph-str))
614 nil t glyph-str)))) 615 (if (and (> cons-num 1)
615 ;;; *** glyph reordering *** 616 (string-match dev-glyph-glyph-2-regexp glyph-str))
616 (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) 617 (setq glyph-str
617 (setq glyph-block (substring glyph-str 0 (1+ split-pos))) 618 (replace-match (gethash (match-string 0 glyph-str)
618 (setq glyph-str (substring glyph-str (1+ split-pos))) 619 dev-glyph-glyph-2-hash)
619 (setq 620 nil t glyph-str))))
620 glyph-block 621 ;;; *** glyph reordering ***
621 (if (string-match dev-glyph-right-modifier-regexp glyph-block) 622 (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str))
622 (sort (string-to-list glyph-block) 623 (setq glyph-block (substring glyph-str 0 (1+ split-pos)))
623 (function (lambda (x y) 624 (setq glyph-str (substring glyph-str (1+ split-pos)))
624 (< (get-char-code-property x 'composition-order) 625 (setq
625 (get-char-code-property y 'composition-order))))) 626 glyph-block
626 (sort (string-to-list glyph-block) 627 (if (string-match dev-glyph-right-modifier-regexp glyph-block)
627 (function (lambda (x y) 628 (sort (string-to-list glyph-block)
628 (let ((xo (get-char-code-property x 'composition-order)) 629 (function (lambda (x y)
629 (yo (get-char-code-property y 'composition-order))) 630 (< (get-char-code-property x 'composition-order)
630 (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) 631 (get-char-code-property y 'composition-order)))))
631 (setq glyph-str-list (nconc glyph-str-list glyph-block))) 632 (sort (string-to-list glyph-block)
632 ;; concatenate and attach reference-points. 633 (function (lambda (x y)
633 (setq glyph-str 634 (let ((xo (get-char-code-property x 'composition-order))
634 (cdr 635 (yo (get-char-code-property y 'composition-order)))
635 (apply 636 (if (= xo 2) nil (if (= yo 2) t (< xo yo)))))))))
636 'nconc 637 (setq glyph-str-list (nconc glyph-str-list glyph-block)))
637 (mapcar 638 ;; concatenate and attach reference-points.
638 (function (lambda (x) 639 (setq glyph-str
639 (list 640 (cdr
640 (or (get-char-code-property x 'reference-point) 641 (apply
641 '(5 . 3) ;; default reference point. 642 'nconc
642 ) 643 (mapcar
643 x))) 644 (function (lambda (x)
644 glyph-str-list)))))) 645 (list
646 (or (get-char-code-property x 'reference-point)
647 '(5 . 3) ;; default reference point.
648 )
649 x)))
650 glyph-str-list))))))
645 (compose-region from to glyph-str))) 651 (compose-region from to glyph-str)))
646 652
647 (provide 'devan-util) 653 (provide 'devan-util)
648 654
649 ;;; devan-util.el ends here 655 ;;; devan-util.el ends here