Mercurial > emacs
comparison lisp/language/devan-util.el @ 89483:2f877ed80fa6
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 08 Sep 2003 12:53:41 +0000 |
parents | 375f2633d815 ea8374ccb41f |
children | 68c22ea6027c |
comparison
equal
deleted
inserted
replaced
88123:375f2633d815 | 89483:2f877ed80fa6 |
---|---|
1 ;;; devan-util.el --- Support for composing Devanagari characters | 1 ;;; devan-util.el --- Support for composing Devanagari characters -*-coding: iso-2022-7bit;-*- |
2 | 2 |
3 ;; Copyright (C) 2001 Free Software Foundation, Inc. | 3 ;; Copyright (C) 2001 Free Software Foundation, Inc. |
4 | 4 |
5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> | 5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> |
6 ;; Keywords: multilingual, Devanagari | 6 ;; Keywords: multilingual, Devanagari |
28 | 28 |
29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and | 29 ;; This file provides character(Unicode) to glyph(CDAC) conversion and |
30 ;; composition of Devanagari script characters. | 30 ;; composition of Devanagari script characters. |
31 | 31 |
32 ;;; Code: | 32 ;;; Code: |
33 | |
34 ;;;###autoload | |
35 | 33 |
36 ;; Devanagari Composable Pattern | 34 ;; Devanagari Composable Pattern |
37 ;; C .. Consonants | 35 ;; C .. Consonants |
38 ;; V .. Vowel | 36 ;; V .. Vowel |
39 ;; H .. Halant | 37 ;; H .. Halant |
49 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? | 47 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? |
50 | 48 |
51 (defconst devanagari-consonant | 49 (defconst devanagari-consonant |
52 "[$,15U(B-$,15y68(B-$,16?(B]") | 50 "[$,15U(B-$,15y68(B-$,16?(B]") |
53 | 51 |
52 ;;("$,16B(B" . nil) | |
53 ;;("$,16A(B" . nil) | |
54 ;;("$,16C(B" . nil) | |
55 | |
56 | |
54 (defconst devanagari-composable-pattern | 57 (defconst devanagari-composable-pattern |
55 (concat | 58 (concat |
56 "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|[$,15C6D(B]" | 59 "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|[$,15C6D(B]" |
57 "\\|\\(" | 60 "\\|\\(" |
58 "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?" | 61 "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?" |
59 "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?" | 62 "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?" |
60 "\\)") | 63 "\\)") |
61 "Regexp matching a composable sequence of Devanagari characters.") | 64 "Regexp matching a composable sequence of Devanagari characters.") |
62 | 65 |
66 (dolist (range '((#x0903 . #x0903) | |
67 (#x0905 . #x0939) | |
68 (#x0958 . #x0961))) | |
69 (set-char-table-range indian-composable-pattern range | |
70 devanagari-composable-pattern)) | |
71 | |
72 ;;;###autoload | |
63 (defun devanagari-compose-region (from to) | 73 (defun devanagari-compose-region (from to) |
64 (interactive "r") | 74 (interactive "r") |
65 (save-excursion | 75 (save-excursion |
66 (save-restriction | 76 (save-restriction |
67 (narrow-to-region from to) | 77 (narrow-to-region from to) |
68 (goto-char (point-min)) | 78 (goto-char (point-min)) |
69 (while (re-search-forward devanagari-composable-pattern nil t) | 79 (while (re-search-forward devanagari-composable-pattern nil t) |
70 (devanagari-compose-syllable-region (match-beginning 0) | 80 (devanagari-compose-syllable-region (match-beginning 0) |
71 (match-end 0)))))) | 81 (match-end 0)))))) |
72 (defun devanagari-compose-string (string) | 82 (defun devanagari-compose-string (string) |
73 (with-temp-buffer | 83 (with-temp-buffer |
74 (insert (decompose-string string)) | 84 (insert (decompose-string string)) |
75 (devanagari-compose-region (point-min) (point-max)) | 85 (devanagari-compose-region (point-min) (point-max)) |
76 (buffer-string))) | 86 (buffer-string))) |
81 (let ((buffer-modified-p (buffer-modified-p))) | 91 (let ((buffer-modified-p (buffer-modified-p))) |
82 (narrow-to-region (point) (+ (point) len)) | 92 (narrow-to-region (point) (+ (point) len)) |
83 (devanagari-compose-region (point-min) (point-max)) | 93 (devanagari-compose-region (point-min) (point-max)) |
84 (set-buffer-modified-p buffer-modified-p) | 94 (set-buffer-modified-p buffer-modified-p) |
85 (- (point-max) (point-min)))))) | 95 (- (point-max) (point-min)))))) |
86 | |
87 (defun devanagari-range (from to) | |
88 "Make the list of the integers of range FROM to TO." | |
89 (let (result) | |
90 (while (<= from to) (setq result (cons to result) to (1- to))) result)) | |
91 | 96 |
92 (defun devanagari-regexp-of-hashtbl-keys (hashtbl) | 97 (defun devanagari-regexp-of-hashtbl-keys (hashtbl) |
93 "Return a regular expression that matches all keys in hashtable HASHTBL." | 98 "Return a regular expression that matches all keys in hashtable HASHTBL." |
94 (let ((max-specpdl-size 1000)) | 99 (let ((max-specpdl-size 1000)) |
95 (regexp-opt | 100 (regexp-opt |
97 (let (dummy) | 102 (let (dummy) |
98 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) | 103 (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) |
99 dummy) | 104 dummy) |
100 (function (lambda (x y) (> (length x) (length y)))))))) | 105 (function (lambda (x y) (> (length x) (length y)))))))) |
101 | 106 |
102 (defun devanagari-composition-function (from to pattern &optional string) | 107 ;;;###autoload |
103 "Compose Devanagari characters in REGION, or STRING if specified. | 108 (defun devanagari-composition-function (pos &optional string) |
104 Assume that the REGION or STRING must fully match the composable | 109 "Compose Devanagari characters after the position POS. |
105 PATTERN regexp." | 110 If STRING is not nil, it is a string, and POS is an index to the string. |
106 (if string (devanagari-compose-syllable-string string) | 111 In this case, compose characters after POS of the string." |
107 (devanagari-compose-syllable-region from to)) | 112 (if string |
108 (- to from)) | 113 ;; Not yet implemented. |
109 | 114 nil |
110 ;; Register a function to compose Devanagari characters. | 115 (goto-char pos) |
111 (mapc | 116 (if (looking-at devanagari-composable-pattern) |
112 (function (lambda (ucs) | 117 (prog1 (match-end 0) |
113 (aset composition-function-table (decode-char 'ucs ucs) | 118 (devanagari-compose-syllable-region pos (match-end 0)))))) |
114 (list (cons devanagari-composable-pattern | |
115 'devanagari-composition-function))))) | |
116 (nconc '(#x0903) (devanagari-range #x0905 #x0939) (devanagari-range #x0958 #x0961))) | |
117 | 119 |
118 ;; Notes on conversion steps. | 120 ;; Notes on conversion steps. |
119 | 121 |
120 ;; 1. chars to glyphs | 122 ;; 1. chars to glyphs |
121 ;; | 123 ;; |
488 dev-glyph-glyph-2) | 490 dev-glyph-glyph-2) |
489 hash)) | 491 hash)) |
490 (defvar dev-glyph-glyph-2-regexp | 492 (defvar dev-glyph-glyph-2-regexp |
491 (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)) | 493 (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)) |
492 | 494 |
493 | |
494 (defun dev-charseq (from &optional to) | 495 (defun dev-charseq (from &optional to) |
495 (if (null to) (setq to from)) | 496 (if (null to) (setq to from)) |
496 (mapcar (function (lambda (x) (indian-glyph-char x 'devanagari))) | 497 (number-sequence (decode-char 'devanagari-cdac from) |
497 (devanagari-range from to))) | 498 (decode-char 'devanagari-cdac to))) |
498 | 499 |
499 (defvar dev-glyph-cvn | 500 (defvar dev-glyph-cvn |
500 (append | 501 (append |
501 (dev-charseq #x2b) | 502 (dev-charseq #x2b) |
502 (dev-charseq #x3c #xc1) | 503 (dev-charseq #x3c #xc1) |
562 (buffer-string))) | 563 (buffer-string))) |
563 | 564 |
564 (defun devanagari-compose-syllable-region (from to) | 565 (defun devanagari-compose-syllable-region (from to) |
565 "Compose devanagari syllable in region FROM to TO." | 566 "Compose devanagari syllable in region FROM to TO." |
566 (let ((glyph-str nil) (cons-num 0) glyph-str-list | 567 (let ((glyph-str nil) (cons-num 0) glyph-str-list |
567 (last-halant nil) (preceding-r nil) (last-modifier nil) | 568 (last-halant nil) (preceding-r nil) (last-modifier nil) |
568 (last-char (char-before to)) match-str | 569 (last-char (char-before to)) match-str |
569 glyph-block split-pos) | 570 glyph-block split-pos) |
570 (save-excursion | 571 (save-excursion |
571 (save-restriction | 572 (save-restriction |
572 ;;; *** char-to-glyph conversion *** | 573 ;;; *** char-to-glyph conversion *** |
573 ;; Special rule 1. -- Last halant must be preserved. | 574 ;; Special rule 1. -- Last halant must be preserved. |
574 (if (eq last-char ?$,16-(B) | 575 (if (eq last-char ?$,16-(B) |
575 (progn | 576 (progn |
576 (setq last-halant t) | 577 (setq last-halant t) |
577 (narrow-to-region from (1- to))) | 578 (narrow-to-region from (1- to))) |
578 (narrow-to-region from to) | 579 (narrow-to-region from to) |
579 ;; note if the last char is modifier. | 580 ;; note if the last char is modifier. |
580 (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) | 581 (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) |
581 (setq last-modifier t))) | 582 (setq last-modifier t))) |
582 (goto-char (point-min)) | 583 (goto-char (point-min)) |
583 ;; Special rule 2. -- preceding "r halant" must be modifier. | 584 ;; Special rule 2. -- preceding "r halant" must be modifier. |
584 (when (looking-at "$,15p6-(B.") | 585 (when (looking-at "$,15p6-(B.") |
585 (setq preceding-r t) | 586 (setq preceding-r t) |
586 (goto-char (+ 2 (point)))) | 587 (goto-char (+ 2 (point)))) |
587 ;; translate the rest characters into glyphs | 588 ;; translate the rest characters into glyphs |
588 (while (re-search-forward dev-char-glyph-regexp nil t) | 589 (while (not (eobp)) |
589 (setq match-str (match-string 0)) | 590 (if (looking-at dev-char-glyph-regexp) |
590 (setq glyph-str | 591 (let ((end (match-end 0))) |
591 (concat glyph-str | 592 (setq match-str (match-string 0) |
592 (gethash match-str dev-char-glyph-hash))) | 593 glyph-str |
593 ;; count the number of consonant-glyhs. | 594 (concat glyph-str |
594 (if (string-match devanagari-consonant match-str) | 595 (gethash match-str dev-char-glyph-hash))) |
595 (setq cons-num (1+ cons-num)))) | 596 ;; count the number of consonant-glyhs. |
596 ;; preceding-r must be attached before the anuswar if exists. | 597 (if (string-match devanagari-consonant match-str) |
597 (if preceding-r | 598 (setq cons-num (1+ cons-num))) |
598 (if last-modifier | 599 (goto-char end)) |
599 (setq glyph-str (concat (substring glyph-str 0 -1) | 600 (setq glyph-str (concat glyph-str (string (following-char)))) |
600 "$,4"'(B" (substring glyph-str -1))) | 601 (forward-char 1))) |
601 (setq glyph-str (concat glyph-str "$,4"'(B")))) | 602 ;; preceding-r must be attached before the anuswar if exists. |
602 (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) | 603 (if preceding-r |
603 ;;; *** glyph-to-glyph conversion *** | 604 (if last-modifier |
604 (when (string-match dev-glyph-glyph-regexp glyph-str) | 605 (setq glyph-str (concat (substring glyph-str 0 -1) |
605 (setq glyph-str | 606 "$,4"'(B" (substring glyph-str -1))) |
606 (replace-match (gethash (match-string 0 glyph-str) | 607 (setq glyph-str (concat glyph-str "$,4"'(B")))) |
607 dev-glyph-glyph-hash) | 608 (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) |
608 nil t glyph-str)) | 609 ;;; *** glyph-to-glyph conversion *** |
609 (if (and (> cons-num 1) | 610 (when (string-match dev-glyph-glyph-regexp glyph-str) |
610 (string-match dev-glyph-glyph-2-regexp glyph-str)) | 611 (setq glyph-str |
611 (setq glyph-str | 612 (replace-match (gethash (match-string 0 glyph-str) |
612 (replace-match (gethash (match-string 0 glyph-str) | 613 dev-glyph-glyph-hash) |
613 dev-glyph-glyph-2-hash) | 614 nil t glyph-str)) |
614 nil t glyph-str)))) | 615 (if (and (> cons-num 1) |
615 ;;; *** glyph reordering *** | 616 (string-match dev-glyph-glyph-2-regexp glyph-str)) |
616 (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) | 617 (setq glyph-str |
617 (setq glyph-block (substring glyph-str 0 (1+ split-pos))) | 618 (replace-match (gethash (match-string 0 glyph-str) |
618 (setq glyph-str (substring glyph-str (1+ split-pos))) | 619 dev-glyph-glyph-2-hash) |
619 (setq | 620 nil t glyph-str)))) |
620 glyph-block | 621 ;;; *** glyph reordering *** |
621 (if (string-match dev-glyph-right-modifier-regexp glyph-block) | 622 (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) |
622 (sort (string-to-list glyph-block) | 623 (setq glyph-block (substring glyph-str 0 (1+ split-pos))) |
623 (function (lambda (x y) | 624 (setq glyph-str (substring glyph-str (1+ split-pos))) |
624 (< (get-char-code-property x 'composition-order) | 625 (setq |
625 (get-char-code-property y 'composition-order))))) | 626 glyph-block |
626 (sort (string-to-list glyph-block) | 627 (if (string-match dev-glyph-right-modifier-regexp glyph-block) |
627 (function (lambda (x y) | 628 (sort (string-to-list glyph-block) |
628 (let ((xo (get-char-code-property x 'composition-order)) | 629 (function (lambda (x y) |
629 (yo (get-char-code-property y 'composition-order))) | 630 (< (get-char-code-property x 'composition-order) |
630 (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) | 631 (get-char-code-property y 'composition-order))))) |
631 (setq glyph-str-list (nconc glyph-str-list glyph-block))) | 632 (sort (string-to-list glyph-block) |
632 ;; concatenate and attach reference-points. | 633 (function (lambda (x y) |
633 (setq glyph-str | 634 (let ((xo (get-char-code-property x 'composition-order)) |
634 (cdr | 635 (yo (get-char-code-property y 'composition-order))) |
635 (apply | 636 (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) |
636 'nconc | 637 (setq glyph-str-list (nconc glyph-str-list glyph-block))) |
637 (mapcar | 638 ;; concatenate and attach reference-points. |
638 (function (lambda (x) | 639 (setq glyph-str |
639 (list | 640 (cdr |
640 (or (get-char-code-property x 'reference-point) | 641 (apply |
641 '(5 . 3) ;; default reference point. | 642 'nconc |
642 ) | 643 (mapcar |
643 x))) | 644 (function (lambda (x) |
644 glyph-str-list)))))) | 645 (list |
646 (or (get-char-code-property x 'reference-point) | |
647 '(5 . 3) ;; default reference point. | |
648 ) | |
649 x))) | |
650 glyph-str-list)))))) | |
645 (compose-region from to glyph-str))) | 651 (compose-region from to glyph-str))) |
646 | 652 |
647 (provide 'devan-util) | 653 (provide 'devan-util) |
648 | 654 |
649 ;;; devan-util.el ends here | 655 ;;; devan-util.el ends here |