Mercurial > emacs
changeset 41469:f091948e4d1c
Completely re-written.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Sun, 25 Nov 2001 02:40:11 +0000 |
parents | d566b1e9a5bb |
children | a43bf477cba7 |
files | lisp/language/devan-util.el lisp/language/devanagari.el lisp/language/indian.el |
diffstat | 3 files changed, 684 insertions(+), 1989 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/language/devan-util.el Sun Nov 25 02:36:28 2001 +0000 +++ b/lisp/language/devan-util.el Sun Nov 25 02:40:11 2001 +0000 @@ -1,10 +1,9 @@ -;;; devan-util.el --- support for Devanagari Script Composition - -;; Copyright (C) 1996, 2001 Free Software Foundation, Inc. +;;; devan-util.el --- Support for composing Devanagari characters -;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> +;; Copyright (C) 2001 Free Software Foundation, Inc. -;; Keywords: multilingual, Indian, Devanagari +;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp> +;; Keywords: multilingual, Devanagari ;; This file is part of GNU Emacs. @@ -23,1266 +22,616 @@ ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;; Boston, MA 02111-1307, USA. +;; Created: Feb. 17. 2001 + ;;; Commentary: -;; History: -;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> -;; 1997.3.24 fixed some bugs. - -;; Future work :: -;; Decompose the input characters and process them on the character basis. - -;; Devanagari script composition rules and related programs. +;; This file provides character(Unicode) to glyph(CDAC) conversion and +;; composition of Devanagari script characters. ;;; Code: -;;; -;;; Steps toward composition of Devanagari Characters. -;;; - -;;; Basic functions. - ;;;###autoload -(defun indian-to-devanagari (char) - "Convert IS 13194 character CHAR to Devanagari basic characters. -If CHAR is not IS 13194, return CHAR as is." - (let ((charcodes (split-char char))) - (if (eq (car charcodes) 'indian-is13194) - (make-char 'indian-2-column ?\x21 (nth 1 charcodes)) - char))) - -;;;###autoload -(defun devanagari-to-indian (char) - "Convert Devanagari basic character CHAR to IS 13194 characters. -If CHAR is not Devanagari basic character, return CHAR as is." - (let ((charcodes (split-char char))) - (if (and (eq (car charcodes) 'indian-2-column) - (= (nth 1 charcodes) ?\x21)) - (make-char 'indian-is13194 (nth 2 charcodes)) - char))) - -;;;###autoload -(defun indian-to-devanagari-region (from to) - "Convert IS 13194 characters in region to Devanagari basic characters. -When called from a program, expects two arguments, -positions (integers or markers) specifying the region." - (interactive "r") - (save-excursion - (goto-char from) - (while (< (point) to) - (let ((char (following-char))) - (if (eq (char-charset char) 'indian-is13194) - (progn - (delete-char 1) - (insert (indian-to-devanagari char))) - (forward-char 1)))))) - -;;;###autoload -(defun devanagari-to-indian-region (from to) - "Convert Devanagari basic characters in region to Indian characters. -When called from a program, expects two arguments, -positions (integers or markers) specifying the region." - (interactive "r") - (save-excursion - (goto-char from) - (while (< (point) to) - (let ((char (following-char))) - (if (eq (char-charset char) 'indian-2-column) - (progn - (delete-char 1) - (insert (devanagari-to-indian char))) - (forward-char 1)))))) - -;;;###autoload -(defun indian-to-devanagari-string (string) - "Convert Indian characters in STRING to Devanagari Basic characters." - (let* ((len (length string)) - (i 0) - (vec (make-vector len 0))) - (while (< i len) - (aset vec i (indian-to-devanagari (aref string i))) - (setq i (1+ i))) - (concat vec))) - -;; Phase 0 - Determine whether the characters can be composed. -;; -;;; -;;; Regular expressions to split characters for composition. -;;; -;; -;; Indian script word contains one or more syllables. -;; In BNF, it can be expressed as follows: -;; -;; Word ::= {Syllable} [Cons-Syllable] -;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable -;; Vowel-Syllable ::= V[D] -;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D] -;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons -;; Pure-Cons ::= Full-Cons H -;; Full-Cons ::= C [N] -;; -;; {} repeat, [] optional -;; -;; C - Consonant ($(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E(B -;; $(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X(B) -;; N - Nukta ($(5!i(B) -;; H - Halant($(5!h(B) or Virama -;; V - Vowel ($(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*(B) -;; ("$(5#&#'#*(B" can be obtained by IS13194 vowels with nukta.) -;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu ($(5!!!"(B) -;; (Visaraga ($(5!#(B) is excluded.) -;; M - Matra ($(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M(B) -;; ("$(5#K#L#M(B" can be obtained by IS13194 matras with nukta.) -;; -;; In Emacs, one syllable of Indian language is considered to be one -;; composite glyph. If we expand the above expression for -;; cons-vowel-syllable, it would be: -;; -;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D] -;; -;; Therefore, in worst case, the one syllable may contain -;; following characters. -;; -;; C N H C N H C N H C N H C N M D -;; -;; The example is a sanskrit word "kArtsnya", where five consecutive -;; consonants appear. -;; -;; On the other hand, consonant-syllable, which appears at the end of -;; the word, would have the following expression: -;; -;; [C [N] H] [C [N] H] [C [N] H] C [N] H -;; -;; This is acceptable BEFORE proper consonant-syllable is input. The -;; string which doesn't match with the above expression is invalid and -;; thus must be fixed. -;; -;; Note: -;; Third case can be considered, which is an acceptable syllable and can -;; not add any code more. -;; -;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D -;; -;; However, to make editing possible even in this condition, we will -;; not consider about this case. -;; -;; Note: -;; Currently, it seems that the only following consonants would have -;; Nukta sign attatched. -;; ($(5!3!4!5!:!?!@!I(B) -;; Therefore, [$(5!3(B-$(5!X(B]$(5!i(B? can be re-written as -;; \\([$(5!3!4!5!:!?!@!I(B]$(5!i(B\\)\\|[$(5!3(B-$(5!X(B] - -(defconst devanagari-full-cons - "\\(\\([$(5!3!4!5!:!?!@!I(B]$(5!i(B\\)\\|[$(5!3(B-$(5!X$.$E"%(B]\\)" - "Devanagari full consonant") - -(defconst devanagari-pure-cons - (concat "\\(" devanagari-full-cons "$(5!h(B\\)") - "Devanagari pure consonant") - -(defconst devanagari-matra - "\\(\\([$(5!_![!\(B]$(5!i(B\\)\\|[$(5!Z(B-$(5!g#K#L#M(B]\\)" - "Devanagari Matra Signs. '$(5#K#L#M(B' can also be created from the combination -of '$(5!_![!\(B' and nukta sign.") - -(defconst devanagari-vowel - "\\(\\([$(5!*!&!'(B]$(5!i(B\\)\\|[$(5!$(B-$(5!2#&#'#*(B]\\)" - "Devanagari Vowels. '$(5#&#'#*(B' can also be created from the combination -of '$(5!*!&!'(B' and nukta sign.") - -(defconst devanagari-vowel-syllable - (concat devanagari-vowel "[$(5!!!"(B]?") - "Devanagari vowel syllable.") - -(defconst devanagari-cons-syllable - (concat devanagari-pure-cons "?" devanagari-pure-cons "?" - devanagari-pure-cons "?" devanagari-pure-cons "$") - "Devanagari consonant syllable") - -(defconst devanagari-cons-vowel-syllable - (concat "\\(" - devanagari-pure-cons "?" devanagari-pure-cons "?" - devanagari-pure-cons "?" devanagari-pure-cons "\\)?" - devanagari-full-cons devanagari-matra "?[$(5!!!"(B]?") - "Devanagari consonant vowel syllable.") - -;; -;; Also, digits and virams should be processed other than syllables. -;; -;; In IS 13194, Avagrah is obtained by Nukta after Viram, and -;; OM is obtained by Nukta after Chandrabindu -;; - -(defconst devanagari-digit-viram-visarga - "[$(5!q(B-$(5!z!j!#(B]") - -(defconst devanagari-other-sign - "\\([$(5!!!j(B]$(5!i(B\\)\\|\\([$(5#!#J(B]\\)") - -(defconst devanagari-composite-glyph-unit - (concat "\\(" devanagari-cons-syllable - "\\)\\|\\(" devanagari-vowel-syllable - "\\)\\|\\(" devanagari-cons-vowel-syllable - "\\)\\|\\(" devanagari-other-sign - "\\)\\|\\(" devanagari-digit-viram-visarga "\\)") - "Regexp matching to Devanagari string to be composed form one glyph.") - -;;(put-charset-property charset-devanagari-1-column -;; 'char-to-glyph 'devanagari-compose-string) -;;(put-charset-property charset-devanagari-2-column -;; 'char-to-glyph 'devanagari-compose-string) - -;; Sample -;; -;;(string-match devanagari-cons-vowel-syllable-examine "$(5!X![(B") => 0 -;;(string-match devanagari-cons-vowel-syllable-examine "$(5!F!h!D!\(B") => 0 -;;(string-match devanagari-cons-vowel-syllable-examine "$(5!X![!F!h!D!\(B") => 0 - -;; -;; Steps toward the composition -;; Converting Character Codes to Composite Glyph. -;; -;; Example : $(5!X![(B/$(5!F!h!D!\(B -;; -;; First, convert Characters to appropriate glyphs. -;; -;; => $(5!X![(B/$(5"F!D!\(B -;; -;; Then, determine the base glyph, apply-orders and apply-rules. -;; -;; => $(5!X(B (ml.mr) $(5![(B / $(5!D(B (ml.mr) $(5"F(B (mr ml) $(5!\(B -;; -;; Finally, convert 2-column glyphs to 1-column glyph -;; if such a glyph exist. -;; -;; => $(6!X(B (ml.mr) $(6![(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B -;; -;; Compose the glyph. -;; -;; => 4$(6!Xt%![0!X![1(B/4$(6!Dt%"Fv#!\0!D"F!\1(B -;; => 4$(6!Xt%![0!X![14!Dt%"Fv#!\0!D"F!\1(B -;; - -;; -;; Phase 1: Converting Character Code to Glyph Code. -;; -;; -;; IMPORTANT: -;; There may be many rules that you many want to suppress. -;; In that case, please comment out that rule. -;; -;; RULES WILL BE EVALUATED FROM FIRST TO LAST. -;; PUT MORE SPECIFIC RULES FIRST. -;; -;; TO DO: -;; Prepare multiple specific list of rules for each languages -;; that adopt Devanagari script. -;; - -(defconst devanagari-char-to-glyph-rules - '( - - ;; `r' at the top of syllable and followed by other consonants. - ;; ("[^$(5!h(B]\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" "$(5"p(B") - ("^\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" "$(5"p(B") - - ;; Ligature Rules - ("\\($(5!3!h!B!h!O!h!M(B\\)" "$(5$!(B" sanskrit) - ("\\($(5!3!h!B!h!T(B\\)" "$(5$"(B" sanskrit) - ("\\($(5!3!h!B!h!M(B\\)" "$(5$#(B" sanskrit) - ("\\($(5!3!h!F!h!M(B\\)" "$(5$$(B") - ("\\($(5!3!h!O!h!M(B\\)" "$(5$%(B") - ("\\($(5!3!h!O(B\\)" "$(5"#(B") ; Post "r" - ("\\($(5!3!h!T!h!M(B\\)" "$(5$&(B" sanskrit) - ("\\($(5!3!h(B\\)$(5!3!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"3(B") ; Special Half Form - ("\\($(5!3!h!3(B\\)" "$(5$'(B") - ("\\($(5!3!h(B\\)$(5!B!h!O(B" "$(5"3(B") ; Special Rules for "k-tr" - ("\\($(5!3!h!B(B\\)" "$(5$((B") - ("\\($(5!3!h!F(B\\)" "$(5$)(B") - ("\\($(5!3!h!L(B\\)" "$(5$*(B") - ("\\($(5!3!h!M(B\\)" "$(5$+(B") - ("\\($(5!3!h!Q(B\\)" "$(5$,(B") - ("\\($(5!3!h!T(B\\)" "$(5$-(B") - ("\\($(5!3!h!V!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"l(B") ; Half Form - ("\\($(5$.!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"l(B") ; Half Form - ("\\($(5!3!h!V(B\\)" "$(5$.(B") - ("\\($(5!3!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"3(B") ; Half Form - ("\\($(5!3!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"s(B") ; Nukta Half Form - ("\\($(5!3!i(B\\)" "$(5#3(B") ; Nukta - ("\\($(5!4!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"4(B") ; Half Form - ("\\($(5!4!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"t(B") ; Nukta Half Form - ("\\($(5!4!i(B\\)" "$(5#4(B") ; Nukta - ("\\($(5!5!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"`(B") ; Half Form - ("\\($(5!5!h!O(B\\)" "$(5"$(B") ; Post "r" - ("\\($(5!5!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"5(B") ; Half Form - ("\\($(5!5!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"u(B") ; Nukta Half Form - ("\\($(5!5!i(B\\)" "$(5#5(B") ; Nukta - ("\\($(5!6!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"a(B") ; Half Form - ("\\($(5!6!h!F(B\\)" "$(5$/(B") - ; Slot - ("\\($(5!6!h!O(B\\)" "$(5!6"q(B") ; Post "r" - ("\\($(5!6!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"6(B") ; Half Form - ("\\($(5!7!h!3!h!B!h!M(B\\)" "$(5$0(B" sanskrit) - ("\\($(5!7!h!3!h!V!h!T(B\\)" "$(5$1(B" sanskrit) - ("\\($(5!7!h!3!h!B(B\\)" "$(5$2(B" sanskrit) - ("\\($(5!7!h!3!h!V(B\\)" "$(5$3(B" sanskrit) - ("\\($(5!7!h!3!h!O(B\\)" "$(5$9"q(B") ; Special Rule. May be precomposed font needed. - ("\\($(5!7!h!6!h!O(B\\)" "$(5$4(B" sanskrit) - ("\\($(5!7!h!3!h!M(B\\)" "$(5$5(B" sanskrit) - ("\\($(5!7!h!4!h!M(B\\)" "$(5$6(B" sanskrit) - ("\\($(5!7!h!5!h!M(B\\)" "$(5$7(B" sanskrit) - ("\\($(5!7!h!6!h!M(B\\)" "$(5$8(B" sanskrit) - ("\\($(5!7!h!3(B\\)" "$(5$9(B") - ("\\($(5!7!h!4(B\\)" "$(5$:(B") - ("\\($(5!7!h!5!h!O(B\\)" "$(5$;"q(B") ; Special Rule. May be precomposed font needed. - ("\\($(5!7!h!5(B\\)" "$(5$;(B") - ("\\($(5!7!h!6(B\\)" "$(5$<(B") - ("\\($(5!7!h!7(B\\)" "$(5$=(B") - ("\\($(5!7!h!F(B\\)" "$(5$>(B") - ("\\($(5!7!h!L(B\\)" "$(5$?(B") - ("\\($(5!7!h!M(B\\)" "$(5$@(B") - ("\\($(5!8!h(B\\)[$(5!8!<(B]$(5!h(B" "$(5"8(B") ; Half Form - ("\\($(5!8!h!8(B\\)" "$(5$A(B") - ("\\($(5!8!h!<(B\\)" "$(5$B(B") - ("\\($(5!8!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"8"q(B") ; Half Form Post "r" - ("\\($(5!8!h!O(B\\)" "$(5!8"q(B") ; Post "r" - ("\\($(5!8!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"8(B") ; Half Form - ("\\($(5!9!h!M(B\\)" "$(5$C(B") - ("\\($(5!:!h!O(B\\)" "$(5$D(B") - ("\\($(5!:!h!<!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"m(B") ; Half Form - ("\\($(5!:!h!<(B\\)" "$(5$E(B") - ("\\($(5!:!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5":(B") ; Half Form - ("\\($(5!:!i!h!O(B\\)" "$(5"!(B") ; Nukta Post "r" - ("\\($(5!:!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"z(B") ; Nukta Half Form - ("\\($(5!:!i(B\\)" "$(5#:(B") ; Nukta - ("\\($(5!;!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5";(B") ; Half Form - ("\\($(5!<!h(B\\)$(5!8!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Special Half Form - ("\\($(5!<!h!8(B\\)" "$(5$F(B") - ("\\($(5!<!h(B\\)$(5!:!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Special Half Form - ("\\($(5!<!h!:(B\\)" "$(5$G(B") - ("\\($(5!<!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Half Form - ("\\($(5!=!h!3(B\\)" "$(5$H(B") - ("\\($(5!=!h!=(B\\)" "$(5$I(B") - ("\\($(5!=!h!>(B\\)" "$(5$J(B") - ("\\($(5!=!h!M(B\\)" "$(5$K(B") - ("\\($(5!>!h!M(B\\)" "$(5$L(B") - ("\\($(5!?!h!5!h!M(B\\)" "$(5$M(B" sanskrit) - ("\\($(5!?!h!6!h!O(B\\)" "$(5$N(B" sanskrit) - ("\\($(5!?!h!O!h!M(B\\)" "$(5$O(B") - ("\\($(5!?!h!5(B\\)" "$(5$P(B") - ("\\($(5!?!h!6(B\\)" "$(5$Q(B") - ("\\($(5!?!h!?(B\\)" "$(5$R(B") - ("\\($(5!?!h!L(B\\)" "$(5$S(B") - ("\\($(5!?!h!M(B\\)" "$(5$T(B") - ("\\($(5!?!i(B\\)" "$(5#?(B") ; Nukta - ("\\($(5!@!h!M(B\\)" "$(5$`(B") - ("\\($(5!@!i(B\\)" "$(5#@(B") ; Nukta - ("\\($(5!A!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"A(B") ; Half Form - ("\\($(5!B!h(B\\)$(5!B!h!O(B" "$(5"B(B") ; Special Rule for "t-tr" - ("\\($(5!B!h!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"c(B") ; Half Form - ("\\($(5!B!h!B(B\\)" "$(5$a(B") - ("\\($(5!B!h!F(B\\)" "$(5$b(B") - ("\\($(5!B!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"d(B") ; Half Form Post "r" - ("\\($(5!B!h!O(B\\)" "$(5"%(B") ; Post "r" - ("\\($(5!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"B(B") ; Half Form - ("\\($(5!C!h!O(B\\)" "$(5!C"q(B") ; Post "r" - ("\\($(5!C!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"C(B") ; Half Form - ("\\($(5!D!h!D!h!M(B\\)" "$(5$c(B") - ("\\($(5!D!h!E!h!M(B\\)" "$(5$d(B") - ("\\($(5!D!h!K!h!M(B\\)" "$(5$e(B") - ("\\($(5!D!h!K!h!O(B\\)" "$(5$r"r(B") ; Special Case for "dbhr" ; *** - ("\\($(5!D!h!O!h!M(B\\)" "$(5$f(B") - ("\\($(5!D!h!T!h!M(B\\)" "$(5$g(B") - ("\\($(5!D!h!5!h!O(B\\)" "$(5$h(B") - ("\\($(5!D!h!6!h!O(B\\)" "$(5$i(B") - ("\\($(5!D!h!D!h!T(B\\)" "$(5$j(B") - ("\\($(5!D!h!E!h!T(B\\)" "$(5$k(B") - ("\\($(5!D!h(B\\)$(5!E!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5!D!h(B") ; Special Half Form (for ddhra) - ("\\($(5!D!h!5(B\\)" "$(5$l(B") - ("\\($(5!D!h!6(B\\)" "$(5$m(B") - ("\\($(5!D!h!D(B\\)" "$(5$n(B") - ("\\($(5!D!h!E(B\\)" "$(5$o(B") - ("\\($(5!D!h!F(B\\)" "$(5$p(B") - ("\\($(5!D!h(B\\)$(5!J!h(B" "$(5!D!h(B") ; Suppressing "db-" - ("\\($(5!D!h!J(B\\)" "$(5$q(B") - ("\\($(5!D!h!K(B\\)" "$(5$r(B") - ("\\($(5!D!h!L(B\\)" "$(5$s(B") - ("\\($(5!D!h!M(B\\)" "$(5$t(B") - ("\\($(5!D!h!T(B\\)" "$(5$u(B") - ("\\($(5!E!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"e(B") ; Half Form - ("\\($(5!E!h!F(B\\)" "$(5$v(B") - ("\\($(5!E!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"f(B") ; Half Form Post "r" - ("\\($(5!E!h!O(B\\)" "$(5!E"q(B") ; Post "r" - ("\\($(5!E!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"E(B") ; Half Form - ("\\($(5!F!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"k(B") ; Half Form - ("\\($(5!F!h!F(B\\)" "$(5$w(B") - ("\\($(5!F!h!O(B\\)" "$(5!F"q(B") - ("\\($(5!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"F(B") ; Half Form - ("\\($(5!G!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"G(B") ; Nukta Half Form - ("\\($(5!H!h(B\\)$(5!B!h!O(B" "$(5"H(B") ; Special Rule for "p-tr" - ("\\($(5!H!h!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"g(B") ; Half Form - ("\\($(5!H!h!B(B\\)" "$(5$x(B") - ("\\($(5!H!h!F(B\\)" "$(5$y(B") - ("\\($(5!H!h!Q(B\\)" "$(5$z(B") - ("\\($(5!H!h!O(B\\)" "$(5"&(B") ; Post "r" - ("\\($(5!H!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"H(B") ; Half Form - ("\\($(5!I!h!O(B\\)" "$(5"'(B") ; Post "r" - ("\\($(5!I!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"I(B") ; Half Form - ("\\($(5!I!i!h!O(B\\)" "$(5""(B") ; Nukta Post "r" - ("\\($(5!I!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"y(B") ; Nukta Half Form - ("\\($(5!I!i(B\\)" "$(5#I(B") ; Nukta - ("\\($(5!J!h(B\\)$(5!F!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form - ("\\($(5!J!h!F(B\\)" "$(5${(B") - ("\\($(5!J!h(B\\)$(5!J!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form - ("\\($(5!J!h!J(B\\)" "$(5$|(B") - ("\\($(5!J!h(B\\)$(5!T!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form - ("\\($(5!J!h!T(B\\)" "$(5$}(B") - ("\\($(5!J!h!O(B\\)" "$(5!J"q(B") ; Post "r" - ("\\($(5!J!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Half Form - ("\\($(5!K!h!F(B\\)" "$(5$~(B") - ("\\($(5!K!h!O(B\\)" "$(5!K"q(B") ; Post "r" - ("\\($(5!K!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"K(B") ; Half Form - ("\\($(5!L!h!F(B\\)" "$(5#P(B") - ("\\($(5!L!h!Q(B\\)" "$(5#Q(B") - ("\\($(5!L!h!O(B\\)" "$(5!L"q(B") ; Post "r" - ("\\($(5!L!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"L(B") ; Half Form - ("\\($(5!M!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"M(B") ; Half Form - ("\\($(5!N!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"N(B") ; Half Form - ;; special form for "ru". - ("\\($(5!O!](B\\)" "$(5",(B") - ("\\($(5!O!^(B\\)" "$(5"-(B") - ("\\($(5!P!](B\\)" "$(5".(B") - ("\\($(5!P!^(B\\)" "$(5"/(B") - ;; - ("\\($(5!Q!h!Q(B\\)" "$(5#`(B" sanskrit) - ("\\($(5!Q!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"Q(B") ; Half Form - ("\\($(5!R!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"R(B") ; Half Form - ("\\($(5!S!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"S(B") ; Half Form - ("\\($(5!T!h!F(B\\)" "$(5#a(B") - ("\\($(5!T!h!T(B\\)" "$(5#b(B") - ("\\($(5!T!h!O(B\\)" "$(5!T"q(B") ; Post "r" - ("\\($(5!T!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"T(B") ; Half Form - ("\\($(5!U!h!8!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"h(B") ; Half Form - ("\\($(5!U!h!8(B\\)" "$(5#c(B") - ("\\($(5!U!h!F(B\\)" "$(5#d(B") - ("\\($(5!U!h!J(B\\)" "$(5#e(B") - ("\\($(5!U!h!Q(B\\)" "$(5#f(B") - ("\\($(5!U!h(B\\)$(5!T!h!O(B" "$(5"U(B") ; Special Half Form - ("\\($(5!U!h!T!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"j(B") ; Half Form -; ("\\($(5!U!h!T(B\\)" "$(5#g(B") - ("\\($(5!U!h!O!h!T(B\\)" "$(5#g(B") - ("\\($(5!U!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"i(B") ; Half Form - ("\\($(5!U!h!O(B\\)" "$(5")(B") ; Post "r" - ("\\($(5!U!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"U(B") ; Half Form - ("\\($(5!V!h!=!h!O!h!M(B\\)" "$(5#h(B") - ("\\($(5!V!h!=!h!M(B\\)" "$(5#i(B") - ("\\($(5!V!h!=!h!T(B\\)" "$(5#j(B") - ("\\($(5!V!h!=(B\\)" "$(5#k(B") - ("\\($(5!V!h!>(B\\)" "$(5#l(B") - ("\\($(5!V!h!O(B\\)" "$(5!V"q(B") ; Post "r" - ("\\($(5!V!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"V(B") ; Half Form - ("\\($(5!W!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"W"F(B") ; Special Half Form - ("\\($(5!W!h!F(B\\)" "$(5#m(B") - ("\\($(5!W!h!O(B\\)" "$(5#n(B") - ("\\($(5!W!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"W(B") ; Half Form - ("\\($(5!X!h!A(B\\)" "$(5#p(B") - ("\\($(5!X!h!F(B\\)" "$(5#q(B") - ("\\($(5!X!h!L(B\\)" "$(5#r(B") - ("\\($(5!X!h!M(B\\)" "$(5#s(B") - ("\\($(5!X!h!O(B\\)" "$(5#t(B") - ("\\($(5!X!h!Q(B\\)" "$(5#u(B") - ("\\($(5!X!h!T(B\\)" "$(5#v(B") - ;; Special Ligature Rules - ("\\($(5!X!_(B\\)" "$(5#R(B") - - ;; For consonants other than listed above, glyph-composition will - ;; be applied. If the consonant which is preceding "$(5!O(B" does not - ;; have the vertical line (such as "$(5!?(B"), "$(5"r(B" is put beneath the - ;; consonant. - ;; - ("[$(5!7!9!=!>!?!@!D!O!P!R!S!X(B]\\($(5!h!O(B\\)" "$(5"r(B") - ("[$(5!6!8!C!E!F!H!J!K!L!M!T!V(B]\\($(5!h!O(B\\)" "$(5"q(B") - ("$(5!?!i(B\\($(5!h!O(B\\)" "$(5"r(B") - ("$(5!@!i(B\\($(5!h!O(B\\)" "$(5"r(B") - - ;; Nukta with Non-Consonants - ("\\($(5!!!i(B\\)" "$(5#!(B") - ("\\($(5!&!i(B\\)" "$(5#&(B") - ("\\($(5!'!i(B\\)" "$(5#'(B") - ("\\($(5!*!i(B\\)" "$(5#*(B") - ("\\($(5![!i(B\\)" "$(5#L(B") - ("\\($(5!\!i(B\\)" "$(5#M(B") - ("\\($(5!_!i(B\\)" "$(5#K(B") - ("\\($(5!j!i(B\\)" "$(5#J(B") - - ;; Special rule for "r + some vowels" - ("\\($(5!O!_!i(B\\)" "$(5#*"p(B") - ("\\($(5!O![!i(B\\)" "$(5#&"p(B") - ("\\($(5!O!\!i(B\\)" "$(5#'"p(B") - ("\\($(5!O!_(B\\)" "$(5!*"p(B") - ;; If everything fails, "y" will connect to the front consonant. - ("\\($(5!h!M(B\\)" "$(5"](B") - ) - "Alist of regexps of Devanagari character sequences vs composed characters.") - -(let ((rules devanagari-char-to-glyph-rules)) - (while rules - (let ((rule (car rules)) - (chars) (char) (glyphs) (glyph)) - (setq rules (cdr rules)) - (string-match "\\\\(\\(.+\\)\\\\)" (car rule)) - (setq chars (substring (car rule) (match-beginning 1) (match-end 1))) - (setq char (string-to-char chars)) - (setq glyphs (cdr rule)) - (setq glyph (string-to-char (car glyphs))) - (put-char-code-property - char 'char-to-glyph - ;; We don't "cons" it since priority is top to down. - (append (get-char-code-property char 'char-to-glyph) (list rule))) - - (if (and (< ?(5z(B glyph) ; Glyphs only. - (null (get-char-code-property glyph 'glyph-to-char))) - ; One glyph may corresponds to multiple characters, - ; e.g., surrounding vowel in Tamil, etc. - ; but for Devanagari, we put this restriction - ; to make sure the fact that one glyph corresponds to one char. - (put-char-code-property - glyph 'glyph-to-char - (cons (list (car glyphs) chars) - (get-char-code-property glyph 'glyph-to-char) - )))))) - -;; -;; Function used in both characters-to-glyphs conversion and -;; glyphs-to-characters conversion. -;; - -(defun max-match-len (regexp) - "Return the maximum length of text that can match the pattern REGEXP. -Only [...] pattern of regexp is recognized." - (let ((len 0) - (index 0)) - (while (string-match "\\[\\([^\]]\\)+\\]" regexp index) - (setq len (+ len (- (match-beginning 0) index) 1) - index (match-end 0))) - len)) - -;; Return t iff at least one member appears in both LIST1 and LIST2. -(defun intersecting-p (list1 list2) - (let ((found nil)) - (while (and list1 (not found)) - (if (memq (car list1) list2) - (setq found t) - (setq list1 (cdr list1)))) - found)) - -(defun string-conversion-by-rule (source symbol &rest specs) - "Convert string SOURCE by rules stored in SYMBOL property of each character. -The remaining arguments forms a list SPECS that restricts applicable rules. - -The rules has the form ((REGEXP STR RULE-SPEC ...) ...). -Each character sequence in STRING that matches REGEXP is -replaced by STR. - -If SPECS is nil, only rules with no RULE-SPECs is applied. Otherwise -rules with no RULE-SPECS and rules that have at least one member of -SPECS in RULE-SPECs is applied. - -Rules are tested in the order of the list, thus more specific rules -should be placed in front of less specific rules. -If rule is given in the forms of regexp '...\\(...\\)...', a character -sequence that matches the pattern inside of the parenthesis is the -subject of the match. Otherwise, the entire expression is the subject -of the match." - (let ((pos 0) - (dst-str "")) - (while (< pos (length source)) - (let ((found nil) - (rules (get-char-code-property - (string-to-char - (substring source pos)) symbol))) - (while rules - (let* ((rule (car rules)) - (regexp (car rule)) - (replace-str (car (cdr rule))) - (rule-specs (cdr (cdr rule))) - search-pos) - (if (not (or (null rule-specs) - (intersecting-p specs rule-specs))) - (setq rules (cdr rules)) - (if (null (string-match "\\\\(.+\\\\)" regexp)) - (progn - (setq regexp (concat "\\(" regexp "\\)")) - (setq search-pos pos)) - (setq search-pos (- pos (max-match-len - (substring regexp - (string-match "^[^\\\\]*" regexp) - (match-end 0)))))) - (if (< search-pos 0) (setq search-pos 0)) - (if (string-match regexp source search-pos) - (if (= (match-beginning 1) pos) - (progn - (setq dst-str (concat dst-str replace-str)) - (setq rules nil) ; Get out of the loop. - (setq found t) - ;; proceed `pos' for replaced characters. - (setq pos (match-end 1))) - (setq rules (cdr rules))) - (setq rules (cdr rules)))))) - ;; proceed to next position - (if (not found) - (setq dst-str (concat dst-str (substring source pos (1+ pos))) - pos (1+ pos))))) - dst-str)) - - -;; -;; Convert Character Code to Glyph Code -;; - -;;;###autoload -(defun char-to-glyph-devanagari (string &rest langs) - "Convert Devanagari characters in STRING to Devanagari glyphs. -Ligatures and special rules are processed." - (apply - 'string-conversion-by-rule - (append (list string 'char-to-glyph) langs))) - -;; Example: -;;(char-to-glyph-devanagari "$(5!X![!F!h!D!\(B") => "$(5!X!["F!D!\(B" -;;(char-to-glyph-devanagari "$(5!O!Z!V!h!=!h!O![!M(B") => ??? - -;; -;; Phase 2: Compose Glyphs to form One Glyph. -;; - -;; Each list consists of glyph, application-priority and application-direction. -;; -;; Glyphs will be ordered from low priority number to high priority number. -;; If application-priority is omitted, it is assumed to be 0. -;; If application-direction is omitted, it is asumbed to be '(mr . ml). +;; Devanagari Composable Pattern +;; C .. Consonants +;; V .. Vowel +;; H .. Halant +;; M .. Matra +;; V .. Vowel +;; A .. Anuswar +;; D .. Chandrabindu +;; (N .. Zerowidth Non Joiner) +;; (J .. Zerowidth Joiner. ) +;; 1. vowel +;; V(A/D)? +;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) +;; ((CH)?(CH)?(CH)?CH)?C(MA?|D|H)? -(defconst devanagari-composition-rules - '((?$(5!!(B 0 (tr . br)) - (?$(5!"(B 0 (mr . mr)) - (?$(5!#(B 0) - (?$(5!$(B 0) - (?$(5!%(B 0) - (?$(5!&(B 0) - (?$(5!'(B 0) - (?$(5!((B 0) - (?$(5!)(B 0) - (?$(5!*(B 0) - (?$(5!+(B 0) - (?$(5!,(B 0) - (?$(5!-(B 0) - (?$(5!.(B 0) - (?$(5!/(B 0) - (?$(5!0(B 0) - (?$(5!1(B 0) - (?$(5!2(B 0) - (?$(5!3(B 0) - (?$(5!4(B 0) - (?$(5!5(B 0) - (?$(5!6(B 0) - (?$(5!7(B 0) - (?$(5!8(B 0) - (?$(5!9(B 0) - (?$(5!:(B 0) - (?$(5!;(B 0) - (?$(5!<(B 0) - (?$(5!=(B 0) - (?$(5!>(B 0) - (?$(5!?(B 0) - (?$(5!@(B 0) - (?$(5!A(B 0) - (?$(5!B(B 0) - (?$(5!C(B 0) - (?$(5!D(B 0) - (?$(5!E(B 0) - (?$(5!F(B 0) - (?$(5!G(B 0) - (?$(5!H(B 0) - (?$(5!I(B 0) - (?$(5!J(B 0) - (?$(5!K(B 0) - (?$(5!L(B 0) - (?$(5!M(B 0) - (?$(5!N(B 0) - (?$(5!O(B 0) - (?$(5!P(B 0) - (?$(5!Q(B 0) - (?$(5!R(B 0) - (?$(5!S(B 0) - (?$(5!T(B 0) - (?$(5!U(B 0) - (?$(5!V(B 0) - (?$(5!W(B 0) - (?$(5!X(B 0) - (?$(5!Y(B 0) - (?$(5!Z(B 0) - (?$(5![(B 0 (ml . mr)) - (?$(5!\(B 0) - (?$(5!](B 0 (br . tr)) - (?$(5!^(B 0 (br . tr)) - (?$(5!_(B 0 (br . tr)) - (?$(5!`(B 0 (mr . mr)) ; (tc . bc) - (?$(5!a(B 0 (mr . mr)) - (?$(5!b(B 0 (mr . mr)) - (?$(5!c(B 0 (mr . mr)) - (?$(5!d(B 0) - (?$(5!e(B 0) - (?$(5!f(B 0) - (?$(5!g(B 0) - (?$(5!h(B 0 (br . tr)) - (?$(5!i(B 0 (br . tr)) - (?$(5!j(B 0) - (nil 0) - (nil 0) - (nil 0) - (nil 0) - (nil 0) - (nil 0) - (?$(5!q(B 0) - (?$(5!r(B 0) - (?$(5!s(B 0) - (?$(5!t(B 0) - (?$(5!u(B 0) - (?$(5!v(B 0) - (?$(5!w(B 0) - (?$(5!x(B 0) - (?$(5!y(B 0) - (?$(5!z(B 0) - (nil 0) - (nil 0) - (nil 0) - (nil 0) - (?$(5"!(B 0) - (?$(5""(B 0) - (?$(5"#(B 0) - (?$(5"$(B 0) - (?$(5"%(B 0) - (?$(5"&(B 0) - (?$(5"'(B 0) - (?$(5"((B 0) - (?$(5")(B 0) - (?$(5"*(B 0) - (?$(5"+(B 0) - (?$(5",(B 0) - (?$(5"-(B 0) - (?$(5".(B 0) - (?$(5"/(B 0) - (?$(5"0(B 0) - (?$(5"1(B 0) - (?$(5"2(B 0) - (?$(5"3(B 0) - (?$(5"4(B 0) - (?$(5"5(B 0) - (?$(5"6(B 0) - (?$(5"7(B 0) - (?$(5"8(B 0) - (?$(5"9(B 0) - (?$(5":(B 0) - (?$(5";(B 0) - (?$(5"<(B 0) - (?$(5"=(B 0) - (?$(5">(B 0) - (?$(5"?(B 0) - (?$(5"@(B 0) - (?$(5"A(B 0) - (?$(5"B(B 0) - (?$(5"C(B 0) - (?$(5"D(B 0) - (?$(5"E(B 0) - (?$(5"F(B 0) - (?$(5"G(B 0) - (?$(5"H(B 0) - (?$(5"I(B 0) - (?$(5"J(B 0) - (?$(5"K(B 0) - (?$(5"L(B 0) - (?$(5"M(B 0) - (?$(5"N(B 0) - (?$(5"O(B 0) - (?$(5"P(B 0) - (?$(5"Q(B 0) - (?$(5"R(B 0) - (?$(5"S(B 0) - (?$(5"T(B 0) - (?$(5"U(B 0) - (?$(5"V(B 0) - (?$(5"W(B 0) - (?$(5"X(B 0) - (?$(5"Y(B 0) - (?$(5"Z(B 0) - (?$(5"[(B 0) - (?$(5"\(B 0) - (?$(5"](B 0) - (?$(5"^(B 0) - (?$(5"_(B 0) - (?$(5"`(B 0) - (?$(5"a(B 0) - (?$(5"b(B 0) - (?$(5"c(B 0) - (?$(5"d(B 0) - (?$(5"e(B 0) - (?$(5"f(B 0) - (?$(5"g(B 0) - (?$(5"h(B 0) - (?$(5"i(B 0) - (?$(5"j(B 0) - (?$(5"k(B 0) - (?$(5"l(B 0) - (?$(5"m(B 0) - (?$(5"n(B 0) - (?$(5"o(B 0) - (?$(5"p(B 10 (mr . mr)) - (?$(5"q(B 0 (br . br)) - (?$(5"r(B 0 (br . tr)) - (?$(5"s(B 0) - (?$(5"t(B 0) - (?$(5"u(B 0) - (?$(5"v(B 0) - (?$(5"w(B 0) - (?$(5"x(B 0) - (?$(5"y(B 0) - (?$(5"z(B 0) - (?$(5"{(B 0) - (?$(5"|(B 0) - (?$(5"}(B 0) - (?$(5"~(B 0) - (?$(5#!(B 0) - (?$(5#"(B 0) - (?$(5##(B 0) - (?$(5#$(B 0) - (?$(5#%(B 0) - (?$(5#&(B 0) - (?$(5#'(B 0) - (?$(5#((B 0) - (?$(5#)(B 0) - (?$(5#*(B 0) - (?$(5#+(B 0) - (?$(5#,(B 0) - (?$(5#-(B 0) - (?$(5#.(B 0) - (?$(5#/(B 0) - (?$(5#0(B 0) - (?$(5#1(B 0) - (?$(5#2(B 0) - (?$(5#3(B 0) - (?$(5#4(B 0) - (?$(5#5(B 0) - (?$(5#6(B 0) - (?$(5#7(B 0) - (?$(5#8(B 0) - (?$(5#9(B 0) - (?$(5#:(B 0) - (?$(5#;(B 0) - (?$(5#<(B 0) - (?$(5#=(B 0) - (?$(5#>(B 0) - (?$(5#?(B 0) - (?$(5#@(B 0) - (?$(5#A(B 0) - (?$(5#B(B 0) - (?$(5#C(B 0) - (?$(5#D(B 0) - (?$(5#E(B 0) - (?$(5#F(B 0) - (?$(5#G(B 0) - (?$(5#H(B 0) - (?$(5#I(B 0) - (?$(5#J(B 0) - (?$(5#K(B 0 (br . tr)) - (?$(5#L(B 0 (br . tr)) - (?$(5#M(B 0 (br . tr)) - (?$(5#N(B 0) - (?$(5#O(B 0) - (?$(5#P(B 0) - (?$(5#Q(B 0) - (?$(5#R(B 0) - (?$(5#S(B 0) - (?$(5#T(B 0) - (?$(5#U(B 0) - (?$(5#V(B 0) - (?$(5#W(B 0) - (?$(5#X(B 0) - (?$(5#Y(B 0) - (?$(5#Z(B 0) - (?$(5#[(B 0) - (?$(5#\(B 0) - (?$(5#](B 0) - (?$(5#^(B 0) - (?$(5#_(B 0) - (?$(5#`(B 0) - (?$(5#a(B 0) - (?$(5#b(B 0) - (?$(5#c(B 0) - (?$(5#d(B 0) - (?$(5#e(B 0) - (?$(5#f(B 0) - (?$(5#g(B 0) - (?$(5#h(B 0) - (?$(5#i(B 0) - (?$(5#j(B 0) - (?$(5#k(B 0) - (?$(5#l(B 0) - (?$(5#m(B 0) - (?$(5#n(B 0) - (?$(5#o(B 0) - (?$(5#p(B 0) - (?$(5#q(B 0) - (?$(5#r(B 0) - (?$(5#s(B 0) - (?$(5#t(B 0) - (?$(5#u(B 0) - (?$(5#v(B 0) - (?$(5#w(B 0) - (?$(5#x(B 0) - (?$(5#y(B 0) - (?$(5#z(B 0) - (?$(5#{(B 0) - (?$(5#|(B 0) - (?$(5#}(B 0) - (?$(5#~(B 0) - (?$(5$!(B 0) - (?$(5$"(B 0) - (?$(5$#(B 0) - (?$(5$$(B 0) - (?$(5$%(B 0) - (?$(5$&(B 0) - (?$(5$'(B 0) - (?$(5$((B 0) - (?$(5$)(B 0) - (?$(5$*(B 0) - (?$(5$+(B 0) - (?$(5$,(B 0) - (?$(5$-(B 0) - (?$(5$.(B 0) - (?$(5$/(B 0) - (?$(5$0(B 0) - (?$(5$1(B 0) - (?$(5$2(B 0) - (?$(5$3(B 0) - (?$(5$4(B 0) - (?$(5$5(B 0) - (?$(5$6(B 0) - (?$(5$7(B 0) - (?$(5$8(B 0) - (?$(5$9(B 0) - (?$(5$:(B 0) - (?$(5$;(B 0) - (?$(5$<(B 0) - (?$(5$=(B 0) - (?$(5$>(B 0) - (?$(5$?(B 0) - (?$(5$@(B 0) - (?$(5$A(B 0) - (?$(5$B(B 0) - (?$(5$C(B 0) - (?$(5$D(B 0) - (?$(5$E(B 0) - (?$(5$F(B 0) - (?$(5$G(B 0) - (?$(5$H(B 0) - (?$(5$I(B 0) - (?$(5$J(B 0) - (?$(5$K(B 0) - (?$(5$L(B 0) - (?$(5$M(B 0) - (?$(5$N(B 0) - (?$(5$O(B 0) - (?$(5$P(B 0) - (?$(5$Q(B 0) - (?$(5$R(B 0) - (?$(5$S(B 0) - (?$(5$T(B 0) - (?$(5$U(B 0) - (?$(5$V(B 0) - (?$(5$W(B 0) - (?$(5$X(B 0) - (?$(5$Y(B 0) - (?$(5$Z(B 0) - (?$(5$[(B 0) - (?$(5$\(B 0) - (?$(5$](B 0) - (?$(5$^(B 0) - (?$(5$_(B 0) - (?$(5$`(B 0) - (?$(5$a(B 0) - (?$(5$b(B 0) - (?$(5$c(B 0) - (?$(5$d(B 0) - (?$(5$e(B 0) - (?$(5$f(B 0) - (?$(5$g(B 0) - (?$(5$h(B 0) - (?$(5$i(B 0) - (?$(5$j(B 0) - (?$(5$k(B 0) - (?$(5$l(B 0) - (?$(5$m(B 0) - (?$(5$n(B 0) - (?$(5$o(B 0) - (?$(5$p(B 0) - (?$(5$q(B 0) - (?$(5$r(B 0) - (?$(5$s(B 0) - (?$(5$t(B 0) - (?$(5$u(B 0) - (?$(5$v(B 0) - (?$(5$w(B 0) - (?$(5$x(B 0) - (?$(5$y(B 0) - (?$(5$z(B 0) - (?$(5${(B 0) - (?$(5$|(B 0) - (?$(5$}(B 0) - (?$(5$~(B 0) - )) +(defconst devanagari-consonant + "[$,15U(B-$,15y68(B-$,16?(B]") -;; Determine composition priority and rule of the array of Glyphs. -;; Sort the glyphs with their priority. - -(defun devanagari-reorder-glyphs-for-composition (string start end) - (let ((pos start) - (ordered-glyphs nil)) - (while (< pos end) - (let ((glyph (aref string pos))) - (setq pos (1+ pos)) - (setq ordered-glyphs - (append ordered-glyphs - (list (assq glyph devanagari-composition-rules)))))) - (sort ordered-glyphs '(lambda (x y) (< (car (cdr x)) (car (cdr y))))))) - -! ;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "4$(6!Xv#"5t%![0!X"5![1(B" - -(defun devanagari-compose-to-one-glyph (devanagari-string) - (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition - devanagari-string 0 (length devanagari-string))) - ;; List of glyphs to be composed. - (cmp-glyph-list (list (car (car o-glyph-list)))) - (o-glyph-list (cdr o-glyph-list))) - (while o-glyph-list - (let* ((o-glyph (car o-glyph-list)) - (glyph (if (< 2 (length o-glyph)) - ;; default composition - (list (car (cdr (cdr o-glyph))) (car o-glyph)) - ;; composition with a specified rule - (list '(mr . ml) (car o-glyph))))) - (setq o-glyph-list (cdr o-glyph-list)) - (setq cmp-glyph-list (append cmp-glyph-list glyph)))) - ;; Before applying compose-chars, convert glyphs to - ;; 1-column width if possible. - (setq cmp-glyph-list (devanagari-wide-to-narrow cmp-glyph-list)) - (if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list)) - (apply 'compose-chars cmp-glyph-list)))) - -(defun devanagari-composition-component (string &optional start end) - (or start (setq start 0)) - (or end (setq end (length string))) - (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition - string start end)) - ;; List of glyphs to be composed. - (cmp-glyph-list (list (car (car o-glyph-list))))) - (setq o-glyph-list (cdr o-glyph-list)) - (while o-glyph-list - (let* ((o-glyph (car o-glyph-list)) - (glyph (if (< 2 (length o-glyph)) - ;; default composition - (list (car (cdr (cdr o-glyph))) (car o-glyph)) - ;; composition with a specified rule - (list '(mr . ml) (car o-glyph))))) - (setq o-glyph-list (cdr o-glyph-list)) - (setq cmp-glyph-list (append cmp-glyph-list glyph)))) - ;; Convert glyphs to 1-column width if possible. - (devanagari-wide-to-narrow cmp-glyph-list))) - -;; Utility function for Phase 2.5 - -;; Check whether GLYPH is a Devanagari vertical modifier or not. -;; If it is a vertical modifier, whether it should be 1-column shape or not -;; depends on previous non-vertical modifier. -(defun devanagari-vertical-modifier-p (glyph) - (string-match (char-to-string glyph) - "[$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M(B]")) - -(defun devanagari-non-vertical-modifier-p (glyph) - (string-match (char-to-string glyph) -; "[$(5!Z![!\!d!e!f!g(B]")) - "[$(5![(B]")) - -(defun devanagari-wide-to-narrow-char (char) - "Convert Devanagari character CHAR to the corresponding narrow character. -If there's no corresponding narrow character, return CHAR as is." - (let ((narrow (cdr (assq char devanagari-1-column-char)))) - (or narrow char))) +(defconst devanagari-composable-pattern + (concat + "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|$,15C(B" + "\\|\\(" + "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?" + "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?" + "\\)") + "Regexp matching a composable sequence of Devanagari characters.") -;; -;; Phase 2.5 Convert appropriate character to 1-column shape. -;; -;; This is temporary and should be removed out when Emacs supports -;; variable width characters. -;; -;; This will convert the composing glyphs (2 column glyphs) -;; to narrow (1 column) glyphs if they exist. -;; -;; devanagari-wide-to-narrow-old converts glyphs simply. -;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs -;; with 2 column base-glyph. -;; -;; Execution Examples -;;(devanagari-wide-to-narrow '(?$(5!3(B (ml . ml) ?$(5!a(B)) -;;(devanagari-wide-to-narrow '(?$(5!F(B (ml . ml) ?$(5!a(B)) - -(defun devanagari-wide-to-narrow (src-list) - (devanagari-wide-to-narrow-iter src-list t)) - -(defun devanagari-wide-to-narrow-iter (src-list 2-col-glyph) - (let ((glyph (car src-list))) - (cond ((null src-list) '()) - ; not glyph code - ((not (numberp glyph)) - (cons glyph - (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))) - ; glyphs to be processed regardless of the value of "2-col-glyph" - ((devanagari-non-vertical-modifier-p glyph) - (cons (devanagari-wide-to-narrow-char glyph) - (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))) - ; glyphs which are depends on the value of "2-col-glyph" - ((devanagari-vertical-modifier-p glyph) - (if 2-col-glyph - (cons glyph - (devanagari-wide-to-narrow-iter (cdr src-list) t)) - (cons (devanagari-wide-to-narrow-char glyph) - (devanagari-wide-to-narrow-iter (cdr src-list) - 2-col-glyph)))) - ; normal glyph - (t - (if (cdr (assq glyph devanagari-1-column-char)) - (cons (devanagari-wide-to-narrow-char glyph) - (devanagari-wide-to-narrow-iter (cdr src-list) nil)) - (cons glyph - (devanagari-wide-to-narrow-iter (cdr src-list) t))))))) - - -;; -;; Summary -;; - -;; -;; Decomposition of composite sequence. -;; - -;;;###autoload -(defun devanagari-decompose-string (str) - "Decompose Devanagari string STR" - (decompose-string (copy-sequence str))) - -;;;###autoload -(defun devanagari-decompose-region (from to) - (interactive "r") - (decompose-region from to)) - -;;; -;;; Composition -;;; - -;;;###autoload -(defun devanagari-compose-string (str &rest langs) - (setq str (copy-sequence str)) - (let ((idx 0) - rest match-b match-e) - (while (string-match devanagari-composite-glyph-unit str idx) - (let* ((match-b (match-beginning 0)) - (match-e (match-end 0)) - (cmps (devanagari-composition-component - (apply - 'char-to-glyph-devanagari - (cons (substring str match-b match-e) langs))))) - (compose-string str match-b match-e cmps) - (setq idx match-e)))) - str) - -;;;###autoload -(defun devanagari-compose-region (from to &rest langs) +(defun devanagari-compose-region (from to) (interactive "r") (save-excursion (save-restriction (narrow-to-region from to) (goto-char (point-min)) - (while (re-search-forward devanagari-composite-glyph-unit nil t) - (let* ((match-b (match-beginning 0)) (match-e (match-end 0)) - (cmps (devanagari-composition-component - (apply - 'char-to-glyph-devanagari - (cons (buffer-substring match-b match-e) langs))))) - (compose-region match-b match-e cmps)))))) + (while (re-search-forward devanagari-composable-pattern nil t) + (devanagari-compose-syllable-region (match-beginning 0) + (match-end 0)))))) +(defun devanagari-compose-string (string) + (with-temp-buffer + (insert (decompose-string string)) + (devanagari-compose-region (point-min) (point-max)) + (buffer-string))) + +(defun range (from to) + "Make the list of the integers of range FROM to TO." + (let (result) + (while (<= from to) (setq result (cons to result) to (1- to))) result)) + +(defun regexp-of-hashtbl-keys (hashtbl) + "Returns the regular expression of hashtable keys." + (let ((max-specpdl-size 1000)) + (regexp-opt + (sort + (let (dummy) + (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) + dummy) + (function (lambda (x y) (> (length x) (length y)))))))) + +(defun devanagari-composition-function (from to pattern &optional string) + "Compose Devanagari characters in REGION, or STRING if specified. +Assume that the REGION or STRING must fully match the composable +PATTERN regexp." + (if string (devanagari-compose-syllable-string string) + (devanagari-compose-syllable-region from to)) + (- to from)) + +;; Register a function to compose Devanagari characters. +(mapc + (function (lambda (ucs) + (aset composition-function-table (decode-char 'ucs ucs) + (list (cons devanagari-composable-pattern + 'devanagari-composition-function))))) + (nconc '(#x0903) (range #x0905 #x0939) (range #x0958 #x0961))) + +;; Notes on conversion steps. -;; For pre-write and post-read conversion +;; 1. chars to glyphs +;; +;; Rules will not be applied to the halant appeared at the end of the +;; text. Also, the preceding/following "r" will be treated as special case. + +;; 2. glyphs reordering. +;; +;; The glyphs are split by halant, and each glyph groups are +;; re-ordered in the following order. +;; +;; Note that `consonant-glyph' mentioned here does not contain the +;; vertical bar (right modifier) attached at the right of the +;; consonant. +;; +;; If the glyph-group contains right modifier, +;; (1) consonant-glyphs/vowels, with nukta sign +;; (2) spacing +;; (3) right modifier (may be matra) +;; (4) top matra +;; (5) preceding "r" +;; (6) anuswar +;; (7) following "r" +;; (8) bottom matra or halant. +;; +;; Otherwise, +;; (1) consonant-glyph/vowels, with nukta sign +;; (3) left matra +;; (4) top matra +;; (5) preceding "r" +;; (6) anuswar +;; (7) following "r" +;; (8) bottom matra or halant. +;; (2) spacing + +;; 3. glyph to glyph +;; +;; For better display, some glyph display would be tuned. + +;; 4. Composition. +;; +;; left modifiers will be attached at the left. +;; others will be attached right. + +;; Problem:: +;; Can we generalize this methods to other Indian scripts? -;;;###autoload -(defun devanagari-compose-from-is13194-region (from to) - "Compose IS 13194 characters in the region to Devanagari characters." - (interactive "r") - (save-excursion - (save-restriction - (narrow-to-region from to) - (indian-to-devanagari-region (point-min) (point-max)) - (devanagari-compose-region (point-min) (point-max)) - (- (point-max) (point-min))))) +(defvar dev-char-glyph + '(("$,15E(B" . "$,4 K(B") + ("$,15F(B" . "$,4 K")(B") + ("$,15~(B" . "$,4")(B") + ("$,15G(B" . "$,4 \(B") + ("$,15(B" . "$,4"*(B") + ("$,155A(B" . "$,4"*(B\$,4"&(B") + ("$,15H(B" . "$,4 \"'(B") + ("$,15H5A(B" . "$,4 \"'"&(B") + ("$,16 (B" . "$,4"2(B") + ("$,16 5A(B" . "$,4"2"&(B") + ("$,15I(B" . "$,4 ](B") + ("$,16!(B" . "$,4"6(B") + ("$,15J(B" . "$,4 ^"P(B") + ("$,16"(B" . "$,4":(B") + ("$,15K(B" . "$,4 `"Q(B") + ("$,16#(B" . "$,4">(B") + ;;("$,15L(B" . nil) ; not implemented. + ("$,16$(B" . "$,4"?(B") + ("$,15M(B" . "$,4 b"L(B") + ("$,15M5A(B" . "$,4 b"$(B") + ("$,15M5B(B" . "$,4 b"$(B") + ("$,16%(B" . "\$,4"L(B") + ("$,15N(B" . "$,4 b"@(B") + ("$,15N5A(B" . "$,4 b"@"&(B") + ("$,16&(B" . "\$,4"@(B") + ("$,16&5A(B" . "\$,4"@(B\$,4"&(B") + ("$,15O(B" . "$,4 b(B") + ("$,16'(B" . "\$,4"D(B") + ("$,16'5A(B" . "\$,4"D(B\$,4"&(B") + ("$,15P(B" . "$,4 b"D(B") + ("$,15P5A(B" . "$,4 b"D"&(B") + ("$,16((B" . "\$,4"H(B") + ("$,16(5A(B" . "\$,4"H(B\$,4"&(B") + ("$,15Q(B" . "$,4 K")"L(B") ;; special rule for reodering. + ("$,15Q5A(B" . "$,4 K")"$(B") + ("$,15Q5B(B" . "$,4 K")"$(B") + ("$,16)(B" . "\$,4")"L(B") + ("$,16)5A(B" . "\$,4")"$(B") + ("$,16)5B(B" . "\$,4")"$(B") + ("$,15R(B" . "$,4 K")"@(B") + ("$,15R5A(B" . "$,4 K")"@"&(B") + ("$,16*(B" . "\$,4")"@(B") + ("$,16*5A(B" . "\$,4")"@"&(B") + ("$,15S(B" . "$,4 K")"D(B") + ("$,15S5A(B" . "$,4 K")"D"&(B") + ("$,16+(B" . "\$,4")"D(B") + ("$,16+5A(B" . "\$,4")"D"&(B") + ("$,15T(B" . "$,4 K")"H(B") + ("$,15T5A(B" . "$,4 K")"H"&(B") + ("$,16,(B" . "\$,4")"H(B") + ("$,16,5A(B" . "\$,4")"H"&(B") + ("$,16@(B" . "$,4 a"Q(B") + ;;("$,16B(B" . nil) + ;;("$,16A(B" . nil) + ;;("$,16C(B" . nil) + + ;; GRUTTALS + ("$,15U(B" . "$,4 e"R(B") + ("$,15U6-(B" . "$,4 c(B") + ("$,15U6-5p(B" . "$,4 g"R(B") + ("$,15U6-5d(B" . "$,4 h"R(B") + ("$,15U6-5w(B" . "$,4 i")(B") + ("$,15U6-5w6-(B" . "$,4 i(B") + + ("$,15V(B" . "$,4 j")(B") + ("$,15V6-(B" . "$,4 j(B") + ("$,15V6-5p(B" . "$,4 l")(B") + ("$,15V6-5p6-(B" . "$,4 l(B") + + ("$,15W(B" . "$,4 m")(B") + ("$,15W6-(B" . "$,4 m(B") + ("$,15W6-5p(B" . "$,4 o")(B") + ("$,15W6-5p6-(B" . "$,4 o(B") + + ("$,15X(B" . "$,4 p")(B") + ("$,15X6-(B" . "$,4 p(B") + ("$,15X6-5p(B" . "$,4 q")(B") + ("$,15X6-5p6-(B" . "$,4 q(B") + + ("$,15Y(B" . "$,4 r"S(B") + ;; PALATALS + ("$,15Z(B" . "$,4 s")(B") + ("$,15Z6-(B" . "$,4 s(B") + ("$,15Z6-5p(B" . "$,4 t")(B") + ("$,15Z6-5p6-(B" . "$,4 t(B") + + ("$,15[(B" . "$,4 u"T(B") + + ("$,15\(B" . "$,4 v")(B") + ("$,15\6-(B" . "$,4 v(B") + ("$,15\6-5p(B" . "$,4 x")(B") + ("$,15\6-5p6-(B" . "$,4 x(B") + ("$,15\6-5^(B" . "$,4 y")(B") + ("$,15\6-5^6-(B" . "$,4 y(B") -;;;###autoload -(defun in-is13194-devanagari-post-read-conversion (len) - (let ((pos (point))) - (devanagari-compose-from-is13194-region pos (+ pos len)))) + ("$,15](B" . "$,4 z")(B") + ("$,15]6-(B" . "$,4 z(B") + ("$,15]6-5p(B" . "$,4 {")(B") + ("$,15]6-5p6-(B" . "$,4 {(B") + + ("$,15^(B" . "$,4 |")(B") + ("$,15^6-(B" . "$,4 |(B") + ;; CEREBRALS + ("$,15_(B" . "$,4 }"U(B") + ("$,15_6-5_(B" . "$,4 ~"U(B") + ("$,15_6-5`(B" . "$,4 "U(B") + + ("$,15`(B" . "$,4! "V(B") + ("$,15`6-5`(B" . "$,4!!"V(B") + + ("$,15a(B" . "$,4!""W(B") + ("$,15a6-5a(B" . "$,4!$"W(B") + ("$,15a6-5b(B" . "$,4!%"W(B") + + ("$,15b(B" . "$,4!&"X(B") + + ("$,15c(B" . "$,4!(")(B") + ("$,15c6-(B" . "$,4!((B") + ;; DENTALS + ("$,15d(B" . "$,4!)")(B") + ("$,15d6-(B" . "$,4!)(B") + ("$,15d6-5p(B" . "$,4!*")(B") + ("$,15d6-5p6-(B" . "$,4!*(B") + ("$,15d6-5d(B" . "$,4!+")(B") + ("$,15d6-5d6-(B" . "$,4!+(B") + + ("$,15e(B" . "$,4!,")(B") + ("$,15e6-(B" . "$,4!,(B") + ("$,15e6-5p(B" . "$,4!-")(B") + ("$,15e6-5p6-(B" . "$,4!-(B") + + ("$,15f(B" . "$,4!."Y(B") + ("$,15f6#(B" . "$,4!/"Y(B") + ("$,15f6-5p(B" . "$,4!0"Y(B") + ("$,15f6-5f(B" . "$,4!1"Y(B") + ("$,15f6-5g(B" . "$,4!2"Y(B") + ("$,15f6-5n(B" . "$,4!3(B") + ("$,15f6-5o(B" . "$,4!4(B") + ("$,15f6-5u(B" . "$,4!5"Y(B") -;;;###autoload -(defun devanagari-decompose-to-is13194-region (from to) - "Decompose Devanagari characters in the region to IS 13194 characters." - (interactive "r") - (save-excursion - (save-restriction - (narrow-to-region from to) - (devanagari-decompose-region (point-min) (point-max)) - (devanagari-to-indian-region (point-min) (point-max))))) + ("$,15g(B" . "$,4!6")(B") + ("$,15g6-(B" . "$,4!6(B") + ("$,15g6-5p(B" . "$,4!7")(B") + ("$,15g6-5p6-(B" . "$,4!7(B") + + ("$,15h(B" . "$,4!8")(B") + ("$,15h6-(B" . "$,4!8(B") + ("$,15h6-5p(B" . "$,4!9")(B") + ("$,15h6-5p6-(B" . "$,4!9")(B") + ("$,15h6-5h(B" . "$,4!:")(B") + ("$,15h6-5h6-(B" . "$,4!:(B") + + ("$,15i(B" . "$,4!8"#")(B") + ;; LABIALS + ("$,15j(B" . "$,4!;")(B") + ("$,15j6-(B" . "$,4!;(B") + ("$,15j6-5p(B" . "$,4!<")(B") + ("$,15j6-5p6-(B" . "$,4!<(B") + + ("$,15k(B" . "$,4!a"[(B") + ("$,15k6-(B" . "$,4!=(B") + ("$,15k6-5p(B" . "$,4!c"[(B") + + ("$,15l(B" . "$,4!d")(B") + ("$,15l6-(B" . "$,4!d(B") + ("$,15l6-5p(B" . "$,4!e")(B") + ("$,15l6-5p6-(B" . "$,4!e(B") + + ("$,15m(B" . "$,4!f")(B") + ("$,15m6-(B" . "$,4!f(B") + ("$,15m6-5p(B" . "$,4!g")(B") + ("$,15m6-5p6-(B" . "$,4!g(B") + + ("$,15n(B" . "$,4!h")(B") + ("$,15n6-(B" . "$,4!h(B") + ("$,15n6-5p(B" . "$,4!i")(B") + ("$,15n6-5p6-(B" . "$,4!i(B") + ;; SEMIVOWELS + ("$,15o(B" . "$,4!j")(B") + ("$,15o6-(B" . "$,4!j(B") + ("$,15o6-5p(B" . "$,4!k")(B") + ("$,15o6-5p6-(B" . "$,4!k(B") + ("$,16-5o(B" . "$,4!l(B") ;; when every ohter lig. fails. + + ("$,15p(B" . "$,4!n"W(B") + ;; ("$,15p6-(B" . "\$,4"'(B") ;; special case. only the topmost pos. + ("$,15q(B" . "$,4!n"#"W(B") + ("$,15q6-(B" . "$,4!m(B") ;; IS 13194 speical rule. + ("$,15p6!(B" . "$,4!o"[(B") + ("$,15p6"(B" . "$,4!p"\(B") -;;;###autoload -(defun in-is13194-devanagari-pre-write-conversion (from to) - (let ((old-buf (current-buffer))) - (set-buffer (generate-new-buffer " *temp*")) - (if (stringp from) - (insert from) - (insert-buffer-substring old-buf from to)) - (devanagari-decompose-to-is13194-region (point-min) (point-max)) - ;; Should return nil as annotations. - nil)) + ("$,15r(B" . "$,4!q")(B") + ("$,15r6-(B" . "$,4!q(B") + ("$,15s(B" . "$,4!s(B") + ("$,15s6-(B" . "$,4!r(B") + ("$,15t(B" . "$,4!s"#(B") + ("$,15t6-(B" . "$,4!r"#(B") + + ("$,15u(B" . "$,4!t")(B") + ("$,15u6-(B" . "$,4!t(B") + ("$,15u6-5p(B" . "$,4!u")(B") + ("$,15u6-5p6-(B" . "$,4!u(B") + ;; SIBILANTS + ("$,15v(B" . "$,4!v")(B") + ("$,15v6-(B" . "$,4!v(B") + ("$,15v6-5u(B" . "$,4!w")(B") + ("$,15v6-5u6-(B" . "$,4!w(B") + ("$,15v6-5p(B" . "$,4!x")(B") + ("$,15v6-5p6-(B" . "$,4!x(B") + + ("$,15w(B" . "$,4!y")(B") + ("$,15w6-(B" . "$,4!y(B") + ("$,15x(B" . "$,4!z")(B") + ("$,15x6-(B" . "$,4!z(B") + ("$,15x6-5p(B" . "$,4!{")(B") + ("$,15x6-5p6-(B" . "$,4!{(B") -;; For input/output of ITRANS + ("$,15y(B" . "$,4!}(B") + ("$,15y6-(B" . "$,4!|(B") + ("$,15y6#(B" . "$,4!~(B") + ("$,15y6-5p(B" . "$,4!(B") + ("$,15y6-5n(B" . "$,4" (B") + ("$,15y6-5o(B" . "$,4"!(B") + ;; NUKTAS + ("$,168(B" . "$,4 f"R"S(B") + ("$,1686-(B" . "$,4 d(B") + ("$,169(B" . "$,4 k")(B") + ("$,1696-(B" . "$,4 k(B") + ("$,16:(B" . "$,4 n")(B") + ("$,16:6-(B" . "$,4 n(B") + ("$,16;(B" . "$,4 w")(B") + ("$,16;6-(B" . "$,4 w(B") + ("$,16<(B" . "$,4!#"W(B") + ("$,16=(B" . "$,4!'"X(B") + ("$,16>(B" . "$,4!b"[(B") + ("$,16>6-(B" . "$,4!>(B") + ("$,16?(B" . "$,4!j"#")(B") + ;; misc modifiers. + ("$,15A(B" . "\$,4"$(B") + ("$,15B(B" . "\$,4"&(B") + ("$,15C(B" . "$,4 F(B") + ("$,15|(B" . "$,4"#(B") + ("$,15}(B" . "$,4 E(B") + ("$,16-(B" . "$,4""(B") + ("$,16-5p(B" . "$,4"%(B") ;; following "r" + ;; ("$,160(B" . "$,4 D(B") + ;; ("$,16D(B" . "$,4 J(B") + ;; ("$,16F(B" . "") + ;; ("$,16G(B" . "") + ;; ("$,16H(B" . "") + ;; ("$,16I(B" . "") + ;; ("$,16J(B" . "") + ;; ("$,16K(B" . "") + ;; ("$,16L(B" . "") + ;; ("$,16M(B" . "") + ;; ("$,16N(B" . "") + ;; ("$,16O(B" . "") + ) + "Devanagari characters to glyphs conversion table. +Default value contains only the basic rules. You may add your own +preferred rule from the sanskrit fonts." ) + +(defvar dev-char-glyph-hash + (let* ((hash (makehash 'equal))) + (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) + dev-char-glyph) + hash)) + +(defvar dev-char-glyph-regexp + (regexp-of-hashtbl-keys dev-char-glyph-hash)) + +;; glyph-to-glyph conversion table. +;; it is supposed that glyphs are ordered in +;; [consonant/nukta] - [matra/halant] - [preceding-r] - [anuswar]. + +(defvar dev-glyph-glyph + '(("\$,4"'(B\$,4"&(B" . "\$,4"((B") + ("\$,4"'(B\$,4"$(B" . "\$,4"((B") + ("$,4"*(B\$,4"&(B" . "$,4"+(B") + ("$,4"*(B\$,4"'(B" . "$,4",(B") + ("$,4"*(B\$,4"'(B\$,4"&(B" . "$,4"-(B") + ("$,4"2(B\$,4"&(B" . "$,4"3(B") + ("$,4"2(B\$,4"'(B" . "$,4"4(B") + ("$,4"2(B\$,4"'(B\$,4"&(B" . "$,4"5(B") + ("$,4"#(B\$,4"6(B" . "$,4"7(B") + ("$,4"%(B\$,4"6(B" . "$,4"8(B") + ;;("$,4"6(B" . "$,4"9(B") + ("$,4"#(B\$,4":(B" . "$,4";(B") + ("$,4"%(B\$,4":(B" . "$,4"<(B") + ;;("$,4":(B" . "$,4"=(B") + ("\$,4"@(B\$,4"&(B" . "\$,4"A(B") + ("\$,4"@(B\$,4"'(B" . "\$,4"B(B") + ("\$,4"@(B\$,4"'(B\$,4"&(B" . "\$,4"C(B") + ("\$,4"D(B\$,4"&(B" . "\$,4"E(B") + ("\$,4"D(B\$,4"'(B" . "\$,4"F(B") + ("\$,4"D(B\$,4"'(B\$,4"&(B" . "\$,4"G(B") + ("\$,4"H(B\$,4"&(B" . "\$,4"I(B") + ("\$,4"H(B\$,4"'(B" . "\$,4"J(B") + ("\$,4"H(B\$,4"'(B\$,4"&(B" . "\$,4"K(B") + ("\$,4"L(B\$,4"&(B" . "\$,4"M(B") + ("\$,4"L(B\$,4"'(B" . "\$,4"N(B") + ("\$,4"L(B\$,4"'(B\$,4"&(B" . "\$,4"O(B") + )) +(defvar dev-glyph-glyph-hash + (let* ((hash (makehash 'equal))) + (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) + dev-glyph-glyph) + hash)) +(defvar dev-glyph-glyph-regexp + (regexp-of-hashtbl-keys dev-glyph-glyph-hash)) + + +;; yet another glyph-to-glyph conversions. +(defvar dev-glyph-glyph-2 + '(("$,4"*(B" . "$,4".(B") + ("$,4"+(B" . "$,4"/(B") + ("$,4",(B" . "$,4"0(B") + ("$,4"-(B" . "$,4"1(B"))) +(defvar dev-glyph-glyph-2-hash + (let* ((hash (makehash 'equal))) + (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) + dev-glyph-glyph-2) + hash)) +(defvar dev-glyph-glyph-2-regexp + (regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)) + + +(defun dev-charseq (from &optional to) + (if (null to) (setq to from)) + (mapcar (function (lambda (x) (indian-glyph-char x 'devanagari))) + (range from to))) -;;;###autoload -(defun devanagari-encode-itrans-region (from to) - (interactive "r") - (save-restriction - (narrow-to-region from to) - (devanagari-decompose-to-is13194-region (point-min) (point-max)) - (indian-encode-itrans-region (point-min) (point-max)))) +(defvar dev-glyph-cvn + (append + (dev-charseq #x2b) + (dev-charseq #x3c #xc1) + (dev-charseq #xc3)) + "Devanagari Consonants/Vowels/Nukta Glyphs") + +(defvar dev-glyph-space + (dev-charseq #xf0 #xfe) + "Devanagari Spacing Glyphs") + +(defvar dev-glyph-right-modifier + (append + (dev-charseq #xc9) + (dev-charseq #xd2 #xd5)) + "Devanagari Modifiers attached at the right side.") + +(defvar dev-glyph-right-modifier-regexp + (concat "[" dev-glyph-right-modifier "]")) + +(defvar dev-glyph-left-matra + (dev-charseq #xca #xd1) + "Devanagari Matras attached at the left side.") + +(defvar dev-glyph-top-matra + (dev-charseq #xe0 #xef) + "Devanagari Matras attached at the top side.") + +(defvar dev-glyph-bottom-modifier + (append + (dev-charseq #xd6 #xdf) + (dev-charseq #xc2)) + "Devanagari Modifiers attached at the bottom.") + +(defvar dev-glyph-order + `((,dev-glyph-cvn . 1) + (,dev-glyph-space . 2) + (,dev-glyph-right-modifier . 3) + (,dev-glyph-left-matra . 3) ;; processed by reference point. + (,dev-glyph-top-matra . 4) + (,(dev-charseq #xc7 #xc8) . 5) + (,(dev-charseq #xc6) . 6) + (,(dev-charseq #xc5) . 7) + (,dev-glyph-bottom-modifier . 8))) + +(mapc + (function (lambda (x) + (mapc + (function (lambda (y) + (put-char-code-property y 'composition-order (cdr x)))) + (car x)))) + dev-glyph-order) + +(mapc + (function (lambda (x) + (put-char-code-property x 'reference-point '(3 . 5)))) + dev-glyph-left-matra) + +(defun devanagari-compose-syllable-string (string) + (with-temp-buffer + (insert (decompose-string string)) + (devanagari-compose-syllable-region (point-min) (point-max)) + (buffer-string))) -;;;###autoload -(defun devanagari-decode-itrans-region (from to) - (interactive "r") - (save-restriction - (narrow-to-region from to) - (indian-decode-itrans-region (point-min) (point-max)) - (devanagari-compose-from-is13194-region (point-min) (point-max)))) +(defun devanagari-compose-syllable-region (from to) + "Compose devanagari syllable in region FROM to TO." + (let ((glyph-str nil) (cons-num 0) glyph-str-list + (last-halant nil) (preceding-r nil) (last-modifier nil) + (last-char (char-before to)) match-str + glyph-block split-pos) + (save-excursion + (save-restriction + ;;; *** char-to-glyph conversion *** + ;; Special rule 1. -- Last halant must be preserved. + (if (eq last-char ?$,16-(B) + (progn + (setq last-halant t) + (narrow-to-region from (1- to))) + (narrow-to-region from to) + ;; note if the last char is modifier. + (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) + (setq last-modifier t))) + (goto-char (point-min)) + ;; Special rule 2. -- preceding "r halant" must be modifier. + (when (looking-at "$,15p6-(B.") + (setq preceding-r t) + (goto-char (+ 2 (point)))) + ;; translate the rest characters into glyphs + (while (re-search-forward dev-char-glyph-regexp nil t) + (setq match-str (match-string 0)) + (setq glyph-str + (concat glyph-str + (gethash match-str dev-char-glyph-hash))) + ;; count the number of consonant-glyhs. + (if (string-match devanagari-consonant match-str) + (setq cons-num (1+ cons-num)))) + ;; preceding-r must be attached before the anuswar if exists. + (if preceding-r + (if last-modifier + (setq glyph-str (concat (substring glyph-str 0 -1) + "$,4"'(B" (substring glyph-str -1))) + (setq glyph-str (concat glyph-str "$,4"'(B")))) + (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) + ;;; *** glyph-to-glyph conversion *** + (when (string-match dev-glyph-glyph-regexp glyph-str) + (setq glyph-str + (replace-match (gethash (match-string 0 glyph-str) + dev-glyph-glyph-hash) + nil t glyph-str)) + (if (and (> cons-num 1) + (string-match dev-glyph-glyph-2-regexp glyph-str)) + (setq glyph-str + (replace-match (gethash (match-string 0 glyph-str) + dev-glyph-glyph-2-hash) + nil t glyph-str)))) + ;;; *** glyph reordering *** + (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) + (setq glyph-block (substring glyph-str 0 (1+ split-pos))) + (setq glyph-str (substring glyph-str (1+ split-pos))) + (setq + glyph-block + (if (string-match dev-glyph-right-modifier-regexp glyph-block) + (sort (string-to-list glyph-block) + (function (lambda (x y) + (< (get-char-code-property x 'composition-order) + (get-char-code-property y 'composition-order))))) + (sort (string-to-list glyph-block) + (function (lambda (x y) + (let ((xo (get-char-code-property x 'composition-order)) + (yo (get-char-code-property y 'composition-order))) + (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) + (setq glyph-str-list (nconc glyph-str-list glyph-block))) + ;; concatenate and attach reference-points. + (setq glyph-str + (cdr + (apply + 'nconc + (mapcar + (function (lambda (x) + (list + (or (get-char-code-property x 'reference-point) + '(5 . 3) ;; default reference point. + ) + x))) + glyph-str-list)))))) + (compose-region from to glyph-str))) -;; (provide 'devan-util) - -;; Local Variables: -;; coding: iso-2022-7bit -;; End: - -;;; devan-util.el ends here
--- a/lisp/language/devanagari.el Sun Nov 25 02:36:28 2001 +0000 +++ b/lisp/language/devanagari.el Sun Nov 25 02:40:11 2001 +0000 @@ -1,9 +1,8 @@ -;;; devanagari.el --- support for Devanagari -*- coding: iso-2022-7bit; -*- +;;; devanagari.el --- Support for Devanagari -*- coding: iso-2022-7bit; -*- -;; Copyright (C) 1996 Free Software Foundation, Inc. +;; Copyright (C) 1996, 2001 Free Software Foundation, Inc. -;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> - +;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp> ;; Keywords: multilingual, Indian, Devanagari ;; This file is part of GNU Emacs. @@ -25,481 +24,24 @@ ;;; Commentary: -;; History: -;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> -;; 1997.1.20 fixed some bugs. +;; This file defines language-info of Devanagari script, and provides +;; compatibility support of old implementation of Devanagari script. ;;; Code: -(make-coding-system - 'in-is13194-devanagari 2 ?D - "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)" - '(ascii indian-is13194 nil nil - nil ascii-eol) - '((safe-charsets ascii indian-is13194) - (post-read-conversion . in-is13194-devanagari-post-read-conversion) - (pre-write-conversion . in-is13194-devanagari-pre-write-conversion))) - -(define-coding-system-alias 'devanagari 'in-is13194-devanagari) - (set-language-info-alist - "Devanagari" '((charset indian-is13194 indian-2-column indian-1-column) - (coding-system in-is13194-devanagari) - (coding-priority in-is13194-devanagari) - (input-method . "devanagari-itrans") + "Devanagari" '((charset indian-is13194 mule-unicode-0100-24ff + indian-2-column indian-glyph ;; comment out later + ) + (coding-system in-is13194) + (coding-priority in-is13194) + (input-method . "dev-aiba") (features devan-util) (documentation . "\ Such languages using Devanagari script as Hindi and Marathi are supported in this language environment.")) '("Indian")) -;; -;; Devanagari Glyph List -;; -;; 0 1 2 3 4 5 6 7 8 9 a b c d e f -;;2120 $(5!!!"!#!$!%!&!'!(!)!*!+!,!-!.!/(B -;;2130 $(5!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?(B -;;2140 $(5!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O(B -;;2150 $(5!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_(B -;;2160 $(5!`!a!b!c!d!e!f!g!h!i!j!k!l!m!n!o(B -;;2170 $(5!p!q!r!s!t!u!v!w!x!y!z!{!|!}!~(B -;; -;; 0 1 2 3 4 5 6 7 8 9 a b c d e f -;;2220 $(5"!"""#"$"%"&"'"(")"*"+","-"."/(B -;;2230 $(5"0"1"2"3"4"5"6"7"8"9":";"<"=">"?(B -;;2240 $(5"@"A"B"C"D"E"F"G"H"I"J"K"L"M"N"O(B -;;2250 $(5"P"Q"R"S"T"U"V"W"X"Y"Z"["\"]"^"_(B -;;2260 $(5"`"a"b"c"d"e"f"g"h"i"j"k"l"m"n"o(B -;;2270 $(5"p"q"r"s"t"u"v"w"x"y"z"{"|"}"~(B -;; -;; 0 1 2 3 4 5 6 7 8 9 a b c d e f -;;2320 $(5#!#"###$#%#&#'#(#)#*#+#,#-#.#/(B -;;2330 $(5#0#1#2#3#4#5#6#7#8#9#:#;#<#=#>#?(B -;;2340 $(5#@#A#B#C#D#E#F#G#H#I#J#K#L#M#N#O(B -;;2350 $(5#P#Q#R#S#T#U#V#W#X#Y#Z#[#\#]#^#_(B -;;2360 $(5#`#a#b#c#d#e#f#g#h#i#j#k#l#m#n#o(B -;;2370 $(5#p#q#r#s#t#u#v#w#x#y#z#{#|#}#~(B -;; -;; 0 1 2 3 4 5 6 7 8 9 a b c d e f -;;2420 $(5$!$"$#$$$%$&$'$($)$*$+$,$-$.$/(B -;;2430 $(5$0$1$2$3$4$5$6$7$8$9$:$;$<$=$>$?(B -;;2440 $(5$@$A$B$C$D$E$F$G$H$I$J$K$L$M$N$O(B -;;2450 $(5$P$Q$R$S$T$U$V$W$X$Y$Z$[$\$]$^$_(B -;;2460 $(5$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n$o(B -;;2470 $(5$p$q$r$s$t$u$v$w$x$y$z${$|$}$~(B -;; -;; 0123456789abcdef -;;2120 $(6!!!"!#!$!%!&!'!(!)!*!+!,!-!.!/(B -;;2130 $(6!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?(B -;;2140 $(6!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O(B -;;2150 $(6!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_(B -;;2160 $(6!`!a!b!c!d!e!f!g!h!i!j!k!l!m!n!o(B -;;2170 $(6!p!q!r!s!t!u!v!w!x!y!z!{!|!}!~(B -;; -;; 0123456789abcdef -;;2220 $(6"!"""#"$"%"&"'"(")"*"+","-"."/(B -;;2230 $(6"0"1"2"3"4"5"6"7"8"9":";"<"=">"?(B -;;2240 $(6"@"A"B"C"D"E"F"G"H"I"J"K"L"M"N"O(B -;;2250 $(6"P"Q"R"S"T"U"V"W"X"Y"Z"["\"]"^"_(B -;;2260 $(6"`"a"b"c"d"e"f"g"h"i"j"k"l"m"n"o(B -;;2270 $(6"p"q"r"s"t"u"v"w"x"y"z"{"|"}"~(B -;;2320 $(6#!#"###$#%#&#'#(#)#*#+#,#-#.#/(B -;;2330 $(6#0#1#2#3#4#5#6#7#8#9#:#;#<#=#>#?(B -;;2340 $(6#@#A#B#C#D#E#F#G#H#I#J#K#L#M#N#O(B -;;2350 $(6#P#Q#R#S#T#U#V#W#X#Y#Z#[#\#]#^#_(B -;;2360 $(6#`#a#b#c#d#e#f#g#h#i#j#k#l#m#n#o(B -;;2370 $(6#p#q#r#s#t#u#v#w#x#y#z#{#|#}#~(B -;; -;; 0123456789abcdef -;;2320 $(6$!$"$#$$$%$&$'$($)$*$+$,$-$.$/(B -;;2430 $(6$0$1$2$3$4$5$6$7$8$9$:$;$<$=$>$?(B -;;2440 $(6$@$A$B$C$D$E$F$G$H$I$J$K$L$M$N$O(B -;;2450 $(6$P$Q$R$S$T$U$V$W$X$Y$Z$[$\$]$^$_(B -;;2460 $(6$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n$o(B -;;2470 $(6$p$q$r$s$t$u$v$w$x$y$z${$|$}$~(B -;; -;; -;; Modify the following table if you change the set of 1-column font. -;; -(defconst devanagari-1-column-char - '((?$(5!!(B . ?$(6!!(B) - (?$(5!"(B . ?$(6!"(B) - (?$(5!#(B . ?$(6!#(B) - (?$(5!$(B . nil) - (?$(5!%(B . nil) - (?$(5!&(B . ?$(6!&(B) - (?$(5!'(B . ?$(6!'(B) - (?$(5!((B . ?$(6!((B) - (?$(5!)(B . nil) - (?$(5!*(B . nil) - (?$(5!+(B . nil) - (?$(5!,(B . nil) - (?$(5!-(B . nil) - (?$(5!.(B . nil) - (?$(5!/(B . nil) - (?$(5!0(B . nil) - (?$(5!1(B . nil) - (?$(5!2(B . nil) - (?$(5!3(B . nil) - (?$(5!4(B . nil) - (?$(5!5(B . ?$(6!5(B) - (?$(5!6(B . nil) - (?$(5!7(B . nil) - (?$(5!8(B . nil) - (?$(5!9(B . nil) - (?$(5!:(B . nil) - (?$(5!;(B . nil) - (?$(5!<(B . nil) - (?$(5!=(B . ?$(6!=(B) - (?$(5!>(B . ?$(6!>(B) - (?$(5!?(B . ?$(6!?(B) - (?$(5!@(B . ?$(6!@(B) - (?$(5!A(B . nil) - (?$(5!B(B . ?$(6!B(B) - (?$(5!C(B . ?$(6!C(B) - (?$(5!D(B . ?$(6!D(B) - (?$(5!E(B . ?$(6!E(B) - (?$(5!F(B . ?$(6!F(B) - (?$(5!G(B . ?$(6!G(B) - (?$(5!H(B . ?$(6!H(B) - (?$(5!I(B . nil) - (?$(5!J(B . ?$(6!J(B) - (?$(5!K(B . ?$(6!K(B) - (?$(5!L(B . ?$(6!L(B) - (?$(5!M(B . ?$(6!M(B) - (?$(5!N(B . ?$(6!N(B) - (?$(5!O(B . ?$(6!O(B) - (?$(5!P(B . ?$(6!P(B) - (?$(5!Q(B . nil) - (?$(5!R(B . nil) - (?$(5!S(B . nil) - (?$(5!T(B . ?$(6!T(B) - (?$(5!U(B . nil) - (?$(5!V(B . ?$(6!V(B) - (?$(5!W(B . ?$(6!W(B) - (?$(5!X(B . ?$(6!X(B) - (?$(5!Y(B . nil) - (?$(5!Z(B . ?$(6!Z(B) - (?$(5![(B . ?$(6![(B) - (?$(5!\(B . ?$(6!\(B) - (?$(5!](B . ?$(6!](B) - (?$(5!^(B . ?$(6!^(B) - (?$(5!_(B . ?$(6!_(B) - (?$(5!`(B . ?$(6!`(B) - (?$(5!a(B . ?$(6!a(B) - (?$(5!b(B . ?$(6!b(B) - (?$(5!c(B . ?$(6!c(B) - (?$(5!d(B . ?$(6!d(B) - (?$(5!e(B . ?$(6!e(B) - (?$(5!f(B . ?$(6!f(B) - (?$(5!g(B . ?$(6!g(B) - (?$(5!h(B . ?$(6!h(B) - (?$(5!i(B . ?$(6!i(B) - (?$(5!j(B . ?$(6!j(B) - (nil . nil) - (nil . nil) - (nil . nil) - (nil . nil) - (nil . nil) - (nil . nil) - (?$(5!q(B . ?$(6!q(B) - (?$(5!r(B . ?$(6!r(B) - (?$(5!s(B . ?$(6!s(B) - (?$(5!t(B . ?$(6!t(B) - (?$(5!u(B . ?$(6!u(B) - (?$(5!v(B . ?$(6!v(B) - (?$(5!w(B . ?$(6!w(B) - (?$(5!x(B . ?$(6!x(B) - (?$(5!y(B . ?$(6!y(B) - (?$(5!z(B . ?$(6!z(B) - (nil . nil) - (nil . nil) - (nil . nil) - (nil . nil) - (?$(5"!(B . nil) - (?$(5""(B . nil) - (?$(5"#(B . nil) - (?$(5"$(B . ?$(6"$(B) - (?$(5"%(B . ?$(6"%(B) - (?$(5"&(B . ?$(6"&(B) - (?$(5"'(B . nil) - (?$(5"((B . nil) - (?$(5")(B . nil) - (?$(5"*(B . nil) - (?$(5"+(B . nil) - (?$(5",(B . ?$(6",(B) - (?$(5"-(B . nil) - (?$(5".(B . ?$(6".(B) - (?$(5"/(B . nil) - (?$(5"0(B . nil) - (?$(5"1(B . nil) - (?$(5"2(B . nil) - (?$(5"3(B . ?$(6"3(B) - (?$(5"4(B . ?$(6"4(B) - (?$(5"5(B . ?$(6"5(B) - (?$(5"6(B . ?$(6"6(B) - (?$(5"7(B . nil) - (?$(5"8(B . ?$(6"8(B) - (?$(5"9(B . nil) - (?$(5":(B . ?$(6":(B) - (?$(5";(B . ?$(6";(B) - (?$(5"<(B . ?$(6"<(B) - (?$(5"=(B . nil) - (?$(5">(B . nil) - (?$(5"?(B . nil) - (?$(5"@(B . nil) - (?$(5"A(B . ?$(6"A(B) - (?$(5"B(B . ?$(6"B(B) - (?$(5"C(B . ?$(6"C(B) - (?$(5"D(B . nil) - (?$(5"E(B . ?$(6"E(B) - (?$(5"F(B . ?$(6"F(B) - (?$(5"G(B . ?$(6"G(B) - (?$(5"H(B . ?$(6"H(B) - (?$(5"I(B . ?$(6"I(B) - (?$(5"J(B . ?$(6"J(B) - (?$(5"K(B . ?$(6"K(B) - (?$(5"L(B . ?$(6"L(B) - (?$(5"M(B . ?$(6"M(B) - (?$(5"N(B . ?$(6"N(B) - (?$(5"O(B . nil) - (?$(5"P(B . nil) - (?$(5"Q(B . ?$(6"Q(B) - (?$(5"R(B . nil) - (?$(5"S(B . nil) - (?$(5"T(B . ?$(6"T(B) - (?$(5"U(B . ?$(6"U(B) - (?$(5"V(B . ?$(6"V(B) - (?$(5"W(B . ?$(6"W(B) - (?$(5"X(B . nil) - (?$(5"Y(B . nil) - (?$(5"Z(B . nil) - (?$(5"[(B . nil) - (?$(5"\(B . nil) - (?$(5"](B . ?$(6"](B) - (?$(5"^(B . nil) - (?$(5"_(B . nil) - (?$(5"`(B . ?$(6"`(B) - (?$(5"a(B . ?$(6"a(B) - (?$(5"b(B . ?$(6"b(B) - (?$(5"c(B . ?$(6"c(B) - (?$(5"d(B . ?$(6"d(B) - (?$(5"e(B . ?$(6"e(B) - (?$(5"f(B . ?$(6"f(B) - (?$(5"g(B . ?$(6"g(B) - (?$(5"h(B . ?$(6"h(B) - (?$(5"i(B . ?$(6"i(B) - (?$(5"j(B . ?$(6"j(B) - (?$(5"k(B . ?$(6"k(B) - (?$(5"l(B . ?$(6"l(B) - (?$(5"m(B . ?$(6"m(B) - (?$(5"n(B . nil) - (?$(5"o(B . nil) - (?$(5"p(B . ?$(6"p(B) - (?$(5"q(B . ?$(6"q(B) - (?$(5"r(B . ?$(6"r(B) - (?$(5"s(B . ?$(6"s(B) - (?$(5"t(B . ?$(6"t(B) - (?$(5"u(B . ?$(6"u(B) - (?$(5"v(B . nil) - (?$(5"w(B . nil) - (?$(5"x(B . nil) - (?$(5"y(B . ?$(6"y(B) - (?$(5"z(B . ?$(6"z(B) - (?$(5"{(B . nil) - (?$(5"|(B . nil) - (?$(5"}(B . nil) - (?$(5"~(B . nil) - (?$(5#!(B . nil) - (?$(5#"(B . nil) - (?$(5##(B . nil) - (?$(5#$(B . nil) - (?$(5#%(B . nil) - (?$(5#&(B . nil) - (?$(5#'(B . nil) - (?$(5#((B . nil) - (?$(5#)(B . nil) - (?$(5#*(B . nil) - (?$(5#+(B . nil) - (?$(5#,(B . nil) - (?$(5#-(B . nil) - (?$(5#.(B . nil) - (?$(5#/(B . nil) - (?$(5#0(B . nil) - (?$(5#1(B . nil) - (?$(5#2(B . nil) - (?$(5#3(B . nil) - (?$(5#4(B . nil) - (?$(5#5(B . ?$(6#5(B) - (?$(5#6(B . nil) - (?$(5#7(B . nil) - (?$(5#8(B . nil) - (?$(5#9(B . nil) - (?$(5#:(B . nil) - (?$(5#;(B . nil) - (?$(5#<(B . nil) - (?$(5#=(B . nil) - (?$(5#>(B . nil) - (?$(5#?(B . ?$(6#?(B) - (?$(5#@(B . ?$(6#@(B) - (?$(5#A(B . nil) - (?$(5#B(B . nil) - (?$(5#C(B . nil) - (?$(5#D(B . nil) - (?$(5#E(B . nil) - (?$(5#F(B . nil) - (?$(5#G(B . nil) - (?$(5#H(B . nil) - (?$(5#I(B . nil) - (?$(5#J(B . ?$(6#J(B) - (?$(5#K(B . ?$(6#K(B) - (?$(5#L(B . ?$(6#L(B) - (?$(5#M(B . ?$(6#M(B) - (?$(5#N(B . nil) - (?$(5#O(B . nil) - (?$(5#P(B . nil) - (?$(5#Q(B . nil) - (?$(5#R(B . ?$(6#R(B) - (?$(5#S(B . nil) - (?$(5#T(B . nil) - (?$(5#U(B . nil) - (?$(5#V(B . nil) - (?$(5#W(B . nil) - (?$(5#X(B . nil) - (?$(5#Y(B . nil) - (?$(5#Z(B . nil) - (?$(5#[(B . nil) - (?$(5#\(B . nil) - (?$(5#](B . nil) - (?$(5#^(B . nil) - (?$(5#_(B . nil) - (?$(5#`(B . nil) - (?$(5#a(B . ?$(6#a(B) - (?$(5#b(B . ?$(6#b(B) - (?$(5#c(B . nil) - (?$(5#d(B . nil) - (?$(5#e(B . nil) - (?$(5#f(B . nil) - (?$(5#g(B . nil) - (?$(5#h(B . nil) - (?$(5#i(B . nil) - (?$(5#j(B . ?$(6#j(B) - (?$(5#k(B . ?$(6#k(B) - (?$(5#l(B . ?$(6#l(B) - (?$(5#m(B . nil) - (?$(5#n(B . nil) - (?$(5#o(B . nil) - (?$(5#p(B . nil) - (?$(5#q(B . nil) - (?$(5#r(B . nil) - (?$(5#s(B . nil) - (?$(5#t(B . nil) - (?$(5#u(B . nil) - (?$(5#v(B . nil) - (?$(5#w(B . nil) - (?$(5#x(B . nil) - (?$(5#y(B . nil) - (?$(5#z(B . nil) - (?$(5#{(B . nil) - (?$(5#|(B . nil) - (?$(5#}(B . nil) - (?$(5#~(B . nil) - (?$(5$!(B . nil) - (?$(5$"(B . nil) - (?$(5$#(B . nil) - (?$(5$$(B . nil) - (?$(5$%(B . nil) - (?$(5$&(B . nil) - (?$(5$'(B . nil) - (?$(5$((B . nil) - (?$(5$)(B . nil) - (?$(5$*(B . nil) - (?$(5$+(B . nil) - (?$(5$,(B . nil) - (?$(5$-(B . nil) - (?$(5$.(B . nil) - (?$(5$/(B . nil) - (?$(5$0(B . nil) - (?$(5$1(B . nil) - (?$(5$2(B . nil) - (?$(5$3(B . nil) - (?$(5$4(B . nil) - (?$(5$5(B . nil) - (?$(5$6(B . nil) - (?$(5$7(B . nil) - (?$(5$8(B . nil) - (?$(5$9(B . nil) - (?$(5$:(B . nil) - (?$(5$;(B . nil) - (?$(5$<(B . nil) - (?$(5$=(B . nil) - (?$(5$>(B . nil) - (?$(5$?(B . nil) - (?$(5$@(B . nil) - (?$(5$A(B . ?$(6$A(B) - (?$(5$B(B . nil) - (?$(5$C(B . nil) - (?$(5$D(B . nil) - (?$(5$E(B . ?$(6$E(B) - (?$(5$F(B . nil) - (?$(5$G(B . nil) - (?$(5$H(B . ?$(6$H(B) - (?$(5$I(B . ?$(6$I(B) - (?$(5$J(B . ?$(6$J(B) - (?$(5$K(B . nil) - (?$(5$L(B . nil) - (?$(5$M(B . nil) - (?$(5$N(B . ?$(6$N(B) - (?$(5$O(B . nil) - (?$(5$P(B . ?$(6$P(B) - (?$(5$Q(B . ?$(6$Q(B) - (?$(5$R(B . ?$(6$R(B) - (?$(5$S(B . nil) - (?$(5$T(B . nil) - (?$(5$U(B . nil) - (?$(5$V(B . nil) - (?$(5$W(B . nil) - (?$(5$X(B . nil) - (?$(5$Y(B . nil) - (?$(5$Z(B . nil) - (?$(5$[(B . nil) - (?$(5$\(B . nil) - (?$(5$](B . nil) - (?$(5$^(B . nil) - (?$(5$_(B . nil) - (?$(5$`(B . nil) - (?$(5$a(B . nil) - (?$(5$b(B . nil) - (?$(5$c(B . nil) - (?$(5$d(B . nil) - (?$(5$e(B . nil) - (?$(5$f(B . nil) - (?$(5$g(B . nil) - (?$(5$h(B . ?$(6$h(B) - (?$(5$i(B . ?$(6$i(B) - (?$(5$j(B . ?$(6$j(B) - (?$(5$k(B . nil) - (?$(5$l(B . ?$(6$l(B) - (?$(5$m(B . ?$(6$m(B) - (?$(5$n(B . ?$(6$n(B) - (?$(5$o(B . nil) - (?$(5$p(B . ?$(6$p(B) - (?$(5$q(B . ?$(6$q(B) - (?$(5$r(B . ?$(6$r(B) - (?$(5$s(B . nil) - (?$(5$t(B . nil) - (?$(5$u(B . ?$(6$u(B) - (?$(5$v(B . ?$(6$v(B) - (?$(5$w(B . nil) - (?$(5$x(B . ?$(6$x(B) - (?$(5$y(B . ?$(6$y(B) - (?$(5$z(B . nil) - (?$(5${(B . nil) - (?$(5$|(B . nil) - (?$(5$}(B . nil) - (?$(5$~(B . nil) - )) - (provide 'devanagari) ;;; devanagari.el ends here
--- a/lisp/language/indian.el Sun Nov 25 02:36:28 2001 +0000 +++ b/lisp/language/indian.el Sun Nov 25 02:40:11 2001 +0000 @@ -1,10 +1,9 @@ -;;; indian.el --- support for Indian Languages -*- coding: iso-2022-7bit; -*- - -;; Copyright (C) 1995 Free Software Foundation, Inc. +;;; indian.el --- Indian languages support -*- coding: iso-2022-7bit; -*- -;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> +;; Copyright (C) 1999, 2001 Free Software Foundation, Inc. -;; Keywords: multilingual, Indian +;; Maintainer: KAWABATA, Taichi <batta@beige.ocn.ne.jp> +;; Keywords: multilingual, Indian ;; This file is part of GNU Emacs. @@ -25,290 +24,95 @@ ;;; Commentary: -;; History: -;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> - -;; For Indian, the character set IS 13194 is supported. -;; -;; IS 13194 does not specifically assign glyphs for each characters. -;; Following code is not specific to each Indian language. -;; -;; Eventually, this code will support generic information about -;; following scripts. -;; -;; Devanagari -;; Bengali -;; Gurmukhi -;; Gujarati -;; Oriya -;; Tamil -;; Telgu -;; Kannada -;; Malayalam -;; -;; In this file, charsets other than charset-ascii and charset-indian-is13194 -;; should not be used except in the comment. +;; This file defines in-is13194 coding system and relationship between +;; indian-glyph character-set and various CDAC fonts. ;;; Code: -;; Followings are what you see when you refer to the Emacs -;; representations of IS 13194 charcters. However, this is merely -;; tentative apperance, and you must convert them by -;; indian-to-xxxxxx(specific script) function to use them. -;; Devanagari is not an exception of this rule. - -;; 0xa0 //(5!"#$%&'()*+,-./(B -;; 0xb0 (50123456789:;<=>?(B -;; 0xc0 (5@ABCDEFGHIJKLMNO(B -;; 0xd0 (5PQRSTUVWXYZ[\]^_(B -;; 0xe0 (5`abcdefghijklmno(B -;; 0xf0 (5pqrstuvwxyz{|}~(B// - -;; Note - In IS 13194, several symbols are obtained by special -;; combination of several characters and Nukta sign. -;; -;; Sanskrit Vowel R -> (5*(B + (5i(B -;; Sanskrit Vowel L -> (5&(B + (5i(B -;; Sanskrit Vowel LL -> (5'(B + (5i(B -;; Sanskrit Avagrah -> (5j(B + (5i(B -;; OM -> (5!(B + (5i(B -;; -;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are -;; not used in Emacs. -;; -;; Note - the above characters DO NOT represent any script. For -;; example, if you want to obtain Devanagari character, you must do -;; something like the following. -;; -;; (char-to-string (indian-to-devanagari ?(5$(B)) -;; "$(5!$(B" - -;;; ITRANS -;; -;; ITRANS is one of the most popular method to exchange indian scripts -;; electronically. Here is the table to convert between ITRANS code and -;; IS 13194 code. +(make-coding-system + 'in-is13194 2 ?D + "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)" + '(ascii indian-is13194 nil nil + nil ascii-eol) + '((safe-charsets ascii indian-is13194) + (post-read-conversion . in-is13194-post-read-conversion) + (pre-write-conversion . in-is13194-pre-write-conversion))) -(defvar indian-itrans-consonant-alist - '( - ("k" . "(53(B") - ("kh" . "(54(B") - ("g" . "(55(B") - ("gh" . "(56(B") - ("N^" . "(57(B") - ("ch" . "(58(B") - ("chh" . "(59(B") - ("j" . "(5:(B") - ("jh" . "(5;(B") - ("JN" . "(5<(B") - ("T" . "(5=(B") - ("Th" . "(5>(B") - ("D" . "(5?(B") - ("Dh" . "(5@(B") - ("N" . "(5A(B") - ("t" . "(5B(B") - ("th" . "(5C(B") - ("d" . "(5D(B") - ("dh" . "(5E(B") - ("n" . "(5F(B") - ("nh" . "(5G(B") ; For transcription of non-Devanagari Languages. - ("p" . "(5H(B") - ("ph" . "(5I(B") - ("b" . "(5J(B") - ("bh" . "(5K(B") - ("m" . "(5L(B") - ("y" . "(5M(B") - ("yh" . "(5N(B") ; For transcription of non-Devanagari Languages. - ("r" . "(5O(B") - ("rh" . "(5P(B") ; For transcription of non-Devanagari Languages. - ("l" . "(5Q(B") - ("v" . "(5T(B") - ("sh" . "(5U(B") - ("shh" . "(5V(B") - ("s" . "(5W(B") - ("h" . "(5X(B") - ("ld" . "(5R(B") - ("L" . "(5R(B") - ("ksh" . "$(5!3!h!V(B") - ("GY" . "***GY***") ; Must check out later. - ;; special consonants - ("q" . "(53i(B") - ("K" . "(54i(B") - ("G" . "(55i(B") - ("z" . "(5:i(B") - ("f" . "(5Ii(B") - (".D" . "(5?i(B") - (".Dh" . "(5@i(B") - )) +(defvar indian-script-table + '[ + devanagari + sanskrit + bengali + tamil + telugu + assamese + oriya + kannada + malayalam + gujarati + punjabi + ] + "Vector of Indian script names.") -(defvar indian-itrans-vowel-sign-alist - '( - ;; Special treatment unique to IS 13194 Transliteration - ("" . "(5h(B") - ("a" . "") - ;; Matra (Vowel Sign) - ("aa" . "(5Z(B") - ("A" . "(5Z(B") - ("i" . "(5[(B") - ("ii" . "(5\(B") - ("I" . "(5\(B") - ("u" . "(5](B") - ("uu" . "(5^(B") - ("U" . "(5^(B") - ("R^i" . "(5_(B") ; These must be checked out later. - ("R^I" . "(5_i(B") - ("L^i" . "(5[i(B") - ("L^I" . "(5\i(B") - ("E" . "(5`(B") ; For transcription of non-Devanangri Languages. - ("e" . "(5a(B") - ("ai" . "(5b(B") - ;; ("e.c" . "(5c(B") ; Tentatively suppressed. - ("O" . "(5d(B") ; For transcription of non-Devanagari Languages. - ("o" . "(5e(B") - ("au" . "(5f(B") - ;; ("o.c" . "(5g(B") ; Tentatively suppressed. - )) +(let ((len (length indian-script-table)) + (i 0)) + (while (< i len) + (put (aref indian-script-table i) 'indian-glyph-code-offset (* 256 i)) + (setq i (1+ i)))) -;; -;; Independent vowels and other signs. -;; +(defvar indian-default-script 'devanagari + "Default script for Indian languages. +Each Indian language environment sets this value +to one of `indian-script-table' (which see). +The default value is `devanagari'.") -(defvar indian-itrans-other-letters-alist - '( - ("a" . "(5$(B") - ("aa" . "(5%(B") - ("A" . "(5%(B") - ("i" . "(5&(B") - ("ii" . "(5'(B") - ("I" . "(5'(B") - ("u" . "(5((B") - ("uu" . "(5)(B") - ("U" . "(5)(B") - ("R^i" . "(5*(B") - ("R^I" . "(5*i(B") - ("L^i" . "(5&i(B") - ("L^I" . "(5'i(B") - ("E" . "(5+(B") ; For transcription of non-Devanagari Languages. - ("e" . "(5,(B") - ("ai" . "(5-(B") - ;; ("e.c" . "(5.(B") ; Candra E - ("O" . "(5/(B") ; For transcription of non-Devanagari Languages. - ("o" . "(50(B") - ("au" . "(51(B") - ;; ("o.c" . "(52(B") ; Candra O - ("M" . "(5$(B") - ("H" . "(5#(B") - ("AUM" . "(5!i(B") - ("OM" . "(5!i(B") - (".r" . "(5Oh(B") - (".n" . "(5"(B") - (".N" . "(5!(B") - (".h" . "(5h(B") ; Halant - (".." . "(5j(B") - (".a" . "(5ji(B") ; Avagrah - ("0" . "(5q(B") - ("1" . "(5r(B") - ("2" . "(5s(B") - ("3" . "(5t(B") - ("4" . "(5u(B") - ("5" . "(5v(B") - ("6" . "(5w(B") - ("7" . "(5x(B") - ("8" . "(5y(B") - ("9" . "(5z(B") - )) +(defun indian-glyph-char (index &optional script) + "Return character of charset `indian-glyph' made from glyph index INDEX. +The variable `indian-default-script' specifies the script of the glyph. +Optional argument SCRIPT, if non-nil, overrides `indian-default-script'. +See also the function `indian-char-glyph'." + (or script + (setq script indian-default-script)) + (let ((offset (get script 'indian-glyph-code-offset))) + (or (integerp offset) + (error "Invalid script name: %s" script)) + (or (and (>= index 0) (< index 256)) + (error "Invalid glyph index: %d" index)) + (setq index (+ offset index)) + (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32)))) -;; Regular expression matching single Indian character represented -;; by ITRANS. - -(defvar indian-itrans-regexp - (let ((consonant "\\([cs]hh?\\)\\|[kgjTDnpbyr]h?\\|\\(N\\^?\\)\\|\\(jN\\)\\|[mvqKGzfs]\\|\\(ld?\\)\\|\\(ksh\\)\\|\\(GY\\)\\|\\(\\.Dh?\\)") - (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]") - (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)") - (lpre "\\(") (rpre "\\)") (orre "\\|")) - (concat lpre misc rpre orre - lpre lpre consonant rpre "?" lpre vowel rpre rpre orre - lpre consonant rpre ))) - -;; -;; Regular expression matching single ITRANS unit for IS 13194 characters. -;; +(defvar indian-glyph-max-char + (indian-glyph-char + 255 (aref indian-script-table (1- (length indian-script-table)))) + "The maximum valid code of characters in the charset `indian-glyph'") -(defvar itrans-indian-regexp - (let ((vowel "[(5$(B-(52(B]") - (consonant "[(53(B-(5X(B]") - (matra "[(5Z(B-(5g(B]") - (misc "[(5q(B-(5z(B]") - (lpre "\\(") (rpre "\\)") (orre "\\|")) - (concat misc orre - lpre consonant matra "?" rpre orre - vowel))) - -;; -;; IS13194 - ITRANS conversion table for string matching above regexp. -;; - -(defvar indian-itrans-alist - (let ((cl indian-itrans-consonant-alist) - (ml indian-itrans-other-letters-alist) rules) - (while cl - (let ((vl indian-itrans-vowel-sign-alist)) - (while vl - (setq rules - (cons (cons (concat (car (car cl)) (car (car vl))) - (concat (cdr (car cl)) (cdr (car vl)))) - rules)) - (setq vl (cdr vl)))) - (setq cl (cdr cl))) - (while ml - (setq rules (cons (cons (car (car ml)) - (cdr (car ml))) - rules)) - (setq ml (cdr ml))) - rules)) - -;; -;; Utility program to convert from ITRANS to IS 13194 in specified region. -;; +(defun indian-char-glyph (char) + "Return information about the glphy code for CHAR of `indian-glyph' charset. +The value is (INDEX . SCRIPT), where INDEX is the glyph index +in the font that Indian script name SCRIPT specifies. +See also the function `indian-glyph-char'." + (let ((split (split-char char)) + code) + (or (eq (car split) 'indian-glyph) + (error "Charset of `%c' is not indian-glyph" char)) + (or (<= char indian-glyph-max-char) + (error "Invalid indian-glyph char: %d" char)) + (setq code (+ (* (- (nth 1 split) 32) 96) (nth 2 split) -32)) + (cons (% code 256) (aref indian-script-table (/ code 256))))) -(defun indian-decode-itrans-region (from to) - "Convert `ITRANS' mnemonics of the current region to Indian characters. -When called from a program, expects two arguments, -positions (integers or markers) specifying the stretch of the region." - (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward indian-itrans-regexp nil t) - (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0))) - (ch (cdr (assoc itrans indian-itrans-alist)))) - (if ch - (progn - (delete-region (match-beginning 0) (match-end 0)) - (insert ch))))) - (goto-char (point-min)) - (while (re-search-forward "\\((5h(B\\)[^\\c0]" nil t) - (delete-region (match-beginning 1) (match-end 1))))) +(define-ccl-program ccl-encode-indian-glyph-font + `(0 + ;; Shorten (r1 = (((((r1 - 32) * 96) + r2) - 32) % 256)) + (r1 = ((((r1 * 96) + r2) - ,(+ (* 32 96) 32)) % 256)))) -;; -;; Utility program to convert from IS 13194 to ITRANS in specified region. -;; +(setq font-ccl-encoder-alist + (cons (cons "-CDAC" 'ccl-encode-indian-glyph-font) + font-ccl-encoder-alist)) -(defun indian-encode-itrans-region (from to) - "Convert indian region to ITRANS mnemonics." - (interactive "r") - (save-restriction - (narrow-to-region from to) - (goto-char (point-min)) - (while (re-search-forward itrans-indian-regexp nil t) - (let* ((indian (buffer-substring (match-beginning 0) (match-end 0))) - (ch (car (rassoc indian indian-itrans-alist)))) - (if ch - (progn - (delete-region (match-beginning 0) (match-end 0)) - (insert ch))))) - (goto-char (point-min)))) +(setq font-ccl-encoder-alist + (cons '("ISO10646.*-1" . ccl-encode-unicode-font) + font-ccl-encoder-alist)) (provide 'indian) - + ;;; indian.el ends here