changeset 17087:fb13faeea9aa

Fix handling of several characters.
author Kenichi Handa <handa@m17n.org>
date Wed, 26 Feb 1997 12:39:10 +0000
parents a210bd246871
children 76d2eb70aa0b
files lisp/language/devan-util.el
diffstat 1 files changed, 126 insertions(+), 87 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/language/devan-util.el	Wed Feb 26 12:39:10 1997 +0000
+++ b/lisp/language/devan-util.el	Wed Feb 26 12:39:10 1997 +0000
@@ -27,6 +27,7 @@
 
 ;; History:
 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
+;; 1997.1.20 fixed some bugs.
 
 ;; Devanagari script composition rules and related programs.
 
@@ -104,7 +105,7 @@
 ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
 ;; Vowel-Syllable ::= V[D]
 ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
-;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] Pure-Cons
+;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons
 ;; Pure-Cons ::= Full-Cons H
 ;; Full-Cons ::= C [N]
 ;;
@@ -113,25 +114,30 @@
 ;; C - Consonant ($(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E(B
 ;;                $(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X(B)
 ;; N - Nukta ($(5!i(B)
-;; H - Halant($(5!h(B)
-;; V - Vowel ($(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2(B)
-;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu, Visarg  ($(5!!!"(B)
-;; M - Matra ($(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g(B)
+;; H - Halant($(5!h(B) or Virama
+;; V - Vowel ($(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*(B)
+;;     ("$(5#&#'#*(B" can be obtained by IS13194 vowels with nukta.)
+;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu, Visarga  ($(5!!!"!#(B)
+;; M - Matra ($(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M(B)
+;;     ("$(5#K#L#M(B" can be obtained by IS13194 matras with nukta.)
 ;;
 ;; In Emacs, one syllable of Indian language is considered to be one 
 ;; composite glyph.  If we expand the above expression, it would be:
 ;;
-;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] [D] | V [D]
+;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D] | V [D]
 ;; 
 ;; Therefore, in worst case, the consonant syllabe will consist of
 ;; following characters.
 ;;
-;; C N H C N H C N H C N M D
+;; C N H C N H C N H C N H C N M D
+;;
+;; An example is the Sanskrit word "kaurtsnya", in which five consecutive
+;; consonants appear.
 ;;
 ;; On the other hand, incomplete consonant syllable before inputting
 ;; base consonant must satisfy the following condition:
 ;;
-;; [C [N] H] [C [N] H] C [N] H
+;; [C [N] H] [C [N] H] [C [N] H] C [N] H
 ;;
 ;; This is acceptable BEFORE proper consonant-syllable is input.  The
 ;; string which doesn't match with the above expression is invalid and
@@ -141,21 +147,21 @@
 ;; Third case can be considered, which is acceptable syllable and can
 ;; not add any code more.
 ;;
-;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] D
+;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D
 ;;
 ;; However, to make editing possible even in this condition, we will
 ;; not consider about this case.
 
 (defconst devanagari-cons-syllable-examine
-  "\\(\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?[$(5!Z(B-$(5!g(B]?[$(5!!!"(B]?"
+  "\\(\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?\\([$(5!Z(B-$(5!g#K#L#M(B]\\|\\($(5!_!i(B\\)\\|\\($(5![!i(B\\)\\|\\($(5!\!i(B\\)\\)?[$(5!!!"!#(B]?"
   "Regexp matching to one Devanagari consonant syllable.")
 
 (defconst devanagari-cons-syllable-incomplete-examine
-  "\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B$"
+  "\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?\\([$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B\\)?[$(5!3(B-$(5!X(B]$(5!i(B?$(5!h(B$"
   "Regexp matching to one Devanagari incomplete consonant syllable.")
 
 (defconst devanagari-vowel-syllable-examine
-  "[$(5!$(B-$(5!2(B][$(5!!!"!#(B]?"
+  "\\([$(5!$(B-$(5!2#&#'#*(B]\\|\\($(5!*!i(B\\)\\|\\($(5!&!i(B\\)\\|\\($(5!'!i(B\\)\\)[$(5!!!"!#(B]?"
   "Regexp matching to one Devanagari vowel syllable.")
 
 ;;
@@ -167,7 +173,7 @@
 (defconst devanagari-digit-viram-examine 
   "[$(5!q(B-$(5!z!j(B]")
 (defconst devanagari-other-sign-examine
-  "[$(5!!!j(B]$(5!i(B")
+  "\\([$(5!!!j(B]$(5!i(B\\)\\|\\([$(5#!#J(B]\\)")
 
 (defconst devanagari-composite-glyph-unit-examine
   (concat "\\(" devanagari-cons-syllable-incomplete-examine 
@@ -242,6 +248,16 @@
     ;; ("[^$(5!h(B]\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"p(B")
     ("^\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"p(B")
 
+    ;; Half Form Ligature
+    ;; These half-form ligatures have higher priority than the common
+    ;; ligature rules listed below.
+    ;; Special forms.
+    ("\\($(5!3!h!V!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"l(B")
+    ("\\($(5!:!h!<!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"m(B")
+    ;; Ordinary forms.
+    ("\\($(5!B!h!B!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"c(B")
+    ("\\($(5!F!h!F!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"k(B")
+
     ;; If "r" is preceded by the vowel-suppressed consonant
     ;; (especially those with vertical line), it will be written as
     ;; slanted line below the preceding consonant character.  Some of
@@ -250,12 +266,15 @@
     ("\\($(5!:!i!h!O(B\\)" . "$(5"!(B")
     ("\\($(5!I!i!h!O(B\\)" . "$(5""(B")
     ("\\($(5!3!h!O(B\\)" . "$(5"#(B")
-    ("\\($(5!:!h!O(B\\)" . "$(5"$(B")
+    ("\\($(5!5!h!O(B\\)" . "$(5"$(B")
     ("\\($(5!B!h!O(B\\)" . "$(5"%(B")
     ("\\($(5!H!h!O(B\\)" . "$(5"&(B")
     ("\\($(5!I!h!O(B\\)" . "$(5"'(B")
-    ("\\($(5!U!h!O(B\\)" . "$(5"((B")
-    ("\\($(5!W!h!O(B\\)" . "$(5")(B")
+    ("\\($(5!U!h!O(B\\)" . "$(5")(B")
+
+    ;; Special Rules
+    ;; In the following case, "$(5!<!h!:(B" ligature does not occur.
+    ("\\($(5!<!h(B\\)$(5!:!h!<!h(B" . "$(5"<(B")
 
     ;; Ligature Rules 
     ("\\($(5!3!h!B!h!O!h!M(B\\)" . "$(5$!(B")
@@ -294,7 +313,7 @@
     ("\\($(5!8!h!<(B\\)" . "$(5$B(B")
     ("\\($(5!9!h!M(B\\)" . "$(5$C(B")
     ("\\($(5!:!h!O(B\\)" . "$(5$D(B")
-    ("\\($(5!:!h!h(B\\)" . "$(5$E(B")
+    ("\\($(5!:!h!<(B\\)" . "$(5$E(B")
     ("\\($(5!<!h!8(B\\)" . "$(5$F(B")
     ("\\($(5!<!h!:(B\\)" . "$(5$G(B")
     ("\\($(5!=!h!3(B\\)" . "$(5$H(B")
@@ -372,14 +391,10 @@
     ;; connection which is not listed here has not been examined yet.
     ;; I don't know what to do with them.
     ;;
-    ;; special forms 
-    ("\\($(5!3!h!V!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"l(B")
-    ("\\($(5!:!h!<!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"m(B")
     ;; ordinary forms 
     ("\\($(5!5!h!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"`(B")
     ("\\($(5!6!h!F!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"a(B")
     ;; ("\\($(5!<!h!8!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"c(B") ; Mistake, must check later.
-    ("\\($(5!B!h!B!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"c(B")
     ("\\($(5!B!h!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"d(B")
     ("\\($(5!E!h!F!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"e(B")
     ("\\($(5!E!h!O!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"f(B")
@@ -400,8 +415,16 @@
     ;; have the vertical line (such as "$(5!?(B"), "$(5"r(B" is put beneath the
     ;; consonant.
     ;;
-    ;; ("cons-not-yet-listed-up\\($(5!h!O(B\\)" . "$(5"q(B")
     ("[$(5!7!9!=!>!?!@!D!O!P!R!S!X(B]\\($(5!h!O(B\\)" . "$(5"r(B")
+    ("\\($(5!J!h!O(B\\)" . "$(5!J"r(B") ; Protect from Half form conversion.
+    ("\\($(5!E!h!O(B\\)" . "$(5!E"r(B") ; Will be replaced with precomposed font.
+    ("\\($(5!6!h!O(B\\)" . "$(5!6"r(B")
+    ("\\($(5!K!h!O(B\\)" . "$(5!K"r(B")
+    ("\\($(5!T!h!O(B\\)" . "$(5!T"r(B")
+    ("\\($(5!L!h!O(B\\)" . "$(5!L"r(B")
+    ("\\($(5!7!h!5!h!O(B\\)" . "$(5$;"r(B") ; Ggr
+    ("\\($(5!7!h!3!h!O(B\\)" . "$(5$9"r(B") ; Gkr
+
     ("$(5!?!i(B\\($(5!h!O(B\\)" . "$(5"r(B")
     ("$(5!@!i(B\\($(5!h!O(B\\)" . "$(5"r(B")
 
@@ -410,6 +433,9 @@
     ("\\($(5!&!i(B\\)" . "$(5#&(B")
     ("\\($(5!'!i(B\\)" . "$(5#'(B")
     ("\\($(5!*!i(B\\)" . "$(5#*(B")
+    ("\\($(5![!i(B\\)" . "$(5#L(B")
+    ("\\($(5!\!i(B\\)" . "$(5#M(B")
+    ("\\($(5!_!i(B\\)" . "$(5#K(B")
     ("\\($(5!3!i(B\\)" . "$(5#3(B")
     ("\\($(5!4!i(B\\)" . "$(5#4(B")
     ("\\($(5!5!i(B\\)" . "$(5#5(B")
@@ -448,6 +474,11 @@
     ("\\($(5!U!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"U(B")
     ("\\($(5!V!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"V(B")
     ("\\($(5!W!h(B\\)[$(5!3(B-$(5!X(B]" . "$(5"W(B")
+
+    ;; Special rule for "rR"
+    ("\\($(5!O!_(B\\)" . "$(5!*"p(B")
+    ;; If everything fails, "y" will connect to the front consonant.
+    ("\\($(5!h!M(B\\)" . "$(5"](B")
     )
   "Alist of regexps of Devanagari character sequences vs composed characters.")
 
@@ -546,11 +577,16 @@
 ;; Glyphs will be ordered from low priority number to high priority number.
 ;; If application-priority is omitted, it is assumed to be 0.
 ;; If application-direction is omitted, it is asumbed to be '(mr . ml).
+;;
+;; Priority
+;;          Base Glyphs = {$(5!h!i(B} = Misc > 
+;;          {$(5"p"q"r(B} > Matras > {$(5!!!"!#(B}
+;; Question Halant and '$(5"q"r(B' priority problem.
 
 (defconst devanagari-composition-rules
-  '((?$(5!!(B 60 (tr . br))
-    (?$(5!"(B 60 (tr . br))
-    (?$(5!#(B 60)
+  '((?$(5!!(B 70 (tr . br))
+    (?$(5!"(B 70 (mr . mr))
+    (?$(5!#(B 70)
     (?$(5!$(B 0)
     (?$(5!%(B 0)
     (?$(5!&(B 0)
@@ -611,16 +647,16 @@
     (?$(5!](B 40 (bc . tc))
     (?$(5!^(B 40 (bc . tc))
     (?$(5!_(B 40 (bc . tc))
-    (?$(5!`(B 40 (tc . bc))
-    (?$(5!a(B 40 (tc . bc))
-    (?$(5!b(B 40 (tc . bc))
-    (?$(5!c(B 40 (tc . bc))
+    (?$(5!`(B 40 (mr . mr))  ; (tc . bc)
+    (?$(5!a(B 40 (mr . mr))
+    (?$(5!b(B 40 (mr . mr))
+    (?$(5!c(B 40 (mr . mr))
     (?$(5!d(B 40)
     (?$(5!e(B 40)
     (?$(5!f(B 40)
     (?$(5!g(B 40)
-    (?$(5!h(B 0 (br . tr))			; Halant's special treatment.
-    (?$(5!i(B 0 (br . tr))			; Nukta's special treatment.
+    (?$(5!h(B 0 (br . tr))
+    (?$(5!i(B 0 (br . tr))
     (?$(5!j(B 0)
     (nil 0)
     (nil 0)
@@ -721,9 +757,9 @@
     (?$(5"m(B 0)
     (?$(5"n(B 0)
     (?$(5"o(B 0)
-    (?$(5"p(B 20 (tr . br))
-    (?$(5"q(B 20 (br . tr))
-    (?$(5"r(B 20 (br . tr))
+    (?$(5"p(B 30 (mr . mr))
+    (?$(5"q(B 30 (br . tr))
+    (?$(5"r(B 30 (br . tr))
     (?$(5"s(B 0)
     (?$(5"t(B 0)
     (?$(5"u(B 0)
@@ -778,9 +814,9 @@
     (?$(5#H(B 0)
     (?$(5#I(B 0)
     (?$(5#J(B 0)
-    (?$(5#K(B 0)
-    (?$(5#L(B 0)
-    (?$(5#M(B 0)
+    (?$(5#K(B 40 (bc . tc))
+    (?$(5#L(B 40 (bc . tc))
+    (?$(5#M(B 40 (bc . tc))
     (?$(5#N(B 0)
     (?$(5#O(B 0)
     (?$(5#P(B 0)
@@ -929,10 +965,6 @@
 ;; Determine composition priority and rule of the array of Glyphs.
 ;; Sort the glyphs with their priority.
 
-;; Example:
-;;(devanagari-reorder-glyph-for-composition '[?$(5"5(B ?$(5!X(B ?$(5![(B])
-;;	=> ((446680 0) (446773 0) (446683 50 (ml . mr)))
-
 (defun devanagari-reorder-glyph-for-composition (glyph-alist)
   (let* ((pos 0)
 	 (ordered-glyphs '()))
@@ -966,6 +998,19 @@
     (if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list))
       (apply 'compose-chars cmp-glyph-list))))
 
+;; Utility functions for Phase 2.5
+;; Check whether the given glyph is a Devanagari vertical modifier.
+;; If it is, whether it should take the 1-column shape depends on the
+;; glyphs that precede it in the list being converted.
+;; Return nil if it is not a vertical modifier.
+(defun devanagari-vertical-modifier-p (glyph)
+  (string-match (char-to-string glyph)
+		"[$(5!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M(B]"))
+
+(defun devanagari-non-vertical-modifier-p (glyph)
+  (string-match (char-to-string glyph)
+		"[$(5!Z![!\!d!e!f!g(B]"))
+
 
 ;;
 ;;    Phase 2.5  Convert Appropriate Character to 1-column shape.
@@ -981,56 +1026,50 @@
 ;;   with 2 column base-glyph.
 ;;
 ;; Execution Examples
-;;(devanagari-wide-to-narrow '(446680 446773 (ml . mr) 446683))
-;;(devanagari-wide-to-narrow '(?$(5!6(B (ml . ml) 446773 (tc . mr) 446683))
+;;(devanagari-wide-to-narrow '(?$(5!3(B (ml . ml) ?$(5!a(B))
+;;(devanagari-wide-to-narrow '(?$(5!F(B (ml . ml) ?$(5!a(B))
+
+;(defun devanagari-wide-to-narrow (src-list)
+;  (if (null src-list) '()
+;    (cons 
+;     (if (and (numberp (car src-list))
+;	      (cdr (assq (car src-list) devanagari-1-column-char)))
+;	 (cdr (assq (car src-list) devanagari-1-column-char))
+;       (car src-list))
+;     (devanagari-wide-to-narrow (cdr src-list)))))
 
 (defun devanagari-wide-to-narrow (src-list)
-  (if (null src-list) '()
-    (cons 
-     (if (and (numberp (car src-list))
-	      (cdr (assq (car src-list) devanagari-1-column-char)))
-	 (cdr (assq (car src-list) devanagari-1-column-char))
-       (car src-list))
-     (devanagari-wide-to-narrow (cdr src-list)))))
-
-;; Make this function obsolete temporary Because now Emacs supports
-;; attaching 1 column character at the center 2 column char.  However,
-;; there are still problems attempting to attach Halant or Nukta sign
-;; at the non-vowel consonant.  This problem can not be solved until
-;; Emacs supports attaching the glyph at `temporary-preserved metric'.
+  (devanagari-wide-to-narrow-iter src-list t))
 
-(defun devanagari-wide-to-narrow-old (src-list)
-  (if (null src-list) (progn (error "devanagari-wide-to-narrow error") nil)
-    (let* ((base-glyph (cdr (assq (car src-list) devanagari-1-column-char)))
-	   (wide-base-glyph nil)
-	   (apply-glyph-list (cdr src-list)))
-      (if (null base-glyph)
-	  (progn 
-	    (setq wide-base-glyph t)
-	    (setq base-glyph (car src-list))))
-      (cons base-glyph
-	    (devanagari-wide-to-narrow-iter apply-glyph-list wide-base-glyph))
-      )))
+(defun devanagari-wide-to-narrow-iter (src-list wide-p)
+  (let ((glyph (car src-list)))
+    (cond ((null src-list) '())
+	  ; not glyph code
+	  ((not (numberp glyph)) 
+	   (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) wide-p)))
+	  ; vertical modifier glyph
+	  ((devanagari-vertical-modifier-p glyph)
+	   (if (and (null wide-p)
+		    (cdr (assq glyph devanagari-1-column-char)))
+	       (cons (cdr (assq glyph devanagari-1-column-char))
+		     (devanagari-wide-to-narrow-iter (cdr src-list) nil))
+	       (cons glyph
+		     (devanagari-wide-to-narrow-iter (cdr src-list) t))))
+	  ; nonvertical modifier glyph
+	  ((devanagari-non-vertical-modifier-p glyph)
+	   (if (cdr (assq glyph devanagari-1-column-char))
+	       (cons (cdr (assq glyph devanagari-1-column-char))
+		     (devanagari-wide-to-narrow-iter (cdr src-list) wide-p))
+	       (cons glyph
+		     (devanagari-wide-to-narrow-iter (cdr src-list) wide-p))))
+	  ; normal glyph
+	  (t
+	   (if (cdr (assq glyph devanagari-1-column-char))
+	       (cons (cdr (assq glyph devanagari-1-column-char))
+		     (devanagari-wide-to-narrow-iter (cdr src-list) nil))
+	       (cons glyph
+		     (devanagari-wide-to-narrow-iter (cdr src-list) t)))))))
 
-;; Convert apply-glyph-list from 2-column to 1-column.
-;;   wide-base-glyph is t when base-glyph is 2-column.
-;;   When apply-glyph is put at the top or bottom of 2-column base-glyph,
-;;   they must be 2-column glyph, too.  Otherwise, they will be 
-;;   converted to 1-column glyph if possible.
-
-(defun devanagari-wide-to-narrow-iter (apply-glyph-list wide-base-glyph)
-  (if (< (length apply-glyph-list) 2) '()
-    (let* ((apply-dir    (car apply-glyph-list))
-	   (apply-glyph  (car (cdr apply-glyph-list)))
-	   (apply-rest   (cdr (cdr apply-glyph-list)))
-	   (put-t-or-b   (member (car apply-dir) '(tl tc tr bl bc br)))
-	   (narrow-glyph (cdr (assq apply-glyph devanagari-1-column-char))))
-      (append 
-       (list apply-dir
-	     (if (or (and wide-base-glyph put-t-or-b)
-		     (null narrow-glyph))
-		 apply-glyph narrow-glyph))
-       (devanagari-wide-to-narrow-iter apply-rest wide-base-glyph)))))
 
 ;;
 ;; Summary