changeset 109356:ef27d1a4f241

Improve Hebrew rendering.
author Kenichi Handa <handa@etlken>
date Mon, 12 Jul 2010 14:26:57 +0900
parents 92a95ad5c098 (current diff) 09daf1538316 (diff)
children 3cfd17e4be81
files
diffstat 4 files changed, 169 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Mon Jul 12 11:31:36 2010 +0900
+++ b/lisp/ChangeLog	Mon Jul 12 14:26:57 2010 +0900
@@ -1,3 +1,11 @@
+2010-07-12  Kenichi Handa  <handa@m17n.org>
+
+	* language/hebrew.el: Remove no-byte-compile declaration.  Change
+	coding: tag to utf-8.  Register hebrew-shape-gstring in
+	composition-function-table for 3-character looking back.
+	(hebrew-font-get-precomposed): New function.
+	(hebrew-shape-gstring): Utilize precomposed glyphs if available.
+
 2010-07-11  Chong Yidong  <cyd@stupidchicken.com>
 
 	* mouse.el (mouse-drag-track): Handle select-active-regions
--- a/lisp/language/hebrew.el	Mon Jul 12 11:31:36 2010 +0900
+++ b/lisp/language/hebrew.el	Mon Jul 12 14:26:57 2010 +0900
@@ -1,4 +1,4 @@
-;;; hebrew.el --- support for Hebrew -*- coding: iso-2022-7bit; no-byte-compile: t -*-
+;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*-
 
 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
 ;;   Free Software Foundation, Inc.
@@ -59,7 +59,7 @@
 	    (nonascii-translation . iso-8859-8)
 	    (input-method . "hebrew")
 	    (unibyte-display . hebrew-iso-8bit)
-	    (sample-text . "Hebrew	,Hylem(B")
+	    (sample-text . "Hebrew	שלום")
 	    (documentation . "Bidirectional editing is supported.")))
 
 (set-language-info-alist
@@ -85,33 +85,167 @@
   :mime-charset 'cp862)
 (define-coding-system-alias 'ibm862 'cp862)
 
-;; Composition function for hebrew.
+;; Return a nested alist of Hebrew character sequences vs the
+;; corresponding glyph of FONT-OBJECT.
+(defun hebrew-font-get-precomposed (font-object)
+  (let ((precomposed (font-get font-object 'hebrew-precomposed))
+	;; Vector of Hebrew precomposed charaters.
+	(chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31
+		#xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A
+		#xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46
+		#xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E])
+	;; Vector of decomposition character sequences corresponding
+	;; to the above vector.
+	(decomposed 
+	 [[#x05E9 #x05C1]
+	  [#x05E9 #x05C2]
+	  [#x05E9 #x05BC #x05C1]
+	  [#x05E9 #x05BC #x05C2]
+	  [#x05D0 #x05B7]
+	  [#x05D0 #x05B8]
+	  [#x05D0 #x05BC]
+	  [#x05D1 #x05BC]
+	  [#x05D2 #x05BC]
+	  [#x05D3 #x05BC]
+	  [#x05D4 #x05BC]
+	  [#x05D5 #x05BC]
+	  [#x05D6 #x05BC]
+	  [#x05D8 #x05BC]
+	  [#x05D9 #x05BC]
+	  [#x05DA #x05BC]
+	  [#x05DB #x05BC]
+	  [#x05DC #x05BC]
+	  [#x05DE #x05BC]
+	  [#x05E0 #x05BC]
+	  [#x05E1 #x05BC]
+	  [#x05E3 #x05BC]
+	  [#x05E4 #x05BC]
+	  [#x05E6 #x05BC]
+	  [#x05E7 #x05BC]
+	  [#x05E8 #x05BC]
+	  [#x05E9 #x05BC]
+	  [#x05EA #x05BC]
+	  [#x05D5 #x05B9]
+	  [#x05D1 #x05BF]
+	  [#x05DB #x05BF]
+	  [#x05E4 #x05BF]]))
+    (unless precomposed
+      (setq precomposed (list t))
+      (let ((gvec (font-get-glyphs font-object 0 (length chars) chars)))
+	(dotimes (i (length chars))
+	  (if (aref gvec i)
+	      (set-nested-alist (aref decomposed i) (aref gvec i)
+				precomposed))))
+      ;; Cache the result in FONT-OBJECT's property.
+      (font-put font-object 'hebrew-precomposed precomposed))
+    precomposed))
+
+;; Composition function for hebrew.  GSTRING is made of a Hebrew base
+;; character followed by Hebrew diacritical marks, or is made of
+;; single Hebrew diacritical mark.  Adjust GSTRING to display that
+;; sequence properly.  The basic strategy is:
+;;
+;; (1) If there's single diacritical, add padding space to the left
+;; and right of the glyph.
+;;
+;; (2) If the font has OpenType features for Hebrew, ask the OTF
+;; driver the whole work.
+;;
+;; (3) If the font has precomposed glyphs, use them as far as
+;; possible.  Adjust the remaining glyphs artificially.
+
 (defun hebrew-shape-gstring (gstring)
-  (setq gstring (font-shape-gstring gstring))
-  (let ((header (lgstring-header gstring))
-	(nchars (lgstring-char-len gstring))
-	(nglyphs (lgstring-glyph-len gstring))
-	(base-width (lglyph-width (lgstring-glyph gstring 0))))
-    (while (and (> nglyphs 1)
-		(not (lgstring-glyph gstring (1- nglyphs))))
-      (setq nglyphs (1- nglyphs)))
-    (while (> nglyphs 1)
-      (setq nglyphs (1- nglyphs))
-      (let* ((glyph (lgstring-glyph gstring nglyphs))
-	     (adjust (and glyph (lglyph-adjustment glyph))))
-	(if adjust
-	    (setq nglyphs 0)
-	  (if (>= (lglyph-lbearing glyph) 0)
-	      (lglyph-set-adjustment glyph (- base-width) 0 0))))))
-  gstring)
+  (let* ((font (lgstring-font gstring))
+	 (otf (font-get font :otf))
+	 (nchars (lgstring-char-len gstring))
+	 header nglyphs base-width glyph precomposed val idx)
+    (cond
+     ((= nchars 1)
+      ;; Independent diacritical mark.  Add padding space to left or
+      ;; right so that the glyph doesn't overlap with the surrounding
+      ;; chars.
+      (setq glyph (lgstring-glyph gstring 0))
+      (let ((width (lglyph-width glyph))
+	    bearing)
+	(if (< (setq bearing (lglyph-lbearing glyph)) 0)
+	    (lglyph-set-adjustment glyph bearing 0 (- width bearing)))
+	(if (> (setq bearing (lglyph-rbearing glyph)) width)
+	    (lglyph-set-adjustment glyph 0 0 bearing))))
+
+     ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf)))
+      ;; FONT has OpenType features for Hebrew.
+      (font-shape-gstring gstring))
+
+     (t
+      ;; FONT doesn't have OpenType features for Hebrew.
+      ;; Try a precomposed glyph.
+      ;; Now GSTRING is in this form:
+      ;;   [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...]
+      (setq precomposed (hebrew-font-get-precomposed font)
+	    header (lgstring-header gstring)
+	    val (lookup-nested-alist header precomposed nil 1))
+      (if (and (consp val) (vectorp (car val)))
+	  ;; All characters can be displayed by a single precomposed glyph.
+	  ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...]
+	  (let ((glyph (copy-sequence (car val))))
+	    (lglyph-set-from-to glyph 0 (1- nchars))
+	    (lgstring-set-glyph gstring 0 glyph)
+	    (lgstring-set-glyph gstring 1 nil))
+	(if (and (integerp val) (> val 2)
+		 (setq glyph (lookup-nested-alist header precomposed val 1))
+		 (consp glyph) (vectorp (car glyph)))
+	    ;; The first (1- VAL) characters can be displayed by a
+	    ;; precomposed glyph.  Provided that VAL is 3, the first
+	    ;; two glyphs should be replaced by the precomposed glyph.
+	    ;; In that case, reform GSTRING to:
+	    ;;   [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...]
+	    (let* ((ncmp (1- val))	; number of composed glyphs
+		   (diff (1- ncmp)))	; number of reduced glyphs
+	      (setq glyph (copy-sequence (car glyph)))
+	      (lglyph-set-from-to glyph 0 (1- nchars))
+	      (lgstring-set-glyph gstring 0 glyph)
+	      (setq idx ncmp)
+	      (while (< idx nchars)
+		(setq glyph (lgstring-glyph gstring idx))
+		(lglyph-set-from-to glyph 0 (1- nchars))
+		(lgstring-set-glyph gstring (- idx diff) glyph)
+		(setq idx (1+ idx)))
+	      (lgstring-set-glyph gstring (- idx diff) nil)
+	      (setq idx (- ncmp diff)
+		    nglyphs (- nchars diff)))
+	  (setq glyph (lgstring-glyph gstring 0))
+	  (lglyph-set-from-to glyph 0 (1- nchars))
+	  (setq idx 1 nglyphs nchars))
+	;; Now IDX is an index to the first non-precomposed glyph.
+	;; Adjust positions of the remaining glyphs artificially.
+	(setq base-width (lglyph-width (lgstring-glyph gstring 0)))
+	(while (< idx nglyphs)
+	  (setq glyph (lgstring-glyph gstring idx))
+	  (lglyph-set-from-to glyph 0 (1- nchars))
+	  (if (>= (lglyph-lbearing glyph) (lglyph-width glyph))
+	      ;; It seems that this glyph is designed to be rendered
+	      ;; before the base glyph.
+	      (lglyph-set-adjustment glyph (- base-width) 0 0)
+	    (if (>= (lglyph-lbearing glyph) 0)
+		;; Align the horizontal center of this glyph to the
+		;; horizontal center of the base glyph.
+		(let ((width (- (lglyph-rbearing glyph)
+				(lglyph-lbearing glyph))))
+		  (lglyph-set-adjustment glyph
+					 (- (/ (- base-width width) 2)
+					    (lglyph-lbearing glyph)
+					    base-width) 0 0))))
+	  (setq idx (1+ idx))))))
+    gstring))
 
 (let ((pattern1 "[\u05D0-\u05F2][\u0591-\u05BF\u05C1-\u05C5\u05C7]+")
       (pattern2 "[\u05D0-\u05F2]\u200D[\u0591-\u05BF\u05C1-\u05C5\u05C7]+"))
   (set-char-table-range
    composition-function-table '(#x591 . #x5C7)
-   (list (vector pattern2 2 'hebrew-shape-gstring)
+   (list (vector pattern2 3 'hebrew-shape-gstring)
+	 (vector pattern2 2 'hebrew-shape-gstring)
 	 (vector pattern1 1 'hebrew-shape-gstring)
-	 ["[\u0591-\u05C7]" 0 font-shape-gstring]))
+	 [nil 0 hebrew-shape-gstring]))
   (set-char-table-range
    composition-function-table #x5C0 nil)
   (set-char-table-range
--- a/src/ChangeLog	Mon Jul 12 11:31:36 2010 +0900
+++ b/src/ChangeLog	Mon Jul 12 14:26:57 2010 +0900
@@ -1,5 +1,8 @@
 2010-07-12  Kenichi Handa  <handa@m17n.org>
 
+	* Makefile.in (lisp): Change hebrew.el to hebrew.elc.
+	(shortlisp): Likewise.
+
 	* font.h (enum font_property_index): New member FONT_ENTITY_INDEX.
 
 	* font.c (font_open_entity): Record ENTITY in FONT_OBJECT's slot
--- a/src/Makefile.in	Mon Jul 12 11:31:36 2010 +0900
+++ b/src/Makefile.in	Mon Jul 12 14:26:57 2010 +0900
@@ -426,7 +426,7 @@
 	${lispsource}language/slovak.el \
 	${lispsource}language/romanian.el \
 	${lispsource}language/greek.el \
-	${lispsource}language/hebrew.el \
+	${lispsource}language/hebrew.elc \
 	${lispsource}language/japanese.el \
 	${lispsource}language/korean.el \
 	${lispsource}language/lao.el \
@@ -517,7 +517,7 @@
 	../lisp/language/slovak.el \
 	../lisp/language/romanian.el \
 	../lisp/language/greek.el \
-	../lisp/language/hebrew.el \
+	../lisp/language/hebrew.elc \
 	../lisp/language/japanese.el \
 	../lisp/language/korean.el \
 	../lisp/language/lao.el \