view lisp/language/hebrew.el @ 109354:09daf1538316

Improve Hebrew rendering.
author Kenichi Handa <handa@etlken>
date Mon, 12 Jul 2010 14:25:46 +0900
parents 023730733576
children c18e82c381e4
line wrap: on
line source

;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*-

;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
;;   Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;;   2005, 2006, 2007, 2008, 2009, 2010
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021

;; Copyright (C) 2003
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H13PRO009

;; Keywords: multilingual, Hebrew

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; For Hebrew, the character set ISO8859-8 is supported.
;; See http://www.ecma.ch/ecma1/STAND/ECMA-121.HTM.
;; Windows-1255 is also supported.

;;; Code:

(define-coding-system 'hebrew-iso-8bit
  "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)."
  :coding-type 'charset
  :mnemonic ?8
  :charset-list '(iso-8859-8)
  :mime-charset 'iso-8859-8)

(define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit)

;; These are for Explicit and Implicit directionality information, as
;; defined in RFC 1556.  We don't yet support directional information
;; in bidi languages, so these aliases are a lie, especially as far as
;; iso-8859-8-e is concerned.  FIXME.
(define-coding-system-alias 'iso-8859-8-e 'hebrew-iso-8bit)
(define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit)

(set-language-info-alist
 "Hebrew" '((charset iso-8859-8)
	    (coding-priority hebrew-iso-8bit)
	    (coding-system hebrew-iso-8bit windows-1255 cp862)
	    (nonascii-translation . iso-8859-8)
	    (input-method . "hebrew")
	    (unibyte-display . hebrew-iso-8bit)
	    (sample-text . "Hebrew	שלום")
	    (documentation . "Bidirectional editing is supported.")))

(set-language-info-alist
 "Windows-1255" '((coding-priority windows-1255)
		  (coding-system windows-1255)
		  (documentation . "\
Support for Windows-1255 encoding, e.g. for Yiddish.
Bidirectional editing is supported.")))

(define-coding-system 'windows-1255
  "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)"
  :coding-type 'charset
  :mnemonic ?h
  :charset-list '(windows-1255)
  :mime-charset 'windows-1255)
(define-coding-system-alias 'cp1255 'windows-1255)

(define-coding-system 'cp862
  "DOS codepage 862 (Hebrew)"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(cp862)
  :mime-charset 'cp862)
(define-coding-system-alias 'ibm862 'cp862)

;; Return a nested alist of Hebrew character sequences vs the
;; corresponding glyph of FONT-OBJECT.
(defun hebrew-font-get-precomposed (font-object)
  (let ((precomposed (font-get font-object 'hebrew-precomposed))
	;; Vector of Hebrew precomposed charaters.
	(chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31
		#xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A
		#xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46
		#xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E])
	;; Vector of decomposition character sequences corresponding
	;; to the above vector.
	(decomposed 
	 [[#x05E9 #x05C1]
	  [#x05E9 #x05C2]
	  [#x05E9 #x05BC #x05C1]
	  [#x05E9 #x05BC #x05C2]
	  [#x05D0 #x05B7]
	  [#x05D0 #x05B8]
	  [#x05D0 #x05BC]
	  [#x05D1 #x05BC]
	  [#x05D2 #x05BC]
	  [#x05D3 #x05BC]
	  [#x05D4 #x05BC]
	  [#x05D5 #x05BC]
	  [#x05D6 #x05BC]
	  [#x05D8 #x05BC]
	  [#x05D9 #x05BC]
	  [#x05DA #x05BC]
	  [#x05DB #x05BC]
	  [#x05DC #x05BC]
	  [#x05DE #x05BC]
	  [#x05E0 #x05BC]
	  [#x05E1 #x05BC]
	  [#x05E3 #x05BC]
	  [#x05E4 #x05BC]
	  [#x05E6 #x05BC]
	  [#x05E7 #x05BC]
	  [#x05E8 #x05BC]
	  [#x05E9 #x05BC]
	  [#x05EA #x05BC]
	  [#x05D5 #x05B9]
	  [#x05D1 #x05BF]
	  [#x05DB #x05BF]
	  [#x05E4 #x05BF]]))
    (unless precomposed
      (setq precomposed (list t))
      (let ((gvec (font-get-glyphs font-object 0 (length chars) chars)))
	(dotimes (i (length chars))
	  (if (aref gvec i)
	      (set-nested-alist (aref decomposed i) (aref gvec i)
				precomposed))))
      ;; Cache the result in FONT-OBJECT's property.
      (font-put font-object 'hebrew-precomposed precomposed))
    precomposed))

;; Composition function for hebrew.  GSTRING is made of a Hebrew base
;; character followed by Hebrew diacritical marks, or is made of
;; single Hebrew diacritical mark.  Adjust GSTRING to display that
;; sequence properly.  The basic strategy is:
;;
;; (1) If there's single diacritical, add padding space to the left
;; and right of the glyph.
;;
;; (2) If the font has OpenType features for Hebrew, ask the OTF
;; driver the whole work.
;;
;; (3) If the font has precomposed glyphs, use them as far as
;; possible.  Adjust the remaining glyphs artificially.

(defun hebrew-shape-gstring (gstring)
  (let* ((font (lgstring-font gstring))
	 (otf (font-get font :otf))
	 (nchars (lgstring-char-len gstring))
	 header nglyphs base-width glyph precomposed val idx)
    (cond
     ((= nchars 1)
      ;; Independent diacritical mark.  Add padding space to left or
      ;; right so that the glyph doesn't overlap with the surrounding
      ;; chars.
      (setq glyph (lgstring-glyph gstring 0))
      (let ((width (lglyph-width glyph))
	    bearing)
	(if (< (setq bearing (lglyph-lbearing glyph)) 0)
	    (lglyph-set-adjustment glyph bearing 0 (- width bearing)))
	(if (> (setq bearing (lglyph-rbearing glyph)) width)
	    (lglyph-set-adjustment glyph 0 0 bearing))))

     ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf)))
      ;; FONT has OpenType features for Hebrew.
      (font-shape-gstring gstring))

     (t
      ;; FONT doesn't have OpenType features for Hebrew.
      ;; Try a precomposed glyph.
      ;; Now GSTRING is in this form:
      ;;   [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...]
      (setq precomposed (hebrew-font-get-precomposed font)
	    header (lgstring-header gstring)
	    val (lookup-nested-alist header precomposed nil 1))
      (if (and (consp val) (vectorp (car val)))
	  ;; All characters can be displayed by a single precomposed glyph.
	  ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...]
	  (let ((glyph (copy-sequence (car val))))
	    (lglyph-set-from-to glyph 0 (1- nchars))
	    (lgstring-set-glyph gstring 0 glyph)
	    (lgstring-set-glyph gstring 1 nil))
	(if (and (integerp val) (> val 2)
		 (setq glyph (lookup-nested-alist header precomposed val 1))
		 (consp glyph) (vectorp (car glyph)))
	    ;; The first (1- VAL) characters can be displayed by a
	    ;; precomposed glyph.  Provided that VAL is 3, the first
	    ;; two glyphs should be replaced by the precomposed glyph.
	    ;; In that case, reform GSTRING to:
	    ;;   [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...]
	    (let* ((ncmp (1- val))	; number of composed glyphs
		   (diff (1- ncmp)))	; number of reduced glyphs
	      (setq glyph (copy-sequence (car glyph)))
	      (lglyph-set-from-to glyph 0 (1- nchars))
	      (lgstring-set-glyph gstring 0 glyph)
	      (setq idx ncmp)
	      (while (< idx nchars)
		(setq glyph (lgstring-glyph gstring idx))
		(lglyph-set-from-to glyph 0 (1- nchars))
		(lgstring-set-glyph gstring (- idx diff) glyph)
		(setq idx (1+ idx)))
	      (lgstring-set-glyph gstring (- idx diff) nil)
	      (setq idx (- ncmp diff)
		    nglyphs (- nchars diff)))
	  (setq glyph (lgstring-glyph gstring 0))
	  (lglyph-set-from-to glyph 0 (1- nchars))
	  (setq idx 1 nglyphs nchars))
	;; Now IDX is an index to the first non-precomposed glyph.
	;; Adjust positions of the remaining glyphs artificially.
	(setq base-width (lglyph-width (lgstring-glyph gstring 0)))
	(while (< idx nglyphs)
	  (setq glyph (lgstring-glyph gstring idx))
	  (lglyph-set-from-to glyph 0 (1- nchars))
	  (if (>= (lglyph-lbearing glyph) (lglyph-width glyph))
	      ;; It seems that this glyph is designed to be rendered
	      ;; before the base glyph.
	      (lglyph-set-adjustment glyph (- base-width) 0 0)
	    (if (>= (lglyph-lbearing glyph) 0)
		;; Align the horizontal center of this glyph to the
		;; horizontal center of the base glyph.
		(let ((width (- (lglyph-rbearing glyph)
				(lglyph-lbearing glyph))))
		  (lglyph-set-adjustment glyph
					 (- (/ (- base-width width) 2)
					    (lglyph-lbearing glyph)
					    base-width) 0 0))))
	  (setq idx (1+ idx))))))
    gstring))

(let ((pattern1 "[\u05D0-\u05F2][\u0591-\u05BF\u05C1-\u05C5\u05C7]+")
      (pattern2 "[\u05D0-\u05F2]\u200D[\u0591-\u05BF\u05C1-\u05C5\u05C7]+"))
  (set-char-table-range
   composition-function-table '(#x591 . #x5C7)
   (list (vector pattern2 3 'hebrew-shape-gstring)
	 (vector pattern2 2 'hebrew-shape-gstring)
	 (vector pattern1 1 'hebrew-shape-gstring)
	 [nil 0 hebrew-shape-gstring]))
  (set-char-table-range
   composition-function-table #x5C0 nil)
  (set-char-table-range
   composition-function-table #x5C6 nil))

(provide 'hebrew)

;; arch-tag: 3ca04f32-3f1e-498e-af46-8267498ba5d9
;;; hebrew.el ends here