Mercurial > emacs
changeset 89094:4ff0e9aff188
(ucs-devanagari-to-is13194-alist)
(indian-glyph-char, indian-char-glyph): Deleted.
(is13194-default-repertory): Renamed from
is13194-default-repartory,
(iscii-to-ucs-region): Hoist evals from loop.
author | Dave Love <fx@gnu.org> |
---|---|
date | Sun, 08 Sep 2002 20:31:05 +0000 |
parents | 9556f0c558d6 |
children | 0a7fbcb7bda0 |
files | lisp/language/ind-util.el |
diffstat | 1 files changed, 81 insertions(+), 247 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/language/ind-util.el Sun Sep 08 19:49:54 2002 +0000 +++ b/lisp/language/ind-util.el Sun Sep 08 20:31:05 2002 +0000 @@ -44,7 +44,7 @@ "Returns the regular expression of hashtable keys." (let ((max-specpdl-size 1000)) (regexp-opt - (sort + (sort (let (dummy) (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) @@ -59,15 +59,15 @@ (?$,15Q(B ?$,16)(B) (?$,15R(B ?$,16*(B) (?$,15S(B ?$,16+(B) (?$,15T(B ?$,16,(B) (?$,16@(B ?$,16B(B) (?$,16A(B ?$,16C(B)) (;; CONSONANTS (currently 42, including special cases) ?$,15U(B ?$,15V(B ?$,15W(B ?$,15X(B ?$,15Y(B ;; GUTTRULS - ?$,15Z(B ?$,15[(B ?$,15\(B ?$,15](B ?$,15^(B ;; PALATALS - ?$,15_(B ?$,15`(B ?$,15a(B ?$,15b(B ?$,15c(B ;; CEREBRALS - ?$,15d(B ?$,15e(B ?$,15f(B ?$,15g(B ?$,15h(B ?$,15i(B ;; DENTALS - ?$,15j(B ?$,15k(B ?$,15l(B ?$,15m(B ?$,15n(B ;; LABIALS + ?$,15Z(B ?$,15[(B ?$,15\(B ?$,15](B ?$,15^(B ;; PALATALS + ?$,15_(B ?$,15`(B ?$,15a(B ?$,15b(B ?$,15c(B ;; CEREBRALS + ?$,15d(B ?$,15e(B ?$,15f(B ?$,15g(B ?$,15h(B ?$,15i(B ;; DENTALS + ?$,15j(B ?$,15k(B ?$,15l(B ?$,15m(B ?$,15n(B ;; LABIALS ?$,15o(B ?$,15p(B ?$,15q(B ?$,15r(B ?$,15s(B ?$,15t(B ?$,15u(B ;; SEMIVOWELS - ?$,15v(B ?$,15w(B ?$,15x(B ?$,15y(B ;; SIBILANTS - ?$,168(B ?$,169(B ?$,16:(B ?$,16;(B ?$,16<(B ?$,16=(B ?$,16>(B ?$,16?(B ;; NUKTAS + ?$,15v(B ?$,15w(B ?$,15x(B ?$,15y(B ;; SIBILANTS + ?$,168(B ?$,169(B ?$,16:(B ?$,16;(B ?$,16<(B ?$,16=(B ?$,16>(B ?$,16?(B ;; NUKTAS "$,15\6-5^(B" "$,15U6-5w(B") - (;; Misc Symbols (7) + (;; Misc Symbols (7) ?$,15A(B ?$,15B(B ?$,15C(B ?$,15}(B ?$,16-(B ?$,160(B ?$,16D(B) (;; Digits (10) ?$,16F(B ?$,16G(B ?$,16H(B ?$,16I(B ?$,16J(B ?$,16K(B ?$,16L(B ?$,16M(B ?$,16N(B ?$,16O(B) @@ -85,7 +85,7 @@ (defvar indian-base-table-to-language-alist '((indian-dev-base-table . "Devanagari") - (indian-pnj-base-table . "Punjabi") + (indian-pnj-base-table . "Punjabi") (indian-ori-base-table . "Oriya") (indian-bng-base-table . "Bengali") (indian-asm-base-table . "Assamese") @@ -100,11 +100,11 @@ "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U") ("RRi" "R^i") ("LLi" "L^i") (".c" "e.c") nil "e" "ai" "o.c" nil "o" "au" ("RRI" "R^I") ("LLI" "L^I")) - (;; consonants -- 40 + (;; consonants -- 40 "k" "kh" "g" "gh" ("~N" "N^") "ch" ("Ch" "chh") "j" "jh" ("~n" "JN") "T" "Th" "D" "Dh" "N" - "t" "th" "d" "dh" "n" "nh" + "t" "th" "d" "dh" "n" "nh" "p" "ph" "b" "bh" "m" "y" "r" "rh" "l" ("L" "ld") nil ("v" "w") "sh" ("Sh" "shh") "s" "h" @@ -196,12 +196,12 @@ Thus, if SEQ1 contains 3 elements and SEQ2 contains 5 elements, then FUNCTION will be called 15 times." (if seqrest - (mapcar + (mapcar (lambda (x) - (apply - 'mapthread - `(lambda (&rest y) (apply ',function x y)) - seqrest)) + (apply + 'mapthread + `(lambda (&rest y) (apply ',function x y)) + seqrest)) seq1) (mapcar function seq1))) @@ -225,7 +225,7 @@ (funcall f (pop l1) (pop l2)))) (defun indian--puthash-v (v trans-v hashtbls) - (indian--map + (indian--map (lambda (v trans-v) (indian--puthash-char (car v) trans-v hashtbls)) v trans-v)) @@ -253,7 +253,7 @@ (setq v (if (characterp (cadr v)) (char-to-string (cadr v)) "")) (if (stringp trans-c) (setq trans-c (list trans-c))) (if (stringp trans-v) (setq trans-v (list trans-v))) - (indian--puthash-char + (indian--puthash-char (concat c v) (apply 'append (mapthread 'concat trans-c trans-v)) @@ -277,7 +277,7 @@ (trans-digits '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))) (indian--puthash-v vowels trans-vowels hashtbls) (indian--puthash-c consonants trans-consonants halant hashtbls) - (indian--puthash-cv consonants trans-consonants + (indian--puthash-cv consonants trans-consonants vowels trans-vowels hashtbls) (indian--puthash-m misc trans-misc hashtbls) (indian--puthash-m digits trans-digits hashtbls) @@ -298,13 +298,13 @@ (defmacro indian-translate-region (from to hashtable encode-p) `(save-excursion (save-restriction - (let ((regexp ,(indian-regexp-of-hashtbl-keys - (if encode-p (car (eval hashtable)) + (let ((regexp ,(indian-regexp-of-hashtbl-keys + (if encode-p (car (eval hashtable)) (cdr (eval hashtable)))))) (narrow-to-region from to) (goto-char (point-min)) (while (re-search-forward regexp nil t) - (let ((matchstr (gethash (match-string 0) + (let ((matchstr (gethash (match-string 0) (if ,encode-p (car ,hashtable) (cdr ,hashtable))))) @@ -314,7 +314,7 @@ (defun indian-dev-itrans-v5-encode-region (from to) (interactive "r") - (indian-translate-region + (indian-translate-region from to indian-dev-itrans-v5-hash t)) (defun indian-dev-itrans-v5-decode-region (from to) @@ -324,7 +324,7 @@ (defun indian-dev-kyoto-harvard-encode-region (from to) (interactive "r") - (indian-translate-region + (indian-translate-region from to indian-dev-kyoto-harvard-hash t)) (defun indian-dev-kyoto-harvard-decode-region (from to) @@ -334,7 +334,7 @@ (defun indian-dev-aiba-encode-region (from to) (interactive "r") - (indian-translate-region + (indian-translate-region from to indian-dev-aiba-hash t)) (defun indian-dev-aiba-decode-region (from to) @@ -347,138 +347,7 @@ ;;; IS 13194 utilities -;; The followings provide conversion between IS 13194 (ISCII) and UCS. - -(defvar ucs-devanagari-to-is13194-alist - '(;;Unicode vs IS13194 ;; only Devanagari is supported now. - (?\x0900 . "[U+0900]") - (?\x0901 . "(5!(B") - (?\x0902 . "(5"(B") - (?\x0903 . "(5#(B") - (?\x0904 . "[U+0904]") - (?\x0905 . "(5$(B") - (?\x0906 . "(5%(B") - (?\x0907 . "(5&(B") - (?\x0908 . "(5'(B") - (?\x0909 . "(5((B") - (?\x090a . "(5)(B") - (?\x090b . "(5*(B") - (?\x090c . "(5&i(B") - (?\x090d . "(5.(B") - (?\x090e . "(5+(B") - (?\x090f . "(5,(B") - (?\x0910 . "(5-(B") - (?\x0911 . "(52(B") - (?\x0912 . "(5/(B") - (?\x0913 . "(50(B") - (?\x0914 . "(51(B") - (?\x0915 . "(53(B") - (?\x0916 . "(54(B") - (?\x0917 . "(55(B") - (?\x0918 . "(56(B") - (?\x0919 . "(57(B") - (?\x091a . "(58(B") - (?\x091b . "(59(B") - (?\x091c . "(5:(B") - (?\x091d . "(5;(B") - (?\x091e . "(5<(B") - (?\x091f . "(5=(B") - (?\x0920 . "(5>(B") - (?\x0921 . "(5?(B") - (?\x0922 . "(5@(B") - (?\x0923 . "(5A(B") - (?\x0924 . "(5B(B") - (?\x0925 . "(5C(B") - (?\x0926 . "(5D(B") - (?\x0927 . "(5E(B") - (?\x0928 . "(5F(B") - (?\x0929 . "(5G(B") - (?\x092a . "(5H(B") - (?\x092b . "(5I(B") - (?\x092c . "(5J(B") - (?\x092d . "(5K(B") - (?\x092e . "(5L(B") - (?\x092f . "(5M(B") - (?\x0930 . "(5O(B") - (?\x0931 . "(5P(B") - (?\x0932 . "(5Q(B") - (?\x0933 . "(5R(B") - (?\x0934 . "(5S(B") - (?\x0935 . "(5T(B") - (?\x0936 . "(5U(B") - (?\x0937 . "(5V(B") - (?\x0938 . "(5W(B") - (?\x0939 . "(5X(B") - (?\x093a . "[U+093a]") - (?\x093b . "[U+093b]") - (?\x093c . "(5i(B") - (?\x093d . "(5ji(B") - (?\x093e . "(5Z(B") - (?\x093f . "(5[(B") - (?\x0940 . "(5\(B") - (?\x0941 . "(5](B") - (?\x0942 . "(5^(B") - (?\x0943 . "(5_(B") - (?\x0944 . "(5_i(B") - (?\x0945 . "(5c(B") - (?\x0946 . "(5`(B") - (?\x0947 . "(5a(B") - (?\x0948 . "(5b(B") - (?\x0949 . "(5g(B") - (?\x094a . "(5d(B") - (?\x094b . "(5e(B") - (?\x094c . "(5f(B") - (?\x094d . "(5h(B") - (?\x094e . "[U+094e]") - (?\x094f . "[U+094f]") - (?\x0950 . "(5!i(B") - (?\x0951 . "(5p5(B") - (?\x0952 . "(5p8(B") - (?\x0953 . "[DEVANAGARI GRAVE ACCENT]") - (?\x0954 . "[DEVANAGARI ACUTE ACCENT]") - (?\x0955 . "[U+0955]") - (?\x0956 . "[U+0956]") - (?\x0957 . "[U+0957]") - (?\x0958 . "(53i(B") - (?\x0959 . "(54i(B") - (?\x095a . "(55i(B") - (?\x095b . "(5:i(B") - (?\x095c . "(5?i(B") - (?\x095d . "(5@i(B") - (?\x095e . "(5Ii(B") - (?\x095f . "(5N(B") - (?\x0960 . "(5*i(B") - (?\x0961 . "(5'i(B") - (?\x0962 . "(5[i(B") - (?\x0963 . "(5ei(B") - (?\x0964 . "(5j(B") - (?\x0965 . "(5jj(B") - (?\x0966 . "(5q(B") - (?\x0967 . "(5r(B") - (?\x0968 . "(5s(B") - (?\x0969 . "(5t(B") - (?\x096a . "(5u(B") - (?\x096b . "(5v(B") - (?\x096c . "(5w(B") - (?\x096d . "(5x(B") - (?\x096e . "(5y(B") - (?\x096f . "(5z(B") - (?\x0970 . "[U+0970]") - (?\x0971 . "[U+0971]") - (?\x0972 . "[U+0972]") - (?\x0973 . "[U+0973]") - (?\x0974 . "[U+0974]") - (?\x0975 . "[U+0975]") - (?\x0976 . "[U+0976]") - (?\x0977 . "[U+0977]") - (?\x0978 . "[U+0978]") - (?\x0979 . "[U+0979]") - (?\x097a . "[U+097a]") - (?\x097b . "[U+097b]") - (?\x097c . "[U+097c]") - (?\x097d . "[U+097d]") - (?\x097e . "[U+097e]") - (?\x097f . "[U+097f]"))) +;; The following provide conversion between IS 13194 (ISCII) and UCS. (defvar ucs-bengali-to-is13194-alist nil) (defvar ucs-assamese-to-is13194-alist nil) @@ -489,11 +358,11 @@ (defvar ucs-telugu-to-is13194-alist nil) (defvar ucs-malayalam-to-is13194-alist nil) -(defvar is13194-default-repartory 'devanagari) +(defvar is13194-default-repertory 'devanagari) (defvar is13194-repertory-to-ucs-script - `((DEF ?\x40 ,is13194-default-repartory) - (RMN ?\x41 ,is13194-default-repartory) + `((DEF ?\x40 ,is13194-default-repertory) + (RMN ?\x41 ,is13194-default-repertory) (DEV ?\x42 devanagari) (BNG ?\x43 bengali) (TML ?\x44 tamil) @@ -525,21 +394,21 @@ (defvar is13194-to-ucs-malayalam-hashtbl nil) (defvar is13194-to-ucs-malayalam-regexp nil) -(mapc - (function (lambda (script) - (let ((hashtable (intern (concat "is13194-to-ucs-" - (symbol-name script) "-hashtbl" ))) - (regexp (intern (concat "is13194-to-ucs-" - (symbol-name script) "-regexp")))) +(mapc + (function (lambda (script) + (let ((hashtable (intern (concat "is13194-to-ucs-" + (symbol-name script) "-hashtbl" ))) + (regexp (intern (concat "is13194-to-ucs-" + (symbol-name script) "-regexp")))) (set hashtable (make-hash-table :test 'equal :size 128)) (mapc (function (lambda (x) - (put-char-code-property (car x) 'script script) - (put-char-code-property (car x) 'iscii (cdr x)) - (puthash (cdr x) (char-to-string (car x)) - (eval hashtable)))) + (put-char-code-property (car x) 'script script) + (put-char-code-property (car x) 'iscii (cdr x)) + (puthash (cdr x) (char-to-string (car x)) + (eval hashtable)))) (eval (intern (concat "ucs-" (symbol-name script) - "-to-is13194-alist")))) + "-to-is13194-alist")))) (set regexp (indian-regexp-of-hashtbl-keys (eval hashtable)))))) '(devanagari bengali assamese gurmukhi gujarati oriya tamil telugu malayalam)) @@ -547,11 +416,11 @@ (defvar ucs-to-is13194-regexp ;; only Devanagari is supported now. (concat "[" (char-to-string #x0900) - "-" (char-to-string #x097f) "]") + "-" (char-to-string #x097f) "]") "Regexp that matches to conversion") (defun ucs-to-iscii-region (from to) - "Converts the indian UCS characters in the region to ISCII. + "Converts the indian UCS characters in the region to ISCII. Returns new end position." (interactive "r") ;; only Devanagari is supported now. @@ -559,15 +428,15 @@ (save-restriction (narrow-to-region from to) (goto-char (point-min)) - (let* ((current-repertory is13194-default-repartory)) - (while (re-search-forward ucs-to-is13194-regexp nil t) - (replace-match - (get-char-code-property (string-to-char (match-string 0)) - 'iscii)))) + (let* ((current-repertory is13194-default-repertory)) + (while (re-search-forward ucs-to-is13194-regexp nil t) + (replace-match + (get-char-code-property (string-to-char (match-string 0)) + 'iscii)))) (point-max)))) (defun iscii-to-ucs-region (from to) - "Converts the ISCII characters in the region to UCS. + "Converts the ISCII characters in the region to UCS. Returns new end position." (interactive "r") ;; only Devanagari is supported now. @@ -575,16 +444,17 @@ (save-restriction (narrow-to-region from to) (goto-char (point-min)) - (let* ((current-repertory is13194-default-repartory) - (current-hashtable - (intern (concat "is13194-to-ucs-" - (symbol-name current-repertory) "-hashtbl"))) - (current-regexp - (intern (concat "is13194-to-ucs-" - (symbol-name current-repertory) "-regexp")))) - (while (re-search-forward (eval current-regexp) nil t) - (replace-match - (gethash (match-string 0) (eval current-hashtable) "")))) + (let* ((current-repertory is13194-default-repertory) + (current-hashtable + (intern (concat "is13194-to-ucs-" + (symbol-name current-repertory) "-hashtbl"))) + (current-regexp + (intern (concat "is13194-to-ucs-" + (symbol-name current-repertory) "-regexp"))) + (re (eval current-regexp)) + (hahsh (eval current-hashtable))) + (while (re-search-forward re nil t) + (replace-match (gethash (match-string 0) hash "")))) (point-max)))) ;;;###autoload @@ -594,14 +464,14 @@ (save-excursion (save-restriction (let ((pos from) chars (max to)) - (narrow-to-region from to) - (while (< pos max) - (setq chars (compose-chars-after pos)) - (if chars (setq pos (+ pos chars)) (setq pos (1+ pos)))))))) + (narrow-to-region from to) + (while (< pos max) + (setq chars (compose-chars-after pos)) + (if chars (setq pos (+ pos chars)) (setq pos (1+ pos)))))))) ;;;###autoload (defun indian-compose-string (string) - (with-temp-buffer + (with-temp-buffer (insert string) (indian-compose-region (point-min) (point-max)) (buffer-string))) @@ -628,7 +498,7 @@ ;;; Backward Compatibility support programs -;; The followings provides the conversion from old-implementation of +;; The following provides the conversion from old-implementation of ;; Emacs Devanagari script to UCS. (defconst indian-2-colum-to-ucs @@ -964,11 +834,11 @@ (put 'indian-2-column-to-ucs-chartable 'char-table-extra-slots 1) (defconst indian-2-column-to-ucs-chartable (let ((table (make-char-table 'indian-2-column-to-ucs-chartable)) - (alist nil)) + (alist nil)) (dolist (elt indian-2-colum-to-ucs) (if (= (length (car elt)) 1) - (aset table (aref (car elt) 0) (cdr elt)) - (setq alist (cons elt alist)))) + (aset table (aref (car elt) 0) (cdr elt)) + (setq alist (cons elt alist)))) (set-char-table-extra-slot table 0 alist) table)) @@ -978,56 +848,20 @@ (save-excursion (save-restriction (let ((pos from) - (alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0))) - (narrow-to-region from to) - (decompose-region from to) - (goto-char (point-min)) - (while (re-search-forward indian-2-column-to-ucs-regexp nil t) - (let ((len (- (match-end 0) (match-beginning 0))) - subst) - (if (= len 1) - (setq subst (aref indian-2-column-to-ucs-chartable + (alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0))) + (narrow-to-region from to) + (decompose-region from to) + (goto-char (point-min)) + (while (re-search-forward indian-2-column-to-ucs-regexp nil t) + (let ((len (- (match-end 0) (match-beginning 0))) + subst) + (if (= len 1) + (setq subst (aref indian-2-column-to-ucs-chartable (char-after (match-beginning 0)))) - (setq subst (assoc (match-string 0) alist))) - (replace-match (if subst subst "?")))) - (indian-compose-region (point-min) (point-max)))))) + (setq subst (assoc (match-string 0) alist))) + (replace-match (if subst subst "?")))) + (indian-compose-region (point-min) (point-max)))))) -;;;###autoload -(defun indian-glyph-char (index &optional script) - "Return character of charset `indian-glyph' made from glyph index INDEX. -The variable `indian-default-script' specifies the script of the glyph. -Optional argument SCRIPT, if non-nil, overrides `indian-default-script'. -See also the function `indian-char-glyph'." - (or script - (setq script indian-default-script)) - (let ((offset (get script 'indian-glyph-code-offset))) - (or (integerp offset) - (error "Invalid script name: %s" script)) - (or (and (>= index 0) (< index 256)) - (error "Invalid glyph index: %d" index)) - (setq index (+ offset index)) - (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32)))) +(provide 'ind-util) -(defvar indian-glyph-max-char - (indian-glyph-char - 255 (aref indian-script-table (1- (length indian-script-table)))) - "The maximum valid code of characters in the charset `indian-glyph'.") - -;;;###autoload -(defun indian-char-glyph (char) - "Return information about the glyph code for CHAR of `indian-glyph' charset. -The value is (INDEX . SCRIPT), where INDEX is the glyph index -in the font that Indian script name SCRIPT specifies. -See also the function `indian-glyph-char'." - (let ((split (split-char char)) - code) - (or (eq (car split) 'indian-glyph) - (error "Charset of `%c' is not indian-glyph" char)) - (or (<= char indian-glyph-max-char) - (error "Invalid indian-glyph char: %d" char)) - (setq code (+ (* (- (nth 1 split) 32) 96) (nth 2 split) -32)) - (cons (% code 256) (aref indian-script-table (/ code 256))))) - -(provide 'ind-util) - ;;; ind-util.el ends here