# HG changeset patch # User Kenichi Handa # Date 1115692181 0 # Node ID 88ca83396d150982bac9418a6de4fa17422a432d # Parent 1d9e0c2958e692253834be8f014107472be9361e Typo fixed in comments. Change string-to-int to string-to-number. (unidata-text-file): Defined to .../unidata.txt. (unidata-list): Just insert unidata-text-file. (unidata-get-decomposition): Handle Hangul decomposition. (unidata-gen-files): Don't use \040, instead add ^L near the end of file. diff -r 1d9e0c2958e6 -r 88ca83396d15 admin/unidata/unidata-gen.el --- a/admin/unidata/unidata-gen.el Sat May 07 02:55:01 2005 +0000 +++ b/admin/unidata/unidata-gen.el Tue May 10 02:29:41 2005 +0000 @@ -22,23 +22,29 @@ ;;; Commentary: +;; SPECIAL NOTICE +;; +;; This file must be byte-compilable/loadable by `temacs' and also +;; the entry function `unidata-gen-files' must be runnable by +;; `temacs'. + ;; FILES TO BE GENERATED ;; -;; The entry function `unidata-gen-files' generated these filese in +;; The entry function `unidata-gen-files' generates these files in ;; the current directory. ;; ;; charprop.el ;; It contains a series of forms of this format: ;; (char-code-property-register PROP FILE) ;; where PROP is a symbol representing a character property -;; (name, geneirc-category, etc), and FILE is a name of one of +;; (name, generic-category, etc), and FILE is a name of one of ;; the following files. ;; ;; uni-name.el, uni-cat.el, uni-comb.el, uni-bidi.el ;; It contains a single form of this format: ;; (char-code-property-register PROP CHAR-TABLE) ;; where PROP is the same as above, and CHAR-TABLE is a -;; char-table containing property values in a comporessed format. +;; char-table containing property values in a compressed format. ;; ;; When they are installed in .../lisp/international/, the file ;; "charprop.el" is preloaded in loadup.el. The other files are @@ -55,7 +61,7 @@ ;; data in a char-table as below. ;; ;; If succeeding 128*N characters have the same property value, we -;; store that value for them. 
Otherwise, comporess values for +;; store that value for them. Otherwise, compress values for ;; succeeding 128 characters into a single string and store it as a ;; value for those characters. The way of compression depends on a ;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", @@ -67,14 +73,10 @@ ;; 3nd: function to call to put a property value ;; 4th: function to call to get a description of a property value ;; 5th: data referred by the above functions -;; -;; The actual -;; For more detail, see the comments in the section "SIMPLE TABLE" -;; and "NAME TABLE". ;; The name of the file UnicodeData.txt. (defconst unidata-text-file - (expand-file-name "admin/unidata/UnicodeData.txt" source-directory)) + (expand-file-name "admin/unidata/unidata.txt" source-directory)) ;; List of elements of this form: ;; (CHAR-or-RANGE PROP1 PROP2 ... PROPn) @@ -92,9 +94,7 @@ (or (file-readable-p unidata-text-file) (error "File not readable: %s" unidata-text-file)) (with-temp-buffer - (call-process "sed" unidata-text-file t nil - "-e" "s/\\([^;]*\\);\\(.*\\)/(#x\\1 \\\"\\2\\\")/" - "-e" "s/;/\\\" \\\"/g") + (insert-file-contents unidata-text-file) (goto-char (point-min)) (condition-case nil (while t @@ -166,7 +166,7 @@ 5 unidata-gen-table-decomposition "uni-decomposition.el" "Unicode decomposition mapping. Property value is a list of characters. The first element may be -one of these symbols representing compatiblity formatting tag: +one of these symbols representing compatibility formatting tag: , , , , , , , , , , , , , , , " @@ -231,7 +231,7 @@ ;; ;; The first character of the string is FIRST-INDEX. ;; The Nth (N > 0) character of the string is a property value of the -;; character (BLOCk-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is +;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is ;; the first of the characters in the block. ;; ;; The 4th extra slot of a char-table is nil. 
@@ -763,9 +763,23 @@ (aset vec idx (nconc word-list tail-list))) (dotimes (i 128) (aset table (+ first-char i) (aref vec i))) - (aref vec (- char first-char))))))) + (aref vec (- char first-char))))) -;; Store VAL as the name of CHAR in TABLE. + ;; Hangul syllable + ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) + ;; SIndex = S (char) - SBase (#xAC00) + (setq char (- char #xAC00)) + (let (;; L = LBase + SIndex / NCount + (L (+ #x1100 (/ char 588))) + ;; V = VBase + (SIndex % NCount) / TCount + (V (+ #x1161 (/ (% char 588) 28))) + ;; T = TBase + SIndex % TCount + (T (+ #x11A7 (% char 28)))) + (list L V T))) + + )) + +;; Store VAL as the decomposition information of CHAR in TABLE. (defun unidata-put-decomposition (char val table) (let ((current-val (aref table char))) @@ -871,7 +885,7 @@ (if (and (eq prop 'decomposition) (> idx 32)) - (error "Too many symobls in decomposition data")) + (error "Too many symbols in decomposition data")) (dotimes (i (/ #x110000 128)) (let* ((idx (* i 128)) @@ -956,11 +970,11 @@ (if (= c 32) (setq l (if (= (aref str idx) ?<) (cons (intern (substring str idx i)) l) - (cons (string-to-int (substring str idx i) 16) l)) + (cons (string-to-number (substring str idx i) 16) l)) idx (1+ i)))) (if (= (aref str idx) ?<) (setq l (cons (intern (substring str idx len)) l)) - (setq l (cons (string-to-int (substring str idx len) 16) l))) + (setq l (cons (string-to-number (substring str idx len) 16) l))) (nreverse l))))) @@ -1091,9 +1105,9 @@ (cond ((eq generator 'unidata-gen-table-symbol) (setq val1 (intern val1))) ((eq generator 'unidata-gen-table-integer) - (setq val1 (string-to-int val1))) + (setq val1 (string-to-number val1))) ((eq generator 'unidata-gen-table-character) - (setq val1 (string-to-int val1 16))) + (setq val1 (string-to-number val1 16))) ((eq generator 'unidata-gen-table-decomposition) (setq val1 (unidata-split-decomposition val1))))) (when (>= char check) @@ -1136,19 +1150,19 @@ (insert ";; Automatically generated from 
UnicodeData.txt.\n" (format "(define-char-code-property '%S %S %S)\n" prop table docstring) - ;; \040 below is to avoid error on reading this file. - ";; Local\040Variables:\n" + ";; Local Variables:\n" ";; coding: utf-8\n" ";; no-byte-compile: t\n" ";; End:\n\n" (format ";; %s ends here\n" file))))) (message "Writing %s..." charprop-file) - ;; \040 below is to avoid error on reading this file. - (insert ";; Local\040Variables:\n" + (insert ";; Local Variables:\n" ";; coding: utf-8\n" ";; no-byte-compile: t\n" ";; End:\n\n" (format ";; %s ends here\n" charprop-file))))) + + ;; arch-tag: 961c862e-b821-447e-9b8a-bfbab9c2d525 ;;; unidata-gen.el ends here