annotate admin/unidata/unidata-gen.el @ 110822:8d679bf1ecf1

* dired.el (dired-save-positions): Doc fix. (Bug#7119)
author Juanma Barranquero <lekktu@gmail.com>
date Tue, 28 Sep 2010 01:41:00 +0200
parents 1d1d5d9bd884
children 4d54e23aa31e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1 ;; unidata-gen.el -- Create files containing character property data.
106815
1d1d5d9bd884 Add 2010 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 105873
diff changeset
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
3 ;; National Institute of Advanced Industrial Science and Technology (AIST)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
4 ;; Registration Number H13PRO009
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
5
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
6 ;; This file is part of GNU Emacs.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
7
94829
aeac1d771ae4 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 94071
diff changeset
8 ;; GNU Emacs is free software: you can redistribute it and/or modify
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
9 ;; it under the terms of the GNU General Public License as published by
94829
aeac1d771ae4 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 94071
diff changeset
10 ;; the Free Software Foundation, either version 3 of the License, or
aeac1d771ae4 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 94071
diff changeset
11 ;; (at your option) any later version.
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
12
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
13 ;; GNU Emacs is distributed in the hope that it will be useful,
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
16 ;; GNU General Public License for more details.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
17
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
18 ;; You should have received a copy of the GNU General Public License
94829
aeac1d771ae4 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 94071
diff changeset
19 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
20
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
21 ;;; Commentary:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
22
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
23 ;; SPECIAL NOTICE
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
24 ;;
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
25 ;; This file must be byte-compilable/loadable by `temacs' and also
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
26 ;; the entry function `unidata-gen-files' must be runnable by
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
27 ;; `temacs'.
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
28
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
29 ;; FILES TO BE GENERATED
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
30 ;;
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
31 ;; The entry function `unidata-gen-files' generates these files in
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
32 ;; the current directory.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
33 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
34 ;; charprop.el
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
35 ;; It contains a series of forms of this format:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
36 ;; (char-code-property-register PROP FILE)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
37 ;; where PROP is a symbol representing a character property
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
38 ;; (name, general-category, etc), and FILE is a name of one of
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
39 ;; the following files.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
40 ;;
94071
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
41 ;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el,
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
42 ;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el,
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
43 ;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el,
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
44 ;; uni-lowercase.el, uni-titlecase.el
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
45 ;; They each contain a single form of this format:
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
46 ;; (char-code-property-register PROP CHAR-TABLE)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
47 ;; where PROP is the same as above, and CHAR-TABLE is a
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
48 ;; char-table containing property values in a compressed format.
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
49 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
50 ;; When they are installed in .../lisp/international/, the file
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
51 ;; "charprop.el" is preloaded in loadup.el. The other files are
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
52 ;; automatically loaded when the functions `get-char-code-property'
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
53 ;; and `put-char-code-property' are called.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
54 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
55 ;; FORMAT OF A CHAR TABLE
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
56 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
57 ;; We want to keep the size of a file containing a char-table small. We
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
58 ;; also want to load the file and get a property value fast. We
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
59 ;; also want to reduce the used memory after loading it. So,
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
60 ;; instead of naively storing a property value for each character in
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
61 ;; a char-table (and write it out into a file), we store compressed
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
62 ;; data in a char-table as below.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
63 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
64 ;; If succeeding 128*N characters have the same property value, we
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
65 ;; store that value for them. Otherwise, compress values for
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
66 ;; succeeding 128 characters into a single string and store it as a
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
67 ;; value for those characters. The way of compression depends on a
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
68 ;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE",
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
69 ;; and "WORD-LIST TABLE".
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
70
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
71 ;; The char table has five extra slots:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
72 ;; 1st: property symbol
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
73 ;; 2nd: function to call to get a property value
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
74 ;; 3rd: function to call to put a property value
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
75 ;; 4th: function to call to get a description of a property value
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
76 ;; 5th: data referred by the above functions
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
77
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
78 ;; List of elements of this form:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
79 ;; (CHAR-or-RANGE PROP1 PROP2 ... PROPn)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
80 ;; CHAR-or-RANGE: a character code or a cons of character codes
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
81 ;; PROPn: string representing the nth property value
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
82
90174
f3973ae57d8b (unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents: 90162
diff changeset
83 (defvar unidata-list nil)
f3973ae57d8b (unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents: 90162
diff changeset
84
f3973ae57d8b (unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents: 90162
diff changeset
85 (defun unidata-setup-list (unidata-text-file)
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
86 (let* ((table (list nil))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
87 (tail table)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
88 (block-names '(("^<CJK Ideograph" . CJK\ IDEOGRAPH)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
89 ("^<Hangul Syllable" . HANGUL\ SYLLABLE)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
90 ("^<.*Surrogate" . nil)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
91 ("^<.*Private Use" . PRIVATE\ USE)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
92 val char name)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
93 (or (file-readable-p unidata-text-file)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
94 (error "File not readable: %s" unidata-text-file))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
95 (with-temp-buffer
105873
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
96 ;; Insert a file of this format:
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
97 ;; (CHAR NAME CATEGORY ...)
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
98 ;; where CHAR is a character code and the following elements are strings
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
99 ;; representing character properties.
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
100 (insert-file-contents unidata-text-file)
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
101 (goto-char (point-min))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
102 (condition-case nil
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
103 (while t
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
104 (setq val (read (current-buffer))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
105 char (car val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
106 name (cadr val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
107
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
108 ;; Check this kind of block.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
109 ;; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
105873
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
110 ;; 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
111 (if (and (= (aref name 0) ?<)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
112 (string-match ", First>$" name))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
113 (let ((first char)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
114 (l block-names)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
115 block-name)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
116 (setq val (read (current-buffer))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
117 char (car val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
118 block-name (cadr val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
119 name nil)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
120 (while l
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
121 (if (string-match (caar l) block-name)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
122 (setq name (cdar l) l nil)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
123 (setq l (cdr l))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
124 (if (not name)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
125 ;; As this is a surrogate pair range, ignore it.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
126 (setq val nil)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
127 (setcar val (cons first char))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
128 (setcar (cdr val) name))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
129
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
130 (when val
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
131 (setcdr tail (list val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
132 (setq tail (cdr tail))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
133 (error nil)))
90174
f3973ae57d8b (unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents: 90162
diff changeset
134 (setq unidata-list (cdr table))))
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
135
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
136 ;; Alist of this form:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
137 ;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
138 ;; PROP: character property
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
139 ;; INDEX: index to each element of unidata-list for PROP
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
140 ;; GENERATOR: function to generate a char-table
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
141 ;; FILENAME: filename to store the char-table
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
142 ;; DESCRIBER: function to call to get a description string of property value
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
143
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
144 (defconst unidata-prop-alist
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
145 '((name
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
146 1 unidata-gen-table-name "uni-name.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
147 "Unicode character name.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
148 Property value is a string.")
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
149 (general-category
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
150 2 unidata-gen-table-symbol "uni-category.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
151 "Unicode general category.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
152 Property value is one of the following symbols:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
153 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
154 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
155 unidata-describe-general-category)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
156 (canonical-combining-class
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
157 3 unidata-gen-table-integer "uni-combining.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
158 "Unicode canonical combining class.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
159 Property value is an integer."
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
160 unidata-describe-canonical-combining-class)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
161 (bidi-class
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
162 4 unidata-gen-table-symbol "uni-bidi.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
163 "Unicode bidi class.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
164 Property value is one of the following symbols:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
165 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
166 AN, CS, NSM, BN, B, S, WS, ON"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
167 unidata-describe-bidi-class)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
168 (decomposition
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
169 5 unidata-gen-table-decomposition "uni-decomposition.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
170 "Unicode decomposition mapping.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
171 Property value is a list of characters. The first element may be
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
172 one of these symbols representing compatibility formatting tag:
100107
eff1b0128211 (unidata-prop-alist): Docstring for
Kenichi Handa <handa@m17n.org>
parents: 100094
diff changeset
173 font, noBreak, initial, medial, final, isolated, circle, super,
eff1b0128211 (unidata-prop-alist): Docstring for
Kenichi Handa <handa@m17n.org>
parents: 100094
diff changeset
174 sub, vertical, wide, narrow, small, square, fraction, compat"
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
175 unidata-describe-decomposition)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
176 (decimal-digit-value
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
177 6 unidata-gen-table-integer "uni-decimal.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
178 "Unicode numeric value (decimal digit).
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
179 Property value is an integer.")
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
180 (digit-value
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
181 7 unidata-gen-table-integer "uni-digit.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
182 "Unicode numeric value (digit).
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
183 Property value is an integer.")
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
184 (numeric-value
100093
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
185 8 unidata-gen-table-numeric "uni-numeric.el"
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
186 "Unicode numeric value (numeric).
100094
c39e7dbb8896 (unidata-prop-alist): Docstring adjusted.
Kenichi Handa <handa@m17n.org>
parents: 100093
diff changeset
187 Property value is an integer or a floating point.")
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
188 (mirrored
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
189 9 unidata-gen-table-symbol "uni-mirrored.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
190 "Unicode bidi mirrored flag.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
191 Property value is a symbol `Y' or `N'.")
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
192 (old-name
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
193 10 unidata-gen-table-name "uni-old-name.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
194 "Unicode old names as published in Unicode 1.0.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
195 Property value is a string.")
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
196 (iso-10646-comment
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
197 11 unidata-gen-table-name "uni-comment.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
198 "Unicode ISO 10646 comment.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
199 Property value is a string.")
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
200 (uppercase
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
201 12 unidata-gen-table-character "uni-uppercase.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
202 "Unicode simple uppercase mapping.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
203 Property value is a character."
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
204 string)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
205 (lowercase
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
206 13 unidata-gen-table-character "uni-lowercase.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
207 "Unicode simple lowercase mapping.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
208 Property value is a character."
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
209 string)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
210 (titlecase
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
211 14 unidata-gen-table-character "uni-titlecase.el"
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
212 "Unicode simple titlecase mapping.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
213 Property value is a character."
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
214 string)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
215
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
216 ;; Functions to access the above data.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
217 (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
218 (defsubst unidata-prop-generator (prop) (nth 2 (assq prop unidata-prop-alist)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
219 (defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
220 (defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
221 (defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
222
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
223
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
224 ;; SIMPLE TABLE
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
225 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
226 ;; If the type of character property value is character, and the
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
227 ;; values of succeeding character codes are usually different, we use
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
228 ;; a char-table described here to store such values.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
229 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
230 ;; If succeeding 128 characters have no property, a char-table has the
105873
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
231 ;; symbol t for them. Otherwise a char-table has a string of the
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
232 ;; following format for them.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
233 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
234 ;; The first character of the string is FIRST-INDEX.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
235 ;; The Nth (N > 0) character of the string is a property value of the
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
236 ;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
237 ;; the first of the characters in the block.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
238 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
239 ;; The 4th extra slot of a char-table is nil.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
240
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
241 (defun unidata-get-character (char val table)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
242 (cond
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
243 ((characterp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
244 val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
245
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
246 ((stringp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
247 (let* ((len (length val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
248 (block-head (lsh (lsh char -7) 7))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
249 (vec (make-vector 128 nil))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
250 (first-index (aref val 0)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
251 (dotimes (i (1- len))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
252 (let ((elt (aref val (1+ i))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
253 (if (> elt 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
254 (aset vec (+ first-index i) elt))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
255 (dotimes (i 128)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
256 (aset table (+ block-head i) (aref vec i)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
257 (aref vec (- char block-head))))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
258
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
259 (defun unidata-put-character (char val table)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
260 (or (characterp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
261 (not val)
94071
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
262 (error "Not a character nor nil: %S" val))
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
263 (let ((current-val (aref table char)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
264 (unless (eq current-val val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
265 (if (stringp current-val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
266 (funcall (char-table-extra-slot table 1) char current-val table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
267 (aset table char val))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
268
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
269 (defun unidata-gen-table-character (prop)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
270 (let ((table (make-char-table 'char-code-property-table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
271 (prop-idx (unidata-prop-index prop))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
272 (vec (make-vector 128 0))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
273 (tail unidata-list)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
274 elt range val idx slot)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
275 (set-char-table-range table (cons 0 (max-char)) t)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
276 (while tail
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
277 (setq elt (car tail) tail (cdr tail))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
278 (setq range (car elt)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
279 val (nth prop-idx elt))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
280 (if (= (length val) 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
281 (setq val nil)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
282 (setq val (string-to-number val 16)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
283 (if (consp range)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
284 (if val
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
285 (set-char-table-range table range val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
286 (let* ((start (lsh (lsh range -7) 7))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
287 (limit (+ start 127))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
288 first-index last-index)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
289 (fillarray vec 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
290 (if val
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
291 (aset vec (setq last-index (setq first-index (- range start)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
292 val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
293 (while (and (setq elt (car tail) range (car elt))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
294 (integerp range)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
295 (<= range limit))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
296 (setq val (nth prop-idx elt))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
297 (when (> (length val) 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
298 (aset vec (setq last-index (- range start))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
299 (string-to-number val 16))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
300 (or first-index
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
301 (setq first-index last-index)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
302 (setq tail (cdr tail)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
303 (when first-index
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
304 (let ((str (string first-index))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
305 c)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
306 (while (<= first-index last-index)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
307 (setq str (format "%s%c" str (or (aref vec first-index) 0))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
308 first-index (1+ first-index)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
309 (set-char-table-range table (cons start limit) str))))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
310
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
311 (set-char-table-extra-slot table 0 prop)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
312 (byte-compile 'unidata-get-character)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
313 (byte-compile 'unidata-put-character)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
314 (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-character))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
315 (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-character))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
316
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
317 table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
318
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
319
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
320
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
321 ;; RUN-LENGTH TABLE
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
322 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
323 ;; If the type of character property value is symbol, integer,
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
324 ;; boolean, or character, we use a char-table described here to store
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
325 ;; the values.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
326 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
327 ;; The 4th extra slot is a vector of property values (VAL-TABLE), and
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
328 ;; values for succeeding 128 characters are encoded into this
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
329 ;; character sequence:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
330 ;; ( VAL-CODE RUN-LENGTH ? ) +
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
331 ;; where:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
332 ;; VAL-CODE (0..127):
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
333 ;; (VAL-CODE - 1) is an index into VAL-TABLE.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
334 ;; The value 0 means no-value.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
335 ;; RUN-LENGTH (130..255):
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
336 ;; (RUN-LENGTH - 128) specifies how many characters have the same
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
337 ;; value. If omitted, it means 1.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
338
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
339
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
340 ;; Return a symbol-type character property value of CHAR. VAL is the
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
341 ;; current value of (aref TABLE CHAR).
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
342
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
343 (defun unidata-get-symbol (char val table)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
344 (let ((val-table (char-table-extra-slot table 4)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
345 (cond ((symbolp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
346 val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
347 ((stringp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
348 (let ((first-char (lsh (lsh char -7) 7))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
349 (str val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
350 (len (length val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
351 (idx 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
352 this-val count)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
353 (set-char-table-range table (cons first-char (+ first-char 127))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
354 nil)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
355 (while (< idx len)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
356 (setq val (aref str idx) idx (1+ idx)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
357 count (if (< idx len) (aref str idx) 1))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
358 (setq val (and (> val 0) (aref val-table (1- val)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
359 count (if (< count 128)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
360 1
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
361 (prog1 (- count 128) (setq idx (1+ idx)))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
362 (dotimes (i count)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
363 (if val
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
364 (aset table first-char val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
365 (if (= first-char char)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
366 (setq this-val val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
367 (setq first-char (1+ first-char))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
368 this-val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
369 ((> val 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
370 (aref val-table (1- val))))))
94071
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
371
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
372 ;; Return an integer-type character property value of CHAR. VAL is the
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
373 ;; current value of (aref TABLE CHAR).
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
374
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
375 (defun unidata-get-integer (char val table)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
376 (let ((val-table (char-table-extra-slot table 4)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
377 (cond ((integerp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
378 val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
379 ((stringp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
380 (let ((first-char (lsh (lsh char -7) 7))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
381 (str val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
382 (len (length val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
383 (idx 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
384 this-val count)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
385 (while (< idx len)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
386 (setq val (aref str idx) idx (1+ idx)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
387 count (if (< idx len) (aref str idx) 1))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
388 (setq val (and (> val 0) (aref val-table (1- val)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
389 count (if (< count 128)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
390 1
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
391 (prog1 (- count 128) (setq idx (1+ idx)))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
392 (dotimes (i count)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
393 (aset table first-char val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
394 (if (= first-char char)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
395 (setq this-val val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
396 (setq first-char (1+ first-char))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
397 this-val)))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
398
100093
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
399 ;; Return a numeric-type (integer or float) character property value
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
400 ;; of CHAR. VAL is the current value of (aref TABLE CHAR).
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
401
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
402 (defun unidata-get-numeric (char val table)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
403 (cond
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
404 ((numberp val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
405 val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
406 ((stringp val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
407 (let ((val-table (char-table-extra-slot table 4))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
408 (first-char (lsh (lsh char -7) 7))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
409 (str val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
410 (len (length val))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
411 (idx 0)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
412 this-val count)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
413 (while (< idx len)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
414 (setq val (aref str idx) idx (1+ idx)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
415 count (if (< idx len) (aref str idx) 1))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
416 (setq val (and (> val 0) (aref val-table (1- val)))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
417 count (if (< count 128)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
418 1
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
419 (prog1 (- count 128) (setq idx (1+ idx)))))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
420 (dotimes (i count)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
421 (aset table first-char val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
422 (if (= first-char char)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
423 (setq this-val val))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
424 (setq first-char (1+ first-char))))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
425 this-val))))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
426
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
427 ;; Store VAL (symbol) as a character property value of CHAR in TABLE.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
428
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
429 (defun unidata-put-symbol (char val table)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
430 (or (symbolp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
431 (error "Not a symbol: %S" val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
432 (let ((current-val (aref table char)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
433 (unless (eq current-val val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
434 (if (stringp current-val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
435 (funcall (char-table-extra-slot table 1) char current-val table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
436 (aset table char val))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
437
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
438 ;; Store VAL (integer) as a character property value of CHAR in TABLE.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
439
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
440 (defun unidata-put-integer (char val table)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
441 (or (integerp val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
442 (not val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
443 (error "Not an integer nor nil: %S" val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
444 (let ((current-val (aref table char)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
445 (unless (eq current-val val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
446 (if (stringp current-val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
447 (funcall (char-table-extra-slot table 1) char current-val table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
448 (aset table char val))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
449
100093
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
450 ;; Store VAL (integer or float) as a character property value of CHAR
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
451 ;; in TABLE.
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
452
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
453 (defun unidata-put-numeric (char val table)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
454 (or (numberp val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
455 (not val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
456 (error "Not a number nor nil: %S" val))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
457 (let ((current-val (aref table char)))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
458 (unless (equal current-val val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
459 (if (stringp current-val)
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
460 (funcall (char-table-extra-slot table 1) char current-val table))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
461 (aset table char val))))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
462
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
463 ;; Encode the character property value VAL into an integer value by
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
464 ;; VAL-LIST. As a side effect, VAL-LIST is modified.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
465 ;; VAL-LIST has this form:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
466 ;; (t (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
467 ;; If VAL is one of VALn, just return VAL-CODEn. Otherwise,
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
468 ;; VAL-LIST is modified to this:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
469 ;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
470
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
471 (defun unidata-encode-val (val-list val)
100093
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
472 (let ((slot (assoc val val-list))
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
473 val-code)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
474 (if slot
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
475 (cdr slot)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
476 (setq val-code (if (cdr val-list) (1+ (cdr (nth 1 val-list))) 1))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
477 (setcdr val-list (cons (cons val val-code) (cdr val-list)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
478 val-code)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
479
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
480 ;; Generate a char-table for the character property PROP.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
481
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
482 (defun unidata-gen-table (prop val-func default-value)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
483 (let ((table (make-char-table 'char-code-property-table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
484 (prop-idx (unidata-prop-index prop))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
485 (val-list (list t))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
486 (vec (make-vector 128 0))
105873
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
487 tail elt range val val-code idx slot
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
488 prev-range-data)
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
489 (set-char-table-range table (cons 0 (max-char)) default-value)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
490 (setq tail unidata-list)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
491 (while tail
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
492 (setq elt (car tail) tail (cdr tail))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
493 (setq range (car elt)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
494 val (funcall val-func (nth prop-idx elt)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
495 (setq val-code (if val (unidata-encode-val val-list val)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
496 (if (consp range)
105873
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
497 (when val-code
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
498 (set-char-table-range table range val)
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
499 (let ((from (car range)) (to (cdr range)))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
500 ;; If RANGE doesn't end at the char-table boundary (each
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
501 ;; 128 characters), we may have to carry over the data
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
502 ;; for the last several characters (at most 127 chars)
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
503 ;; to the next loop. In that case, set PREV-RANGE-DATA
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
504 ;; to ((FROM . TO) . VAL-CODE) where (FROM . TO)
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
505 ;; specifies the range of characters handled in the next
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
506 ;; loop.
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
507 (when (< (logand to #x7F) #x7F)
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
508 (if (< from (logand to #x1FFF80))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
509 (setq from (logand to #x1FFF80)))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
510 (setq prev-range-data (cons (cons from to) val-code)))))
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
511 (let* ((start (lsh (lsh range -7) 7))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
512 (limit (+ start 127))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
513 str count new-val)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
514 (fillarray vec 0)
105873
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
515 ;; See the comment above.
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
516 (when (and prev-range-data
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
517 (>= (cdr (car prev-range-data)) start))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
518 (let ((from (car (car prev-range-data)))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
519 (to (cdr (car prev-range-data)))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
520 (vcode (cdr prev-range-data)))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
521 (while (<= from to)
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
522 (aset vec (- from start) vcode)
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
523 (setq from (1+ from)))))
5626ccae11ed (unidata-gen-table): Fix for the case that the block data and the
Kenichi Handa <handa@m17n.org>
parents: 103310
diff changeset
524 (setq prev-range-data nil)
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
525 (if val-code
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
526 (aset vec (- range start) val-code))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
527 (while (and (setq elt (car tail) range (car elt))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
528 (integerp range)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
529 (<= range limit))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
530 (setq new-val (funcall val-func (nth prop-idx elt)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
531 (if (not (eq val new-val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
532 (setq val new-val
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
533 val-code (if val (unidata-encode-val val-list val))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
534 (if val-code
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
535 (aset vec (- range start) val-code))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
536 (setq tail (cdr tail)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
537 (setq str "" val-code -1 count 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
538 (mapc #'(lambda (x)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
539 (if (= val-code x)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
540 (setq count (1+ count))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
541 (if (> count 2)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
542 (setq str (concat str (string val-code
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
543 (+ count 128))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
544 (if (= count 2)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
545 (setq str (concat str (string val-code val-code)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
546 (if (= count 1)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
547 (setq str (concat str (string val-code))))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
548 (setq val-code x count 1)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
549 vec)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
550 (if (= count 128)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
551 (if val
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
552 (set-char-table-range table (cons start limit) val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
553 (if (= val-code 0)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
554 (set-char-table-range table (cons start limit) str)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
555 (if (> count 2)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
556 (setq str (concat str (string val-code (+ count 128))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
557 (if (= count 2)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
558 (setq str (concat str (string val-code val-code)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
559 (setq str (concat str (string val-code)))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
560 (set-char-table-range table (cons start limit) str))))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
561
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
562 (setq val-list (nreverse (cdr val-list)))
94071
03be13c38b12 (unidata-prop-alist): Fix typo in description of `numeric-value'.
Juanma Barranquero <lekktu@gmail.com>
parents: 91421
diff changeset
563 (set-char-table-extra-slot table 0 prop)
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
564 (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
565 table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
566
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-gen-table-symbol (prop)
  "Build and return the char-table for the symbol-valued property PROP.
The table comes from `unidata-gen-table'; each non-empty property
string is converted to an interned symbol, an empty one to nil.
Extra slots 1 and 2 of the result receive the byte-compiled
definitions of `unidata-get-symbol' and `unidata-put-symbol'."
  (let* ((to-symbol #'(lambda (x) (and (> (length x) 0)
                                       (intern x))))
         ;; The meaning of the trailing 0 is defined by
         ;; `unidata-gen-table' -- TODO confirm against its arglist.
         (table (unidata-gen-table prop to-symbol 0)))
    ;; Compile the accessors first so that the compiled function
    ;; objects are what get stored in the table.
    (byte-compile 'unidata-get-symbol)
    (byte-compile 'unidata-put-symbol)
    (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-symbol))
    (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-symbol))
    table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
577
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-gen-table-integer (prop)
  "Build and return the char-table for the integer-valued property PROP.
The table comes from `unidata-gen-table'; each non-empty property
string is converted with `string-to-number', an empty one to nil.
Extra slots 1 and 2 of the result receive the byte-compiled
definitions of `unidata-get-integer' and `unidata-put-integer'."
  (let* ((to-integer #'(lambda (x) (and (> (length x) 0)
                                        (string-to-number x))))
         ;; The meaning of the trailing t is defined by
         ;; `unidata-gen-table' -- TODO confirm against its arglist.
         (table (unidata-gen-table prop to-integer t)))
    ;; Compile the accessors first so that the compiled function
    ;; objects are what get stored in the table.
    (byte-compile 'unidata-get-integer)
    (byte-compile 'unidata-put-integer)
    (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-integer))
    (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer))
    table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
588
100093
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
(defun unidata-gen-table-numeric (prop)
  "Build and return the char-table for the numeric property PROP.
The table comes from `unidata-gen-table'.  A property string of the
form \"N/D\" becomes the float N/D, any other non-empty string is
converted with `string-to-number', and an empty string becomes nil.
Extra slots 1 and 2 of the result receive the byte-compiled
definitions of `unidata-get-numeric' and `unidata-put-numeric'."
  (let* ((to-number
          #'(lambda (x)
              (cond
               ;; Fraction: numerator before the slash, denominator
               ;; after it.  The numerator is made a float so the
               ;; division is not truncated.
               ((string-match "/" x)
                (/ (float (string-to-number x))
                   (string-to-number (substring x (match-end 0)))))
               ((> (length x) 0)
                (string-to-number x)))))
         ;; The meaning of the trailing t is defined by
         ;; `unidata-gen-table' -- TODO confirm against its arglist.
         (table (unidata-gen-table prop to-number t)))
    ;; Compile the accessors first so that the compiled function
    ;; objects are what get stored in the table.
    (byte-compile 'unidata-get-numeric)
    (byte-compile 'unidata-put-numeric)
    (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric))
    (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric))
    table))
eae5b6774936 (unidata-prop-alist): Set `numric-value'
Kenichi Handa <handa@m17n.org>
parents: 94829
diff changeset
604
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
605
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
606 ;; WORD-LIST TABLE
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
607
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
608 ;; If the table is for `name' property, each character in the string
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
609 ;; is one of these:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
610 ;; DIFF-HEAD-CODE (0, 1, or 2):
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
611 ;; specifies how to decode the following characters.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
612 ;; WORD-CODE (3..#x7FF excluding '-', '0'..'9', 'A'..'Z'):
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
613 ;; specifies an index number into WORD-TABLE (see below)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
614 ;; Otherwise (' ', '-', '0'..'9', 'A'..'Z'):
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
615 ;; specifies a literal word.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
616 ;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
617 ;; The 4th extra slot is a vector:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
618 ;; [ WORD-TABLE BLOCK-NAME HANGUL-JAMO-TABLE ]
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
619 ;; WORD-TABLE is a vector of word symbols.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
620 ;; BLOCK-NAME is a vector of name symbols for a block of characters.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
621 ;; HANGUL-JAMO-TABLE is `unidata-name-jamo-name-table'.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
622
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
623 ;; Return the difference of symbol list L1 and L2 in this form:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
624 ;; (DIFF-HEAD SYM1 SYM2 ...)
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
625 ;; DIFF-HEAD is ((SAME-HEAD-LENGTH * 16) + TAIL-START), where TAIL-START is the index in L1 at which the common tail begins.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
626 ;; Ex: If L1 is (a b c d e f) and L2 is (a g h e f), this function
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
627 ;; returns ((+ (* 1 16) 4) g h).
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
628 ;; It means that we can get L2 from L1 by prepending the first element
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
629 ;; of L1 and appending the elements of L1 from index 4 on (its last 2 elements) to the list (g h).
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
630 ;; If L1 and L2 don't have common elements at the head and tail,
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
631 ;; set DIFF-HEAD to -1 and SYM1 ... to the elements of L2.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
632
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-word-list-diff (l1 l2)
  "Describe how the symbol list L2 differs from the symbol list L1.
The value is a freshly consed list (DIFF-HEAD SYM...).

When L1 and L2 share leading and/or trailing elements, DIFF-HEAD is
\(+ (* HEAD 16) TAIL-START), where HEAD is the number of shared
leading elements and TAIL-START is the index in L1 at which the
shared trailing elements begin; SYM... are the elements of L2 that
lie between the shared head and the shared tail.

When nothing is shared, or when L1 has 16 or more elements (in which
case no comparison is attempted), DIFF-HEAD is -1 and SYM... are all
the elements of L2."
  (let ((head 0)
        (tail 0)
        (n1 (length l1))
        (n2 (length l2))
        middle)
    ;; Both halves of DIFF-HEAD must fit in 4 bits, so longer lists
    ;; are never diffed.
    (when (< n1 16)
      ;; Drop the common prefix from both lists, counting it.
      (while (and l1 (eq (car l1) (car l2)))
        (setq head (1+ head)
              l1 (cdr l1)
              l2 (cdr l2)
              n1 (1- n1)
              n2 (1- n2)))
      ;; Count the common suffix of what is left by walking the
      ;; reversed remainders in parallel.
      (let ((r1 (reverse l1))
            (r2 (reverse l2)))
        (while (and r1 r2 (eq (car r1) (car r2)))
          (setq tail (1+ tail)
                r1 (cdr r1)
                r2 (cdr r2)))))
    ;; Copy the part of L2 that precedes the common suffix.
    (let ((keep (- n2 tail)))
      (while (> keep 0)
        (setq middle (cons (car l2) middle)
              l2 (cdr l2)
              keep (1- keep))))
    (cons (if (= (+ head tail) 0)
              -1
            ;; HEAD + N1 is the original length of L1, so the low
            ;; part is the position in L1 where the shared tail starts.
            (+ (* head 16) (+ head (- n1 tail))))
          (nreverse middle))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
653
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
654 ;; Return a compressed form of the vector VEC. Each element of VEC is
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
655 ;; a list of symbols of which names can be concatenated to form a
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
656 ;; character name. This function changes those elements into
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
657 ;; compressed forms by utilizing the fact that diff of consecutive
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
658 ;; elements is usually small.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
659
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-word-list-compress (vec)
  "Compress the 128-element vector VEC of word lists and return the result.
Each non-nil element of VEC is replaced, in VEC itself, by a list
whose car is a code and whose cdr is a payload:
  (0 SYM...)            the complete word list;
  (1 DIFF-HEAD SYM...)  a diff against the previous non-nil element,
                        as computed by `unidata-word-list-diff';
  (2 SYM...)            a diff whose DIFF-HEAD equals the one most
                        recently emitted.
The value is nil if VEC has no non-nil element, VEC itself if its
last element is non-nil, and otherwise a new vector holding VEC's
elements up to and including the last non-nil one."
  (let (prev-words last-used cur-head words encoded)
    (dotimes (i 128)
      (setq words (aref vec i))
      (when words
        (cond
         ;; First entry of the block: nothing to diff against.
         ((null prev-words)
          (setq cur-head -1
                encoded (cons 0 words)))
         (t
          (setq encoded (unidata-word-list-diff prev-words words))
          (cond
           ;; Nothing in common with the previous entry: store in full.
           ((= (car encoded) -1)
            (setq cur-head -1
                  encoded (cons 0 (cdr encoded))))
           ;; Same DIFF-HEAD as last time: it need not be repeated.
           ((eq cur-head (car encoded))
            (setq encoded (cons 2 (cdr encoded))))
           ;; New DIFF-HEAD: keep it in the payload and remember it.
           (t
            (setq cur-head (car encoded))
            (if (>= cur-head 0)
                (setq encoded (cons 1 encoded)))))))
        (aset vec i encoded)
        (setq last-used i
              prev-words words)))
    (cond
     ((not last-used)
      nil)
     ;; Trailing nil elements carry no information; cut them off.
     ((< last-used 127)
      (let ((shorter (make-vector (1+ last-used) nil)))
        (dotimes (i (1+ last-used))
          (aset shorter i (aref vec i)))
        shorter))
     (t
      vec))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
687
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
688 ;; Encode the word index IDX into a character code that can be
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
689 ;; embedded in a string.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
690
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defsubst unidata-encode-word (idx)
  "Return the character code that stands for word index IDX.
Codes 0, 1 and 2 are reserved, so word codes start at 3."
  (+ 3 idx))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
694
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
695 ;; Decode the character code CODE (that is embedded in a string) into
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
696 ;; the corresponding word name by looking up WORD-TABLE.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
697
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defsubst unidata-decode-word (code word-table)
  "Return the element of WORD-TABLE that character code CODE stands for.
CODE is a word index offset by 3 (see `unidata-encode-word').
Return nil if that index lies beyond the end of WORD-TABLE."
  (let ((idx (- code 3)))
    (and (< idx (length word-table))
         (aref word-table idx))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
702
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
703 ;; Table of short transliterated name symbols of Hangul Jamo divided
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
704 ;; into Choseong, Jungseong, and Jongseong.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
705
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defconst unidata-name-jamo-name-table
  ;; Three vectors of short Jamo name symbols, indexed by the L, V and
  ;; T indices that `unidata-get-name' derives from a Hangul syllable.
  ;; Row 0 (19 entries): initial consonants.  The nil entry is the
  ;; one `unidata-get-name' renders as an empty string.
  [[G GG N D DD R M B BB S SS nil J JJ C K T P H]
   ;; Row 1 (21 entries): vowels.
   [A AE YA YAE EO E YEO YE O WA WAE OE YO U WEO WE WI YU EU YI I]
   ;; Row 2 (27 entries): final consonants.  `unidata-get-name'
   ;; indexes this row with T - 1, because T = 0 means "no final".
   [G GG GS N NJ NH D L LG LM LB LS LT LP LH M B BS S SS NG J C K T P H]])
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
710
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
711 ;; Return a name of CHAR. VAL is the current value of (aref TABLE
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
712 ;; CHAR).
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
713
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-get-name (char val table)
  "Return the name of CHAR, given VAL, the current value of (aref TABLE CHAR).
If VAL is a string whose first character is non-zero, it is already a
name and is returned unchanged.
If VAL is a string whose first character is 0, it is the compressed
encoding of the names of the whole 128-character block that contains
CHAR.  The block is decoded, every character of the block gets its
decoded name (or nil) stored in TABLE, and the name of CHAR is
returned.
If VAL is a positive integer, it is a 1-based index into the vector
stored at index 1 of TABLE's extra slot 4; the symbol found there
selects an algorithmically generated name (Hangul syllables, CJK
ideographs, variation selectors).
In all other cases the value is nil."
  (cond
   ((stringp val)
    (if (> (aref val 0) 0)
        val
      ;; VAL still holds the encoded names of a whole block.
      (let* ((first-char (lsh (lsh char -7) 7)) ; CHAR with its low 7 bits cleared
             (word-table (aref (char-table-extra-slot table 4) 0))
             (i 1)                      ; read position in VAL; VAL[0] is the 0 tested above
             (len (length val))
             (vec (make-vector 128 nil)) ; decoded word list for each char of the block
             (idx 0)                    ; slot of VEC currently being assembled
             (case-fold-search nil)
             c word-list tail-list last-list word diff-head)
        (while (< i len)
          (setq c (aref val i))
          (if (< c 3)
              ;; A DIFF-HEAD-CODE (0, 1 or 2) closes the entry being
              ;; assembled and opens the next one.
              (progn
                (if (or word-list tail-list)
                    (aset vec idx
                          (setq last-list (nconc word-list tail-list))))
                (setq i (1+ i) idx (1+ idx)
                      word-list nil tail-list nil)
                (if (> c 0)
                    ;; The new entry is a diff against LAST-LIST.
                    (let ((l last-list))
                      ;; Code 1 is followed by a new DIFF-HEAD; code 2
                      ;; reuses the DIFF-HEAD read previously.
                      (if (= c 1)
                          (setq diff-head
                                (prog1 (aref val i) (setq i (1+ i)))))
                      ;; Low 4 bits: index in LAST-LIST where the
                      ;; shared tail starts (the tail is shared, not
                      ;; copied).
                      (setq tail-list (nthcdr (% diff-head 16) last-list))
                      ;; High bits: number of leading elements of
                      ;; LAST-LIST to copy.
                      (dotimes (i (/ diff-head 16))
                        (setq word-list (nconc word-list (list (car l)))
                              l (cdr l))))))
            ;; Any other code is looked up in WORD-TABLE and the
            ;; resulting symbol's name is appended to the current entry.
            (setq word-list
                  (nconc word-list
                         (list (symbol-name
                                (unidata-decode-word c word-table))))
                  i (1+ i))))
        ;; Store the entry that was still being assembled at the end.
        (if (or word-list tail-list)
            (aset vec idx (nconc word-list tail-list)))
        (setq val nil)
        ;; Turn each word list into a name string and cache it in TABLE.
        (dotimes (i 128)
          (setq c (+ first-char i))
          (let ((name (aref vec i)))
            (if name
                ;; Work on a copy: the lists in VEC share structure.
                (let ((tail (cdr (setq name (copy-sequence name))))
                      elt)
                  ;; Insert " " before every word after the first,
                  ;; except that a "-" element gets no space before it
                  ;; and the element following it is skipped as well.
                  (while tail
                    (setq elt (car tail))
                    (or (string= elt "-")
                        (progn
                          (setcdr tail (cons elt (cdr tail)))
                          (setcar tail " ")))
                    (setq tail (cddr tail)))
                  (setq name (apply 'concat name))))
            (aset table c name)
            (if (= c char)
                (setq val name))))
        val)))

   ((and (integerp val) (> val 0))
    (let* ((symbol-table (aref (char-table-extra-slot table 4) 1))
           (sym (aref symbol-table (1- val))))
      (cond ((eq sym 'HANGUL\ SYLLABLE)
             (let ((jamo-name-table (aref (char-table-extra-slot table 4) 2)))
               ;; SIndex = S - SBase
               (setq char (- char #xAC00))
               (let ( ;; LIndex = SIndex / NCount
                     (L (/ char 588))
                     ;; VIndex = (SIndex % NCount) / TCount
                     (V (/ (% char 588) 28))
                     ;; TIndex = SIndex % TCount
                     (T (% char 28)))
                 (format "HANGUL SYLLABLE %s%s%s"
                         ;; U+110B is nil in this table.
                         (or (aref (aref jamo-name-table 0) L) "")
                         (aref (aref jamo-name-table 1) V)
                         ;; T = 0 means there is no final consonant;
                         ;; otherwise row 2 is indexed from T - 1.
                         (if (= T 0) ""
                           (aref (aref jamo-name-table 2) (1- T)))))))
            ((eq sym 'CJK\ IDEOGRAPH)
             (format "%s-%04X" sym char))
            ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH)
             (format "%s-%04X" sym char))
            ((eq sym 'VARIATION\ SELECTOR)
             ;; #xE0100 maps to number 17.
             (format "%s-%d" sym (+ (- char #xe0100) 17))))))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
797
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
798 ;; Store VAL as the name of CHAR in TABLE.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
799
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-put-name (char val table)
  ;; Store VAL as the name of CHAR in TABLE and return VAL.
  ;;
  ;; When the entry currently held for CHAR is a string whose first
  ;; character is 0, the function kept in extra slot 1 of TABLE is
  ;; called with (CHAR OLD-ENTRY TABLE) before the new value is
  ;; stored.  (A plain comment is used rather than a docstring so that
  ;; the compiled function object is not changed by documentation.)
  (let ((old (aref table char)))
    (when (and (stringp old)
               (zerop (aref old 0)))
      (funcall (char-table-extra-slot table 1) char old table)))
  (aset table char val))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
805
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-get-decomposition (char val table)
  ;; Return the decomposition of CHAR, where VAL is the value currently
  ;; stored for CHAR in TABLE.
  ;;
  ;; - A cons VAL is already a decoded decomposition and is returned as is.
  ;; - A string VAL whose first character is non-zero is returned as is.
  ;; - A string VAL whose first character is 0 is a packed description of
  ;;   the 128-character block containing CHAR.  It is unpacked, every
  ;;   entry of the block is written back into TABLE, and the entry for
  ;;   CHAR is returned.
  ;; - VAL 0 for a precomposed Hangul syllable (U+AC00..U+D7A3) is
  ;;   decomposed arithmetically.
  ;; - Anything else yields nil.
  ;;
  ;; (Plain comments are used rather than a docstring so that the
  ;; compiled function object is not changed by documentation.)
  (cond
   ((consp val)
    val)

   ((stringp val)
    (if (> (aref val 0) 0)
        val
      (let* (;; First character of the 128-character block holding CHAR.
             (first-char (lsh (lsh char -7) 7))
             (word-table (char-table-extra-slot table 4))
             ;; Index 0 of VAL is the 0 marker, so decoding starts at 1.
             (i 1)
             (len (length val))
             ;; Decoded entries, one per character of the block.
             (vec (make-vector 128 nil))
             (idx 0)
             (case-fold-search nil)
             c word-list tail-list last-list diff-head)
        (while (< i len)
          (setq c (aref val i))
          (if (< c 3)
              ;; Codes 0, 1 and 2 terminate the entry being collected.
              (progn
                (if (or word-list tail-list)
                    (aset vec idx
                          (setq last-list (nconc word-list tail-list))))
                (setq i (1+ i) idx (1+ idx)
                      word-list nil tail-list nil)
                ;; Codes 1 and 2 say that the next entry shares elements
                ;; with LAST-LIST.  Code 1 is followed by a fresh
                ;; DIFF-HEAD value; code 2 reuses the previous one.
                (if (> c 0)
                    (let ((l last-list))
                      (if (= c 1)
                          (setq diff-head
                                (prog1 (aref val i) (setq i (1+ i)))))
                      ;; Low 4 bits: how many leading elements of
                      ;; LAST-LIST to skip to reach the shared tail.
                      (setq tail-list (nthcdr (% diff-head 16) last-list))
                      ;; High bits: how many leading elements of
                      ;; LAST-LIST to copy as the head of the new entry.
                      (dotimes (i (/ diff-head 16))
                        (setq word-list (nconc word-list (list (car l)))
                              l (cdr l))))))
            ;; Any other code is one element of the current entry: either
            ;; a word looked up in WORD-TABLE or, failing that, the code
            ;; itself.
            (setq word-list
                  (nconc word-list
                         (list (or (unidata-decode-word c word-table) c)))
                  i (1+ i))))
        ;; Flush the entry that was still being collected at end of VAL.
        (if (or word-list tail-list)
            (aset vec idx (nconc word-list tail-list)))
        ;; Replace the packed string by the decoded entries for the
        ;; whole block.
        (dotimes (i 128)
          (aset table (+ first-char i) (aref vec i)))
        (aref vec (- char first-char)))))

   ;; Hangul syllable
   ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3))
    ;; SIndex = S (char) - SBase (#xAC00)
    (setq char (- char #xAC00))
    (let (;; L = LBase + SIndex / NCount
          (L (+ #x1100 (/ char 588)))
          ;; V = VBase + (SIndex % NCount) / TCount
          (V (+ #x1161 (/ (% char 588) 28)))
          ;; LV = SBase + (SIndex / TCount) * TCount
          (LV (+ #xAC00 (* (/ char 28) 28)))
          ;; T = TBase + SIndex % TCount
          (T (+ #x11A7 (% char 28))))
      ;; T equal to TBase means there is no trailing consonant: the
      ;; syllable is an LV syllable and decomposes to L V.  Otherwise it
      ;; decomposes to its LV syllable followed by T.
      (if (= T #x11A7)
          (list L V)
        (list LV T))))

   ))
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
867
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
868 ;; Store VAL as the decomposition information of CHAR in TABLE.
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
869
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-put-decomposition (char val table)
  ;; Store VAL as the decomposition information of CHAR in TABLE and
  ;; return VAL.
  ;;
  ;; If the value presently stored for CHAR is a string beginning with
  ;; character code 0, the function in extra slot 1 of TABLE is first
  ;; called with (CHAR PREVIOUS-VALUE TABLE); only then is VAL written.
  ;; (A plain comment is used rather than a docstring so that the
  ;; compiled function object is not changed by documentation.)
  (let* ((previous (aref table char))
         (packed-p (and (stringp previous)
                        (= 0 (aref previous 0)))))
    (when packed-p
      (funcall (char-table-extra-slot table 1) char previous table))
    (aset table char val)))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
875
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
876 ;; UnicodeData.txt contains these lines:
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
877 ;; 0000;<control>;Cc;0;BN;;;;;N;NULL;;;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
878 ;; ...
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
879 ;; 0020;SPACE;Zs;0;WS;;;;;N;;;;;
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
880 ;; ...
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
881 ;; The following command yields a file of about 96K bytes.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
882 ;; % gawk -F ';' '{print $1,$2;}' < UnicodeData.txt | gzip > temp.gz
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
883 ;; With the following function, we can get a file of almost the same
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
884 ;; size.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
885
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
886 ;; Generate a char-table for character names.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
887
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-gen-table-word-list (prop val-func)
  "Generate a char-table of word-list encoded values for property PROP.
Each element of `unidata-list' supplies a character or a character
range in its car and the raw property value at the index returned by
`unidata-prop-index' for PROP.  VAL-FUNC is called with each raw value
and returns either a list of symbols and integers, or a symbol that
names a whole block of characters.

In the returned char-table:
- each 128-character block holding per-character data is set to one
  string made by concatenating the encoded entries of that block;
- each block range is set to 1 + the index of its name in the block
  word table;
- extra slot 0 holds PROP;
- extra slot 4 holds (WORD-TABLE . BLOCK-WORD-TABLE), two vectors of
  symbols.

Signal an error if PROP is `decomposition' and more than 32 distinct
symbols occur."
  (let ((table (make-char-table 'char-code-property-table))
        (prop-idx (unidata-prop-index prop))
        ;; Alist of (SYMBOL . COUNT) behind a dummy head cell, so new
        ;; entries can be spliced in with `setcdr'.
        (word-list (list nil))
        word-table
        block-list block-word-table block-end
        tail elt range val idx)
    (set-char-table-range table (cons 0 (max-char)) 0)
    (setq tail unidata-list)
    (setq block-end -1)
    (while tail
      (setq elt (car tail) tail (cdr tail))
      (setq range (car elt)
            val (funcall val-func (nth prop-idx elt)))
      ;; Treat the sequence of "CJK COMPATIBILITY IDEOGRAPH-XXXX" and
      ;; "VARIATION SELECTOR-XXX" as a block.
      (if (and (consp val) (eq prop 'name)
               (or (and (eq (car val) 'CJK)
                        (eq (nth 1 val) 'COMPATIBILITY))
                   (and (>= range #xe0100)
                        (eq (car val) 'VARIATION)
                        (eq (nth 1 val) 'SELECTOR))))
          (let ((first (car val))
                (second (nth 1 val))
                (start range))
            ;; A run consisting of a single entry never enters the loop
            ;; below, so start from a one-character block instead of
            ;; inheriting BLOCK-END from an earlier block (or -1).
            (setq block-end start)
            ;; Swallow the following entries whose first two words are
            ;; the same; the last one swallowed ends the block.
            (while (and (setq elt (car tail) range (car elt)
                              val (funcall val-func (nth prop-idx elt)))
                        (consp val)
                        (eq first (car val))
                        (eq second (nth 1 val)))
              (setq block-end range
                    tail (cdr tail)))
            (setq range (cons start block-end)
                  val (if (eq first 'CJK) 'CJK\ COMPATIBILITY\ IDEOGRAPH
                        'VARIATION\ SELECTOR))))

      (if (consp range)
          ;; A range of characters sharing one block name.
          (if val
              (let ((slot (assq val block-list)))
                ;; Copy the cons so that the element of `unidata-list'
                ;; is not shared with BLOCK-LIST.
                (setq range (cons (car range) (cdr range)))
                (setq block-end (cdr range))
                (if slot
                    (nconc slot (list range))
                  (push (list val range) block-list))))
        ;; A single character: handle its whole 128-character block.
        (let* ((start (lsh (lsh range -7) 7))
               (limit (+ start 127))
               (vec (make-vector 128 nil)))
          (if (<= start block-end)
              ;; START overlap with the previous block.
              (aset table range (nth prop-idx elt))
            (if val
                (aset vec (- range start) val))
            ;; Collect the remaining single characters that fall in the
            ;; same 128-character block.
            (while (and (setq elt (car tail) range (car elt))
                        (integerp range)
                        (<= range limit))
              (setq val (funcall val-func (nth prop-idx elt)))
              (if val
                  (aset vec (- range start) val))
              (setq tail (cdr tail)))
            (setq vec (unidata-word-list-compress vec))
            (when vec
              ;; Count how often each symbol is used.
              (dotimes (i (length vec))
                (dolist (elt (aref vec i))
                  (if (symbolp elt)
                      (let ((slot (assq elt word-list)))
                        (if slot
                            (setcdr slot (1+ (cdr slot)))
                          (setcdr word-list
                                  (cons (cons elt 1) (cdr word-list))))))))
              (set-char-table-range table (cons start limit) vec))))))
    ;; Order the symbols by decreasing frequency, then replace each
    ;; count by the code `unidata-encode-word' gives for that position.
    (setq word-list (sort (cdr word-list)
                          #'(lambda (x y) (> (cdr x) (cdr y)))))
    (setq tail word-list idx 0)
    (while tail
      (setcdr (car tail) (unidata-encode-word idx))
      (setq idx (1+ idx) tail (cdr tail)))
    (setq word-table (make-vector (length word-list) nil))
    (setq idx 0)
    (dolist (elt word-list)
      (aset word-table idx (car elt))
      (setq idx (1+ idx)))

    (if (and (eq prop 'decomposition)
             (> idx 32))
        (error "Too many symbols in decomposition data"))

    ;; Turn the vector stored for each 128-character block into a
    ;; single string.
    (dotimes (i (/ #x110000 128))
      (let* ((idx (* i 128))
             (vec (aref table idx)))
        (when (vectorp vec)
          (dotimes (i (length vec))
            (let ((tail (aref vec i))
                  elt code)
              (if (not tail)
                  (aset vec i "\0")
                ;; Integers are used as is; symbols are replaced by the
                ;; code assigned to them above.
                (while tail
                  (setq elt (car tail)
                        code (if (integerp elt) elt
                               (cdr (assq elt word-list))))
                  (setcar tail (string code))
                  (setq tail (cdr tail)))
                (aset vec i (mapconcat 'identity (aref vec i) "")))))
          (set-char-table-range
           table (cons idx (+ idx 127))
           (mapconcat 'identity vec "")))))

    ;; Record the block names and mark every block range with 1 + the
    ;; index of its name.
    (setq block-word-table (make-vector (length block-list) nil))
    (setq idx 0)
    (dolist (elt block-list)
      (dolist (e (cdr elt))
        (set-char-table-range table e (1+ idx)))
      (aset block-word-table idx (car elt))
      (setq idx (1+ idx)))

    (set-char-table-extra-slot table 0 prop)
    (set-char-table-extra-slot table 4 (cons word-table block-word-table))
    table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1007
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-split-name (str)
  "Split the character name STR into a list of interned words.
If STR is a symbol (including nil) return it unchanged; if STR is the
empty string return nil.

Words are separated by spaces.  A hyphen also separates two words, and
is itself kept as the symbol `-' between them, but only when it is
neither the first character of a word, the last character of STR, nor
directly followed by a space; any other hyphen stays part of its word."
  (cond
   ((symbolp str) str)
   ((zerop (length str)) nil)
   (t
    (let ((end (length str))
          (start 0)                     ; where the current word begins
          (pos 0)
          (words nil)                   ; collected in reverse order
          ch)
      (while (< pos end)
        (setq ch (aref str pos))
        (cond
         ((= ch ?\s)
          (push (intern (substring str start pos)) words)
          (setq start (1+ pos)))
         ((and (= ch ?-)
               (< start pos)
               (< (1+ pos) end)
               (/= (aref str (1+ pos)) ?\s))
          (push (intern (substring str start pos)) words)
          (push '- words)
          (setq start (1+ pos))))
        (setq pos (1+ pos)))
      (push (intern (substring str start)) words)
      (nreverse words)))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1027
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-gen-table-name (prop)
  "Generate and return a char-table for the name-like property PROP.
The table is built by `unidata-gen-table-word-list' with
`unidata-split-name' as the value splitter.  `unidata-get-name' and
`unidata-put-name' are byte-compiled and installed in extra slots 1
and 2.  Extra slot 4 is replaced by a vector: for PROP `name' it holds
the word table, the block word table and
`unidata-name-jamo-name-table'; for any other PROP it holds the word
table only."
  (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name))
         ;; (WORD-TABLE . BLOCK-WORD-TABLE) as left by the generator.
         (tables (char-table-extra-slot table 4)))
    (byte-compile 'unidata-get-name)
    (byte-compile 'unidata-put-name)
    (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-name))
    (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-name))
    (set-char-table-extra-slot
     table 4
     (if (eq prop 'name)
         (vector (car tables)
                 (cdr tables)
                 unidata-name-jamo-name-table)
       (vector (car tables))))
    table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1044
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-split-decomposition (str)
  "Parse the decomposition field STR into a list.
If STR is a symbol (including nil) return it unchanged; if STR is the
empty string return nil.

Otherwise STR is split at spaces.  A field that starts with `<' is
interned after dropping its first and last characters (so \"<compat>\"
becomes the symbol `compat'); every other field is read as a
hexadecimal number."
  (cond
   ((symbolp str) str)
   ((zerop (length str)) nil)
   (t
    (let ((end (length str))
          (start 0)                     ; where the current field begins
          (pos 0)
          (items nil))                  ; collected in reverse order
      ;; POS runs one step past the last character so that the end of
      ;; STR closes the final field exactly as a space would.
      (while (<= pos end)
        (when (or (= pos end)
                  (= (aref str pos) ?\s))
          (push (if (= (aref str start) ?<)
                    (intern (substring str (1+ start) (1- pos)))
                  (string-to-number (substring str start pos) 16))
                items)
          (setq start (1+ pos)))
        (setq pos (1+ pos)))
      (nreverse items)))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1065
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1066
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-gen-table-decomposition (prop)
  "Generate and return a char-table for the decomposition property PROP.
The table is built by `unidata-gen-table-word-list' with
`unidata-split-decomposition' as the value splitter.
`unidata-get-decomposition' and `unidata-put-decomposition' are
byte-compiled and installed in extra slots 1 and 2.  Extra slot 4,
which the generator leaves as (WORD-TABLE . BLOCK-WORD-TABLE), is
reduced to WORD-TABLE alone."
  (let* ((table (unidata-gen-table-word-list prop
                                             'unidata-split-decomposition))
         (symbol-words (car (char-table-extra-slot table 4))))
    (byte-compile 'unidata-get-decomposition)
    (byte-compile 'unidata-put-decomposition)
    (set-char-table-extra-slot
     table 1 (symbol-function 'unidata-get-decomposition))
    (set-char-table-extra-slot
     table 2 (symbol-function 'unidata-put-decomposition))
    (set-char-table-extra-slot table 4 symbol-words)
    table))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1078
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1079
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1080
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-describe-general-category (val)
  "Return a descriptive string for the general-category symbol VAL.
VAL is looked up with `assq' among the category symbols listed
below (e.g. `Lu').  Return nil if VAL is not one of them."
  (let ((descriptions
         '((Lu . "Letter, Uppercase")
           (Ll . "Letter, Lowercase")
           (Lt . "Letter, Titlecase")
           (Lm . "Letter, Modifier")
           (Lo . "Letter, Other")
           (Mn . "Mark, Nonspacing")
           (Mc . "Mark, Spacing Combining")
           (Me . "Mark, Enclosing")
           (Nd . "Number, Decimal Digit")
           (Nl . "Number, Letter")
           (No . "Number, Other")
           (Pc . "Punctuation, Connector")
           (Pd . "Punctuation, Dash")
           (Ps . "Punctuation, Open")
           (Pe . "Punctuation, Close")
           (Pi . "Punctuation, Initial quote")
           (Pf . "Punctuation, Final quote")
           (Po . "Punctuation, Other")
           (Sm . "Symbol, Math")
           (Sc . "Symbol, Currency")
           (Sk . "Symbol, Modifier")
           (So . "Symbol, Other")
           (Zs . "Separator, Space")
           (Zl . "Separator, Line")
           (Zp . "Separator, Paragraph")
           (Cc . "Other, Control")
           (Cf . "Other, Format")
           (Cs . "Other, Surrogate")
           (Co . "Other, Private Use")
           (Cn . "Other, Not Assigned"))))
    (cdr (assq val descriptions))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1113
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-describe-canonical-combining-class (val)
  "Return a descriptive string for the canonical combining class VAL.
VAL is looked up with `assq' among the integer class values listed
below.  Return nil if VAL is not one of them."
  (let ((descriptions
         '((0 . "Spacing, split, enclosing, reordrant, and Tibetan subjoined")
           (1 . "Overlays and interior")
           (7 . "Nuktas")
           (8 . "Hiragana/Katakana voicing marks")
           (9 . "Viramas")
           (10 . "Start of fixed position classes")
           (199 . "End of fixed position classes")
           (200 . "Below left attached")
           (202 . "Below attached")
           (204 . "Below right attached")
           (208 . "Left attached (reordrant around single base character)")
           (210 . "Right attached")
           (212 . "Above left attached")
           (214 . "Above attached")
           (216 . "Above right attached")
           (218 . "Below left")
           (220 . "Below")
           (222 . "Below right")
           (224 . "Left (reordrant around single base character)")
           (226 . "Right")
           (228 . "Above left")
           (230 . "Above")
           (232 . "Above right")
           (233 . "Double below")
           (234 . "Double above")
           (240 . "Below (iota subscript)"))))
    (cdr (assq val descriptions))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1142
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-describe-bidi-class (val)
  "Return a descriptive string for the bidi-class symbol VAL.
VAL is looked up with `assq' among the class symbols listed below
\(e.g. `L' or `AL').  Return nil if VAL is not one of them."
  (let ((descriptions
         '((L . "Left-to-Right")
           (LRE . "Left-to-Right Embedding")
           (LRO . "Left-to-Right Override")
           (R . "Right-to-Left")
           (AL . "Right-to-Left Arabic")
           (RLE . "Right-to-Left Embedding")
           (RLO . "Right-to-Left Override")
           (PDF . "Pop Directional Format")
           (EN . "European Number")
           (ES . "European Number Separator")
           (ET . "European Number Terminator")
           (AN . "Arabic Number")
           (CS . "Common Number Separator")
           (NSM . "Non-Spacing Mark")
           (BN . "Boundary Neutral")
           (B . "Paragraph Separator")
           (S . "Segment Separator")
           (WS . "Whitespace")
           (ON . "Other Neutrals"))))
    (cdr (assq val descriptions))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1164
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-describe-decomposition (val)
  "Return a string describing the decomposition VAL.
VAL is a sequence whose elements are symbols or characters.  A
symbol is rendered as its name.  A character is rendered between
two apostrophes, as a one-character string passed through
`compose-string' with the components TAB, the character, TAB.
The rendered elements are joined with single spaces."
  (let ((apostrophe (string ?')))
    (mapconcat
     #'identity
     (mapcar #'(lambda (element)
                 (if (symbolp element)
                     (symbol-name element)
                   (concat apostrophe
                           (compose-string (string element) 0 1
                                           (string ?\t element ?\t))
                           apostrophe)))
             val)
     " ")))
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1173
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1174 ;; Verify if we can retrieve correct values from the generated
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1175 ;; char-tables.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1176
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
(defun unidata-check ()
  "Check the generated char-tables against the entries of `unidata-list'.
For each property in `unidata-prop-alist', generate its table with
the property's generator, then, for every entry of `unidata-list'
whose car is not a cons, decode the table value for that character
with the function stored in the table's extra slot 1 and compare it
with the value taken from the entry.  Every mismatch is inserted
into the current buffer as a pair of lines starting with \">\" (the
value from the entry) and \"<\" (the value from the table).  A
progress message is shown each time the character code passes
another multiple of #x400."
  (dolist (prop-entry unidata-prop-alist)
    (let* ((prop (car prop-entry))
           (index (unidata-prop-index prop))
           (generator (unidata-prop-generator prop))
           (table (progn
                    (message "Generating %S table..." prop)
                    (funcall generator prop)))
           (decoder (char-table-extra-slot table 1))
           ;; Next character code at which to report progress.
           (next-report #x400))
      (dolist (record unidata-list)
        (let ((char (car record))
              (expected (nth index record)))
          ;; An empty field means the property has no value.
          (when (and (stringp expected) (zerop (length expected)))
            (setq expected nil))
          ;; Entries whose car is a cons are not checked.
          (unless (consp char)
            (let ((actual (funcall decoder char (aref table char) table)))
              ;; Convert the raw string to the type this generator's
              ;; table is expected to yield.
              (when expected
                (setq expected
                      (cond ((eq generator 'unidata-gen-table-symbol)
                             (intern expected))
                            ((eq generator 'unidata-gen-table-integer)
                             (string-to-number expected))
                            ((eq generator 'unidata-gen-table-character)
                             (string-to-number expected 16))
                            ((eq generator 'unidata-gen-table-decomposition)
                             (unidata-split-decomposition expected))
                            (t expected))))
              (when (>= char next-report)
                (message "%S %04X" prop next-report)
                (setq next-report (+ next-report #x400)))
              (unless (equal expected actual)
                (insert (format "> %04X %S\n< %04X %S\n"
                                char expected char actual)))
              (sit-for 0))))))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1211
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1212 ;; The entry function. It generates files described in the header
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1213 ;; comment of this file.
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1214
90174
f3973ae57d8b (unidata-text-file): Delete it.
Kenichi Handa <handa@m17n.org>
parents: 90162
diff changeset
(defun unidata-gen-files (&optional unidata-text-file)
  "Generate \"charprop.el\" and one table file per character property.
UNIDATA-TEXT-FILE is passed to `unidata-setup-list'.  If it is nil,
the first element of `command-line-args-left' is used instead and
is removed from that list.

For each property in `unidata-prop-alist', a \"FILE:\" comment and a
`define-char-code-property' form naming the property's file are
written to \"charprop.el\", and the property's file is written with
a `define-char-code-property' form containing the generated table.
If the property has a describer, it is stored in extra slot 3 of
the table; a describer that is not a built-in function is
byte-compiled first and its function object is stored.  All files
are written with `coding-system-for-write' bound to `utf-8-unix'."
  (unless unidata-text-file
    (setq unidata-text-file (pop command-line-args-left)))
  (unidata-setup-list unidata-text-file)
  (let ((coding-system-for-write 'utf-8-unix)
        (charprop-file "charprop.el"))
    (with-temp-file charprop-file
      (insert ";; Automatically generated by unidata-gen.el.\n")
      (dolist (elt unidata-prop-alist)
        (let* ((prop (car elt))
               (generator (unidata-prop-generator prop))
               (file (unidata-prop-file prop))
               (docstring (unidata-prop-docstring prop))
               (describer (unidata-prop-describer prop)))
          ;; Filename in this comment line is extracted by sed in
          ;; Makefile.
          (insert (format ";; FILE: %s\n" file)
                  (format "(define-char-code-property '%S %S\n %S)\n"
                          prop file docstring))
          (with-temp-file file
            (message "Generating %s..." file)
            (let ((table (funcall generator prop)))
              (when describer
                (set-char-table-extra-slot
                 table 3
                 (if (subrp (symbol-function describer))
                     describer
                   (byte-compile describer)
                   (symbol-function describer))))
              (insert ";; Copyright (C) 1991-2009, 2010 Unicode, Inc.
;; This file was generated from the Unicode data file at
;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt.
;; See lisp/international/README for the copyright and permission notice.\n")
              (insert (format "(define-char-code-property '%S %S %S)\n"
                              prop table docstring))
              (insert ";; Local Variables:\n"
                      ";; coding: utf-8\n"
                      ";; no-byte-compile: t\n"
                      ";; End:\n\n")
              (insert (format ";; %s ends here\n" file))))))
      (message "Writing %s..." charprop-file)
      (insert ";; Local Variables:\n"
              ";; coding: utf-8\n"
              ";; no-byte-compile: t\n"
              ";; End:\n\n")
      (insert (format ";; %s ends here\n" charprop-file)))))
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1261
90162
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
1262
88ca83396d15 Typo fixed in comments. Change
Kenichi Handa <handa@m17n.org>
parents: 90112
diff changeset
1263
90097
380964b11586 Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents: 90086
diff changeset
1264 ;; arch-tag: 961c862e-b821-447e-9b8a-bfbab9c2d525
90086
f16730ea4562 New file.
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1265 ;;; unidata-gen.el ends here