Mercurial > emacs
annotate src/charset.h @ 97228:390c190a3d44
*** empty log message ***
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Sat, 02 Aug 2008 16:27:01 +0000 |
parents | 34d4840c6fd2 |
children | bfd3ac81ee0f |
rev | line source |
---|---|
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
1 /* Header for charset handler. |
68651
3bd95f4f2941
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
67658
diff
changeset
|
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, |
79759 | 3 2006, 2007, 2008 Free Software Foundation, Inc. |
74605
6ee41fdd69ff
Update AIST copyright years.
Kenichi Handa <handa@m17n.org>
parents:
73055
diff
changeset
|
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
79759 | 5 2005, 2006, 2007, 2008 |
67658 | 6 National Institute of Advanced Industrial Science and Technology (AIST) |
7 Registration Number H14PRO021 | |
17052 | 8 |
89483 | 9 Copyright (C) 2003 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
10 National Institute of Advanced Industrial Science and Technology (AIST) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
11 Registration Number H13PRO009 |
17052 | 12 |
17071 | 13 This file is part of GNU Emacs. |
14 | |
94994
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94920
diff
changeset
|
15 GNU Emacs is free software: you can redistribute it and/or modify |
17071 | 16 it under the terms of the GNU General Public License as published by |
94994
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94920
diff
changeset
|
17 the Free Software Foundation, either version 3 of the License, or |
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94920
diff
changeset
|
18 (at your option) any later version. |
17052 | 19 |
17071 | 20 GNU Emacs is distributed in the hope that it will be useful, |
21 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 GNU General Public License for more details. | |
17052 | 24 |
17071 | 25 You should have received a copy of the GNU General Public License |
94994
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94920
diff
changeset
|
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
17052 | 27 |
29570
825505ff211e
(EMACS_CHARSET_H): Renamed from _CHARSET_H.
Kenichi Handa <handa@m17n.org>
parents:
29539
diff
changeset
|
28 #ifndef EMACS_CHARSET_H |
825505ff211e
(EMACS_CHARSET_H): Renamed from _CHARSET_H.
Kenichi Handa <handa@m17n.org>
parents:
29539
diff
changeset
|
29 #define EMACS_CHARSET_H |
17052 | 30 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
31 /* Index to arguments of Fdefine_charset_internal. */ |
17052 | 32 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
33 enum define_charset_arg_index |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
34 { |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
35 charset_arg_name, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
36 charset_arg_dimension, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
37 charset_arg_code_space, |
88677
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
parents:
88476
diff
changeset
|
38 charset_arg_min_code, |
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
parents:
88476
diff
changeset
|
39 charset_arg_max_code, |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
40 charset_arg_iso_final, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
41 charset_arg_iso_revision, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
42 charset_arg_emacs_mule_id, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
43 charset_arg_ascii_compatible_p, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
44 charset_arg_supplementary_p, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
45 charset_arg_invalid_code, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
46 charset_arg_code_offset, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
47 charset_arg_map, |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
48 charset_arg_subset, |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
49 charset_arg_superset, |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
50 charset_arg_unify_map, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
51 charset_arg_plist, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
52 charset_arg_max |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
53 }; |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
54 |
17052 | 55 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
56 /* Indices to charset attributes vector. */ |
17052 | 57 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
58 enum charset_attr_index |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
59 { |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
60 /* ID number of the charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
61 charset_id, |
17052 | 62 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
63 /* Name of the charset (symbol). */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
64 charset_name, |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
65 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
66 /* Property list of the charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
67 charset_plist, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
68 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
69 /* If the method of the charset is `MAP_DEFERRED', the value is a |
88436 | 70 mapping vector or a file name that contains a mapping vector. |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
71 Otherwise, nil. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
72 charset_map, |
17052 | 73 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
74 /* If the method of the charset is `MAP', the value is a vector |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
75 that maps code points of the charset to characters. The vector |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
76 is indexed by a character index. A character index is |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
77 calculated from a code point and the code-space table of the |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
78 charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
79 charset_decoder, |
17833
59aa4a0772f6
(VALID_CHAR_P): New macro.
Kenichi Handa <handa@m17n.org>
parents:
17726
diff
changeset
|
80 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
81 /* If the method of the charset is `MAP', the value is a |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
82 char-table that maps characters of the charset to code |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
83 points. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
84 charset_encoder, |
17052 | 85 |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
86 /* If the method of the charset is `SUBSET', the value is a vector |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
87 that has this form: |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
88 |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
89 [ CHARSET-ID MIN-CODE MAX-CODE OFFSET ] |
21033
9f32198e0d9f
(NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
20932
diff
changeset
|
90 |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
91 CHARSET-ID is an ID number of a parent charset. MIN-CODE and |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
92 MAX-CODE specify the range of characters inherited from the |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
93 parent. OFFSET is an integer value to add to a code point of |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
94 the parent charset to get the corresponding code point of this |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
95 charset. */ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
96 charset_subset, |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
97 |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
98 /* If the method of the charset is `SUPERSET', the value is a list |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
99 whose elements have this form: |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
100 |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
101 (CHARSET-ID . OFFSET) |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
102 |
95259 | 103 CHARSET-IDs are ID numbers of parent charsets. OFFSET is an |
104 integer value to add to a code point of the parent charset to | |
105 get the corresponding code point of this charset. */ | |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
106 charset_superset, |
21033
9f32198e0d9f
(NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
20932
diff
changeset
|
107 |
88839 | 108 /* The value is a mapping vector or a file name that contains the |
109 mapping. This defines how characters in the charset should be | |
110 unified with Unicode. The value of the member | |
88436 | 111 `charset_deunifier' is created from this information. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
112 charset_unify_map, |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
113 |
88436 | 114 /* If characters in the charset must be unified with Unicode, the value |
89804
109e674576b8
(CHAR_CHARSET_P): Fix for the case that the method is subset or
Kenichi Handa <handa@m17n.org>
parents:
89682
diff
changeset
|
115 is a char table that maps a unified Unicode character code to |
109e674576b8
(CHAR_CHARSET_P): Fix for the case that the method is subset or
Kenichi Handa <handa@m17n.org>
parents:
89682
diff
changeset
|
116 the non-unified character code in the charset. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
117 charset_deunifier, |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
118 |
88839 | 119 /* The length of the charset attribute vector. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
120 charset_attr_max |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
121 }; |
17052 | 122 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
123 /* Methods for converting code points and characters of charsets. */ |
17052 | 124 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
125 enum charset_method |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
126 { |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
127 /* For a charset of this method, a character code is calculated |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
128 from a character index (which is calculated from a code point) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
129 simply by adding an offset value. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
130 CHARSET_METHOD_OFFSET, |
25505
4d5f87073d63
(MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents:
25234
diff
changeset
|
131 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
132 /* For a charset of this method, a decoder vector and an encoder |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
133 char-table are used for code point <-> character code |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
134 conversion. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
135 CHARSET_METHOD_MAP, |
17052 | 136 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
137 /* Same as above but decoder and encoder are loaded from a file on |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
138 demand. Once loaded, the method is changed to |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
139 CHARSET_METHOD_MAP. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
140 CHARSET_METHOD_MAP_DEFERRED, |
17052 | 141 |
88839 | 142 /* A charset of this method is a subset of another charset. */ |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
143 CHARSET_METHOD_SUBSET, |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
144 |
88839 | 145 /* A charset of this method is a superset of other charsets. */ |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
146 CHARSET_METHOD_SUPERSET |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
147 }; |
17052 | 148 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
149 struct charset |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
150 { |
88436 | 151 /* Index to charset_table. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
152 int id; |
17052 | 153 |
88436 | 154 /* Index to Vcharset_hash_table. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
155 int hash_index; |
17052 | 156 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
157 /* Dimension of the charset: 1, 2, 3, or 4. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
158 int dimension; |
21033
9f32198e0d9f
(NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
20932
diff
changeset
|
159 |
88476
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
160 /* Byte code range of each dimension. <code_space>[4N] is a minimum |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
161 byte code of the (N+1)th dimension, <code_space>[4N+1] is a |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
162 maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
163 (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3] |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
164 is a number of characters contained in the first to (N+1)th |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
165 dimensions. We get `char-index' of a `code-point' from this |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
166 information. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
167 int code_space[16]; |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
168 |
88476
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
169 /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
170 of code_space_mask[B] is set. This array is used to quickly |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
171 check if a code-point is in a valid range. */ |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
172 unsigned char *code_space_mask; |
47c2e6825a1e
(struct charset): New member `code_space_mask'.
Kenichi Handa <handa@m17n.org>
parents:
88436
diff
changeset
|
173 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
174 /* 1 if there's no gap in code-points. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
175 int code_linear_p; |
19319 | 176 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
177 /* If the charset is treated as 94-chars in ISO-2022, the value is 0. |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
178 If the charset is treated as 96-chars in ISO-2022, the value is 1. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
179 int iso_chars_96; |
17052 | 180 |
88436 | 181 /* ISO final byte of the charset: 48..127. It may be -1 if the |
182 charset doesn't conform to ISO-2022. */ | |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
183 int iso_final; |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
184 |
88436 | 185 /* ISO revision number of the charset. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
186 int iso_revision; |
17052 | 187 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
188 /* If the charset is identical to what is supported by Emacs 21 and the |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
189 priors, the identification number of the charset used in those |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
190 versions. Otherwise, -1. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
191 int emacs_mule_id; |
17052 | 192 |
91005 | 193 /* Nonzero if the charset is compatible with ASCII. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
194 int ascii_compatible_p; |
51351
32900e49a097
(VALID_LEADING_CODE_P): New macro.
Kenichi Handa <handa@m17n.org>
parents:
50628
diff
changeset
|
195 |
91005 | 196 /* Nonzero if the charset is supplementary. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
197 int supplementary_p; |
17052 | 198 |
91005 | 199 /* Nonzero if all the code points are representable by Lisp_Int. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
200 int compact_codes_p; |
17052 | 201 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
202 /* The method for encoding/decoding characters of the charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
203 enum charset_method method; |
17052 | 204 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
205 /* Minimum and maximum code points of the charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
206 unsigned min_code, max_code; |
17052 | 207 |
88677
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
parents:
88476
diff
changeset
|
208 /* Offset value used by macros CODE_POINT_TO_INDEX and |
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
parents:
88476
diff
changeset
|
209 INDEX_TO_CODE_POINT. */ |
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
parents:
88476
diff
changeset
|
210 unsigned char_index_offset; |
19319 | 211 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
212 /* Minimum and maximum character codes of the charset. If the |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
213 charset is compatible with ASCII, min_char is a minimum non-ASCII |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
214 character of the charset. If the method of charset is |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
215 CHARSET_METHOD_OFFSET, even if the charset is unified, min_char |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
216 and max_char don't change. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
217 int min_char, max_char; |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
218 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
219 /* The code returned by ENCODE_CHAR if a character is not encodable |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
220 by the charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
221 unsigned invalid_code; |
17052 | 222 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
223 /* If the method of the charset is CHARSET_METHOD_MAP, this is a |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
224 table of bits used to quickly and roughly guess if a character |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
225 belongs to the charset. |
17052 | 226 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
227 The first 64 elements are 512 bits for characters less than |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
228 0x10000. Each bit corresponds to a 128-character block. The last |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
229 126 elements are 1008 bits for the greater characters |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
230 (0x10000..0x3FFFFF). Each bit corresponds to a 4096-character |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
231 block. |
17052 | 232 |
88839 | 233 If a bit is 1, at least one character in the corresponding block is |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
234 in this charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
235 unsigned char fast_map[190]; |
17052 | 236 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
237 /* Offset value to calculate a character code from code-point, and |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
238 vice versa. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
239 int code_offset; |
17052 | 240 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
241 int unified_p; |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
242 }; |
17052 | 243 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
244 /* Hash table of charset symbols vs. the corresponding attribute |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
245 vectors. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
246 extern Lisp_Object Vcharset_hash_table; |
17052 | 247 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
248 /* Table of struct charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
249 extern struct charset *charset_table; |
17052 | 250 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
251 #define CHARSET_FROM_ID(id) (charset_table + (id)) |
17052 | 252 |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
253 extern Lisp_Object Vcharset_ordered_list; |
94920
754dcec40ffe
(Vcharset_non_preferred_head)
Kenichi Handa <handa@m17n.org>
parents:
92107
diff
changeset
|
254 extern Lisp_Object Vcharset_non_preferred_head; |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
255 |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
256 /* Incremented every time we change the priority of charsets. */ |
89065
62aa2d4f3773
(charset_ordered_list_tick): Declare extern.
Dave Love <fx@gnu.org>
parents:
89064
diff
changeset
|
257 extern unsigned short charset_ordered_list_tick; |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
258 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
259 extern Lisp_Object Vcharset_list; |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
260 extern Lisp_Object Viso_2022_charset_list; |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
261 extern Lisp_Object Vemacs_mule_charset_list; |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
262 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
263 extern struct charset *emacs_mule_charset[256]; |
51351
32900e49a097
(VALID_LEADING_CODE_P): New macro.
Kenichi Handa <handa@m17n.org>
parents:
50628
diff
changeset
|
264 |
94920
754dcec40ffe
(Vcharset_non_preferred_head)
Kenichi Handa <handa@m17n.org>
parents:
92107
diff
changeset
|
265 extern Lisp_Object Vcurrent_iso639_language; |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
266 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
267 /* Macros to access information about charset. */ |
19319 | 268 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
269 /* Return the attribute vector of charset whose symbol is SYMBOL. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
270 #define CHARSET_SYMBOL_ATTRIBUTES(symbol) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
271 Fgethash ((symbol), Vcharset_hash_table, Qnil) |
17052 | 272 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
273 #define CHARSET_ATTR_ID(attrs) AREF ((attrs), charset_id) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
274 #define CHARSET_ATTR_NAME(attrs) AREF ((attrs), charset_name) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
275 #define CHARSET_ATTR_PLIST(attrs) AREF ((attrs), charset_plist) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
276 #define CHARSET_ATTR_MAP(attrs) AREF ((attrs), charset_map) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
277 #define CHARSET_ATTR_DECODER(attrs) AREF ((attrs), charset_decoder) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
278 #define CHARSET_ATTR_ENCODER(attrs) AREF ((attrs), charset_encoder) |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
279 #define CHARSET_ATTR_SUBSET(attrs) AREF ((attrs), charset_subset) |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
280 #define CHARSET_ATTR_SUPERSET(attrs) AREF ((attrs), charset_superset) |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
281 #define CHARSET_ATTR_UNIFY_MAP(attrs) AREF ((attrs), charset_unify_map) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
282 #define CHARSET_ATTR_DEUNIFIER(attrs) AREF ((attrs), charset_deunifier) |
17052 | 283 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
284 #define CHARSET_SYMBOL_ID(symbol) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
285 CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol)) |
17052 | 286 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
287 /* Return an index to Vcharset_hash_table of the charset whose symbol |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
288 is SYMBOL. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
289 #define CHARSET_SYMBOL_HASH_INDEX(symbol) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
290 hash_lookup (XHASH_TABLE (Vcharset_hash_table), symbol, NULL) |
38395
b7a9187751b2
(CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents:
35949
diff
changeset
|
291 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
292 /* Return the attribute vector of CHARSET. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
293 #define CHARSET_ATTRIBUTES(charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
294 (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index)) |
17052 | 295 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
296 #define CHARSET_ID(charset) ((charset)->id) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
297 #define CHARSET_HASH_INDEX(charset) ((charset)->hash_index) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
298 #define CHARSET_DIMENSION(charset) ((charset)->dimension) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
299 #define CHARSET_CODE_SPACE(charset) ((charset)->code_space) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
300 #define CHARSET_CODE_LINEAR_P(charset) ((charset)->code_linear_p) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
301 #define CHARSET_ISO_CHARS_96(charset) ((charset)->iso_chars_96) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
302 #define CHARSET_ISO_FINAL(charset) ((charset)->iso_final) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
303 #define CHARSET_ISO_PLANE(charset) ((charset)->iso_plane) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
304 #define CHARSET_ISO_REVISION(charset) ((charset)->iso_revision) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
305 #define CHARSET_EMACS_MULE_ID(charset) ((charset)->emacs_mule_id) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
306 #define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
307 #define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
308 #define CHARSET_METHOD(charset) ((charset)->method) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
309 #define CHARSET_MIN_CODE(charset) ((charset)->min_code) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
310 #define CHARSET_MAX_CODE(charset) ((charset)->max_code) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
311 #define CHARSET_INVALID_CODE(charset) ((charset)->invalid_code) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
312 #define CHARSET_MIN_CHAR(charset) ((charset)->min_char) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
313 #define CHARSET_MAX_CHAR(charset) ((charset)->max_char) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
314 #define CHARSET_CODE_OFFSET(charset) ((charset)->code_offset) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
315 #define CHARSET_UNIFIED_P(charset) ((charset)->unified_p) |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
316 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
317 #define CHARSET_NAME(charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
318 (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset))) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
319 #define CHARSET_MAP(charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
320 (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset))) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
321 #define CHARSET_DECODER(charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
322 (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset))) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
323 #define CHARSET_ENCODER(charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
324 (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset))) |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
325 #define CHARSET_SUBSET(charset) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
326 (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset))) |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
327 #define CHARSET_SUPERSET(charset) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
328 (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset))) |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
329 #define CHARSET_UNIFY_MAP(charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
330 (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset))) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
331 #define CHARSET_DEUNIFIER(charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
332 (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset))) |
26843
0aadeca4a4a7
In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents:
25637
diff
changeset
|
333 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
334 |
91005 | 335 /* Nonzero if OBJ is a valid charset symbol. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
336 #define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0) |
26843
0aadeca4a4a7
In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents:
25637
diff
changeset
|
337 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
338 /* Check if X is a valid charset symbol. If not, signal an error. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
339 #define CHECK_CHARSET(x) \ |
25505
4d5f87073d63
(MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents:
25234
diff
changeset
|
340 do { \ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
341 if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0) \ |
92107
5a1ea5a32206
(CHECK_CHARSET, CHECK_CHARSET_GET_ID, CHECK_CHARSET_GET_ATTR):
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91327
diff
changeset
|
342 wrong_type_argument (Qcharsetp, (x)); \ |
49275
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
343 } while (0) |
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
344 |
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
345 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
346 /* Check if X is a valid charset symbol. If valid, set ID to the id |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
347 number of the charset. Otherwise, signal an error. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
348 #define CHECK_CHARSET_GET_ID(x, id) \ |
49275
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
349 do { \ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
350 int idx; \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
351 \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
352 if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0) \ |
92107
5a1ea5a32206
(CHECK_CHARSET, CHECK_CHARSET_GET_ID, CHECK_CHARSET_GET_ATTR):
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91327
diff
changeset
|
353 wrong_type_argument (Qcharsetp, (x)); \ |
88854
8b5248397330
(CHECK_CHARSET_GET_ID): Use XINT on AREF result.
Dave Love <fx@gnu.org>
parents:
88839
diff
changeset
|
354 id = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \ |
8b5248397330
(CHECK_CHARSET_GET_ID): Use XINT on AREF result.
Dave Love <fx@gnu.org>
parents:
88839
diff
changeset
|
355 charset_id)); \ |
49275
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
356 } while (0) |
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
357 |
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
358 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
359 /* Check if X is a valid charset symbol. If valid, set ATTR to the |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
360 attr vector of the charset. Otherwise, signal an error. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
361 #define CHECK_CHARSET_GET_ATTR(x, attr) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
362 do { \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
363 if (!SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x))) \ |
92107
5a1ea5a32206
(CHECK_CHARSET, CHECK_CHARSET_GET_ID, CHECK_CHARSET_GET_ATTR):
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91327
diff
changeset
|
364 wrong_type_argument (Qcharsetp, (x)); \ |
17052 | 365 } while (0) |
366 | |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
367 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
368 #define CHECK_CHARSET_GET_CHARSET(x, charset) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
369 do { \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
370 int id; \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
371 CHECK_CHARSET_GET_ID (x, id); \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
372 charset = CHARSET_FROM_ID (id); \ |
20531
f019e056ad9a
(CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents:
20352
diff
changeset
|
373 } while (0) |
f019e056ad9a
(CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents:
20352
diff
changeset
|
374 |
49275
3a72263855b4
(NEXT_CHAR_BOUNDARY, PREV_CHAR_BOUNDARY): New macros.
Kenichi Handa <handa@m17n.org>
parents:
46547
diff
changeset
|
375 |
95259 | 376 /* Lookup Vcharset_ordered_list and return the first charset that |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
377 contains the character C. */ |
89980
b27fcfc48d2e
(CHAR_CHARSET): Shortcut for ASCII case.
Kenichi Handa <handa@m17n.org>
parents:
89889
diff
changeset
|
378 #define CHAR_CHARSET(c) \ |
b27fcfc48d2e
(CHAR_CHARSET): Shortcut for ASCII case.
Kenichi Handa <handa@m17n.org>
parents:
89889
diff
changeset
|
379 ((c) < 0x80 ? CHARSET_FROM_ID (charset_ascii) \ |
b27fcfc48d2e
(CHAR_CHARSET): Shortcut for ASCII case.
Kenichi Handa <handa@m17n.org>
parents:
89889
diff
changeset
|
380 : char_charset ((c), Qnil, NULL)) |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
381 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
382 #if 0 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
383 /* Char-table of charset-sets. Each element is a bool vector indexed |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
384 by a charset ID. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
385 extern Lisp_Object Vchar_charset_set; |
20531
f019e056ad9a
(CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents:
20352
diff
changeset
|
386 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
387 /* Charset-bag of character C. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
388 #define CHAR_CHARSET_SET(c) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
389 CHAR_TABLE_REF (Vchar_charset_set, c) |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
390 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
391 /* Check if two characters C1 and C2 belong to the same charset. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
392 #define SAME_CHARSET_P(c1, c2) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
393 intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2)) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
394 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
395 #endif |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
396 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
397 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
398 /* Return a character corresponding to the code-point CODE of CHARSET. |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
399 Try some optimization before calling decode_char. */ |
20531
f019e056ad9a
(CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents:
20352
diff
changeset
|
400 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
401 #define DECODE_CHAR(charset, code) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
402 ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
403 ? (code) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
404 : ((code) < (charset)->min_code || (code) > (charset)->max_code) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
405 ? -1 \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
406 : (charset)->unified_p \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
407 ? decode_char ((charset), (code)) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
408 : (charset)->method == CHARSET_METHOD_OFFSET \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
409 ? ((charset)->code_linear_p \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
410 ? (code) - (charset)->min_code + (charset)->code_offset \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
411 : decode_char ((charset), (code))) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
412 : (charset)->method == CHARSET_METHOD_MAP \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
413 ? ((charset)->code_linear_p \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
414 ? XINT (AREF (CHARSET_DECODER (charset), \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
415 (code) - (charset)->min_code)) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
416 : decode_char ((charset), (code))) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
417 : decode_char ((charset), (code))) |
20531
f019e056ad9a
(CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents:
20352
diff
changeset
|
418 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
419 |
89832
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
420 /* If CHARSET is a simple offset base charset, return its offset, |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
421 otherwise return -1. */ |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
422 #define CHARSET_OFFSET(charset) \ |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
423 (((charset)->method == CHARSET_METHOD_OFFSET \ |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
424 && (charset)->code_linear_p \ |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
425 && ! (charset)->unified_p) \ |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
426 ? (charset)->code_offset - (charset)->min_code \ |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
427 : -1) |
e2262814a871
(CHARSET_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89804
diff
changeset
|
428 |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
429 extern Lisp_Object charset_work; |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
430 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
431 /* Return a code point of CHAR in CHARSET. |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
432 Try some optimization before calling encode_char. */ |
20531
f019e056ad9a
(CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents:
20352
diff
changeset
|
433 |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
434 #define ENCODE_CHAR(charset, c) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
435 ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
436 ? (c) \ |
89366
0c9b64b6b6af
(ENCODE_CHAR): If the method is SUBSET or SUPERSET, call encode_char.
Kenichi Handa <handa@m17n.org>
parents:
89065
diff
changeset
|
437 : ((charset)->unified_p \ |
0c9b64b6b6af
(ENCODE_CHAR): If the method is SUBSET or SUPERSET, call encode_char.
Kenichi Handa <handa@m17n.org>
parents:
89065
diff
changeset
|
438 || (charset)->method == CHARSET_METHOD_SUBSET \ |
0c9b64b6b6af
(ENCODE_CHAR): If the method is SUBSET or SUPERSET, call encode_char.
Kenichi Handa <handa@m17n.org>
parents:
89065
diff
changeset
|
439 || (charset)->method == CHARSET_METHOD_SUPERSET) \ |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
440 ? encode_char ((charset), (c)) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
441 : ((c) < (charset)->min_char || (c) > (charset)->max_char) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
442 ? (charset)->invalid_code \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
443 : (charset)->method == CHARSET_METHOD_OFFSET \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
444 ? ((charset)->code_linear_p \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
445 ? (c) - (charset)->code_offset + (charset)->min_code \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
446 : encode_char ((charset), (c))) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
447 : (charset)->method == CHARSET_METHOD_MAP \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
448 ? ((charset)->compact_codes_p \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
449 ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
450 (NILP (charset_work) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
451 ? (charset)->invalid_code \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
452 : XFASTINT (charset_work))) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
453 : encode_char ((charset), (c))) \ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
454 : encode_char ((charset), (c))) |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
455 |
25505
4d5f87073d63
(MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents:
25234
diff
changeset
|
456 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
457 /* Set to 1 when a charset map is loaded, to warn that buffer text |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
458 and string data may be relocated. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
459 extern int charset_map_loaded; |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
460 |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
461 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
462 /* Set CHARSET to the highest-priority charset of C, and CODE to the |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
463 code-point of C in CHARSET. */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
464 #define SPLIT_CHAR(c, charset, code) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
465 ((charset) = char_charset ((c), Qnil, &(code))) |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
466 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
467 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
468 #define ISO_MAX_DIMENSION 3 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
469 #define ISO_MAX_CHARS 2 |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
470 #define ISO_MAX_FINAL 0x80 /* only 0x30..0x7F are used */ |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
471 |
17052 | 472 /* Mapping table from ISO2022's charset (specified by DIMENSION, |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
473 CHARS, and FINAL_CHAR) to Emacs' charset ID. Should be accessed by |
17052 | 474 macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
475 extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; |
17052 | 476 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
477 /* A charset of type iso2022 that has DIMENSION, CHARS, and FINAL |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
478 (final character). */ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
479 #define ISO_CHARSET_TABLE(dimension, chars_96, final) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
480 iso_charset_table[(dimension) - 1][(chars_96)][(final)] |
20589
3acb053e757e
(FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents:
20531
diff
changeset
|
481 |
91005 | 482 /* Nonzero if the charset whose fast map is FAST_MAP may contain C. */ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
483 #define CHARSET_FAST_MAP_REF(c, fast_map) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
484 ((c) < 0x10000 \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
485 ? fast_map[(c) >> 10] & (1 << (((c) >> 7) & 7)) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
486 : fast_map[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7))) |
26843
0aadeca4a4a7
In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents:
25637
diff
changeset
|
487 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
488 #define CHARSET_FAST_MAP_SET(c, fast_map) \ |
25505
4d5f87073d63
(MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents:
25234
diff
changeset
|
489 do { \ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
490 if ((c) < 0x10000) \ |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
491 (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7); \ |
25505
4d5f87073d63
(MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents:
25234
diff
changeset
|
492 else \ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
493 (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7); \ |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
494 } while (0) |
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
495 |
20531
f019e056ad9a
(CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents:
20352
diff
changeset
|
496 |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
497 |
91005 | 498 /* 1 if CHARSET may contain the character C. */ |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
499 #define CHAR_CHARSET_P(c, charset) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
500 ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ |
89804
109e674576b8
(CHAR_CHARSET_P): Fix for the case that the method is subset or
Kenichi Handa <handa@m17n.org>
parents:
89682
diff
changeset
|
501 || ((CHARSET_UNIFIED_P (charset) \ |
109e674576b8
(CHAR_CHARSET_P): Fix for the case that the method is subset or
Kenichi Handa <handa@m17n.org>
parents:
89682
diff
changeset
|
502 || (charset)->method == CHARSET_METHOD_SUBSET \ |
109e674576b8
(CHAR_CHARSET_P): Fix for the case that the method is subset or
Kenichi Handa <handa@m17n.org>
parents:
89682
diff
changeset
|
503 || (charset)->method == CHARSET_METHOD_SUPERSET) \ |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
504 ? encode_char ((charset), (c)) != (charset)->invalid_code \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
505 : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
506 && ((charset)->method == CHARSET_METHOD_OFFSET \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
507 ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
508 : ((charset)->method == CHARSET_METHOD_MAP \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
509 && (charset)->compact_codes_p) \ |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
510 ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \ |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
511 : encode_char ((charset), (c)) != (charset)->invalid_code)))) |
17052 | 512 |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
513 |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
514 /* Special macros for emacs-mule encoding. */ |
17052 | 515 |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
516 /* Leading-code followed by extended leading-code. DIMENSION/COLUMN */ |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
517 #define EMACS_MULE_LEADING_CODE_PRIVATE_11 0x9A /* 1/1 */ |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
518 #define EMACS_MULE_LEADING_CODE_PRIVATE_12 0x9B /* 1/2 */ |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
519 #define EMACS_MULE_LEADING_CODE_PRIVATE_21 0x9C /* 2/1 */ |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
520 #define EMACS_MULE_LEADING_CODE_PRIVATE_22 0x9D /* 2/2 */ |
20932
3c2c8431c51d
(INC_POS): Use macro BASE_LEADING_CODE_P.
Kenichi Handa <handa@m17n.org>
parents:
20904
diff
changeset
|
521 |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
522 extern struct charset *emacs_mule_charset[256]; |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
523 |
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
524 |
29004
383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents:
28513
diff
changeset
|
525 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
526 extern Lisp_Object Qcharsetp; |
17052 | 527 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
528 extern Lisp_Object Qascii, Qunicode; |
88949
57ca0c34b3be
(charset_8_bit): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88875
diff
changeset
|
529 extern int charset_ascii, charset_eight_bit; |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
530 extern int charset_iso_8859_1; |
89569
111c03888806
(charset_unicode): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
89483
diff
changeset
|
531 extern int charset_unicode; |
88682
e2170fdb6abc
(charset_jisx0201_roman, charset_jisx0208_1978,
Kenichi Handa <handa@m17n.org>
parents:
88677
diff
changeset
|
532 extern int charset_jisx0201_roman; |
e2170fdb6abc
(charset_jisx0201_roman, charset_jisx0208_1978,
Kenichi Handa <handa@m17n.org>
parents:
88677
diff
changeset
|
533 extern int charset_jisx0208_1978; |
e2170fdb6abc
(charset_jisx0201_roman, charset_jisx0208_1978,
Kenichi Handa <handa@m17n.org>
parents:
88677
diff
changeset
|
534 extern int charset_jisx0208; |
17726 | 535 |
89064 | 536 extern int charset_unibyte; |
22120
90f77c401689
Change terms unify/unification to
Kenichi Handa <handa@m17n.org>
parents:
21444
diff
changeset
|
537 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
538 extern struct charset *char_charset P_ ((int, Lisp_Object, unsigned *)); |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
539 extern Lisp_Object charset_attributes P_ ((int)); |
23488
958ab288116d
(Vauto_fill_chars): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
23202
diff
changeset
|
540 |
88364
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
541 extern int decode_char P_ ((struct charset *, unsigned)); |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
542 extern unsigned encode_char P_ ((struct charset *, int)); |
f6557aebe110
Completely re-written. Character and multibyte
Kenichi Handa <handa@m17n.org>
parents:
42473
diff
changeset
|
543 extern int string_xstring_p P_ ((Lisp_Object)); |
20719 | 544 |
88744
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
545 extern void map_charset_chars P_ ((void (*) (Lisp_Object, Lisp_Object), |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
546 Lisp_Object, Lisp_Object, |
5b9a72e491d4
(enum define_charset_arg_index): Delete
Kenichi Handa <handa@m17n.org>
parents:
88682
diff
changeset
|
547 struct charset *, unsigned, unsigned)); |
20719 | 548 |
88875
1a232a6d79fc
(Vcharset_ordered_list): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88854
diff
changeset
|
549 EXFUN (Funify_charset, 3); |
20719 | 550 |
29570
825505ff211e
(EMACS_CHARSET_H): Renamed from _CHARSET_H.
Kenichi Handa <handa@m17n.org>
parents:
29539
diff
changeset
|
551 #endif /* EMACS_CHARSET_H */ |
52401 | 552 |
553 /* arch-tag: 3b96db55-4961-481d-ac3e-219f46a2b3aa | |
554 (do not change this comment) */ |