88364
|
1 /* Header for charset handler.
|
20708
|
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
|
18341
|
3 Licensed to the Free Software Foundation.
|
38395
|
4 Copyright (C) 2001 Free Software Foundation, Inc.
|
88364
|
5 Copyright (C) 2001, 2002
|
|
6 National Institute of Advanced Industrial Science and Technology (AIST)
|
|
7 Registration Number H13PRO009
|
17052
|
8
|
17071
|
9 This file is part of GNU Emacs.
|
|
10
|
|
11 GNU Emacs is free software; you can redistribute it and/or modify
|
|
12 it under the terms of the GNU General Public License as published by
|
|
13 the Free Software Foundation; either version 2, or (at your option)
|
|
14 any later version.
|
17052
|
15
|
17071
|
16 GNU Emacs is distributed in the hope that it will be useful,
|
|
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 GNU General Public License for more details.
|
17052
|
20
|
17071
|
21 You should have received a copy of the GNU General Public License
|
|
22 along with GNU Emacs; see the file COPYING. If not, write to
|
|
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
24 Boston, MA 02111-1307, USA. */
|
17052
|
25
|
29570
|
26 #ifndef EMACS_CHARSET_H
|
|
27 #define EMACS_CHARSET_H
|
17052
|
28
|
88364
|
/* Positions of the individual arguments in the argument vector of
   Fdefine_charset_internal.  charset_arg_max comes last, so its value
   equals the number of indices listed before it.  */

enum define_charset_arg_index
  {
    charset_arg_name,
    charset_arg_dimension,
    charset_arg_code_space,
    charset_arg_min_code,
    charset_arg_max_code,
    charset_arg_iso_final,
    charset_arg_iso_revision,
    charset_arg_emacs_mule_id,
    charset_arg_ascii_compatible_p,
    charset_arg_supplementary_p,
    charset_arg_invalid_code,
    charset_arg_code_offset,
    charset_arg_map,
    charset_arg_subset,
    charset_arg_superset,
    charset_arg_unify_map,
    charset_arg_plist,
    charset_arg_max
  };
|
17052
|
52
|
88364
|
53
|
|
/* Slot numbers of the charset attributes vector.  */

enum charset_attr_index
  {
    /* ID number of the charset.  */
    charset_id,

    /* Name of the charset (a symbol).  */
    charset_name,

    /* Property list of the charset.  */
    charset_plist,

    /* When the charset's method is `MAP_DEFERRED', a mapping vector or
       the name of a file that contains the mapping vector.  Otherwise
       nil.  */
    charset_map,

    /* When the charset's method is `MAP', a vector that maps code
       points of the charset to characters.  The vector is indexed by
       a character index, which is calculated from a code point and
       the code-space table of the charset.  */
    charset_decoder,

    /* When the charset's method is `MAP', a char-table that maps
       characters of the charset to code points.  */
    charset_encoder,

    /* When the charset's method is `SUBSET', a vector of this form:

	[ CHARSET-ID MIN-CODE MAX-CODE OFFSET ]

       CHARSET-ID is the ID number of the parent charset.  MIN-CODE
       and MAX-CODE specify the range of characters inherited from the
       parent.  OFFSET is an integer to add to a code point of the
       parent charset to get the corresponding code point of this
       charset.  */
    charset_subset,

    /* When the charset's method is `SUPERSET', a list whose elements
       have this form:

	(CHARSET-ID . OFFSET)

       Each CHARSET-ID is the ID number of a parent charset.  OFFSET
       is an integer to add to a code point of that parent charset to
       get the corresponding code point of this charset.  */
    charset_superset,

    /* A mapping vector, or the name of a file that contains the
       mapping.  It defines how characters in the charset should be
       unified with Unicode.  The value of the slot
       `charset_deunifier' is created from this information.  */
    charset_unify_map,

    /* If characters in the charset must be unified with Unicode, a
       char-table that maps a character code in the charset to the
       corresponding Unicode character.  */
    charset_deunifier,

    /* The length of the charset attribute vector.  */
    charset_attr_max
  };
|
17052
|
120
|
88364
|
/* How code points and characters of a charset are converted to each
   other.  */

enum charset_method
  {
    /* A character code is obtained from a character index (itself
       calculated from a code point) simply by adding an offset
       value.  */
    CHARSET_METHOD_OFFSET,

    /* A decoder vector and an encoder char-table are used for the
       code point <-> character code conversion.  */
    CHARSET_METHOD_MAP,

    /* Like CHARSET_METHOD_MAP, but the decoder and encoder are loaded
       from a file on demand.  Once loaded, the method is changed to
       CHARSET_METHOD_MAP.  */
    CHARSET_METHOD_MAP_DEFERRED,

    /* The charset is a subset of another charset.  */
    CHARSET_METHOD_SUBSET,

    /* The charset is a superset of other charsets.  */
    CHARSET_METHOD_SUPERSET
  };
|
17052
|
146
|
88364
|
147 struct charset
|
|
148 {
|
88436
|
149 /* Index to charset_table. */
|
88364
|
150 int id;
|
17052
|
151
|
88436
|
152 /* Index to Vcharset_hash_table. */
|
88364
|
153 int hash_index;
|
17052
|
154
|
88364
|
155 /* Dimension of the charset: 1, 2, 3, or 4. */
|
|
156 int dimension;
|
17052
|
157
|
88476
|
158 /* Byte code range of each dimension. <code_space>[4N] is a mininum
|
|
159 byte code of the (N+1)th dimension, <code_space>[4N+1] is a
|
|
160 maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is
|
|
161 (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3]
|
|
162 is a number of characters containd in the first to (N+1)th
|
|
163 dismesions. We get `char-index' of a `code-point' from this
|
|
164 information. */
|
88364
|
165 int code_space[16];
|
17052
|
166
|
88476
|
167 /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit
|
|
168 of code_space_mask[B] is set. This array is used to quickly
|
|
169 check if a code-point is in a valid range. */
|
|
170 unsigned char *code_space_mask;
|
|
171
|
88364
|
172 /* 1 if there's no gap in code-points. */
|
|
173 int code_linear_p;
|
17052
|
174
|
88364
|
175 /* If the charset is treated as 94-chars in ISO-2022, the value is 0.
|
|
176 If the charset is treated as 96-chars in ISO-2022, the value is 1. */
|
|
177 int iso_chars_96;
|
17052
|
178
|
88436
|
179 /* ISO final byte of the charset: 48..127. It may be -1 if the
|
|
180 charset doesn't conform to ISO-2022. */
|
88364
|
181 int iso_final;
|
17052
|
182
|
88436
|
183 /* ISO revision number of the charset. */
|
88364
|
184 int iso_revision;
|
17052
|
185
|
88364
|
186 /* If the charset is identical to what supported by Emacs 21 and the
|
|
187 priors, the identification number of the charset used in those
|
|
188 version. Otherwise, -1. */
|
|
189 int emacs_mule_id;
|
17052
|
190
|
88364
|
191 /* Nonzero iff the charset is compatible with ASCII. */
|
|
192 int ascii_compatible_p;
|
35478
|
193
|
88364
|
194 /* Nonzero iff the charset is supplementary. */
|
|
195 int supplementary_p;
|
17052
|
196
|
88364
|
197 /* Nonzero iff all the code points are representable by Lisp_Int. */
|
|
198 int compact_codes_p;
|
17052
|
199
|
88364
|
200 /* The method for encoding/decoding characters of the charset. */
|
|
201 enum charset_method method;
|
17052
|
202
|
88364
|
203 /* Mininum and Maximum code points of the charset. */
|
|
204 unsigned min_code, max_code;
|
17052
|
205
|
88677
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
diff
changeset
|
206 /* Offset value used by macros CODE_POINT_TO_INDEX and
|
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
diff
changeset
|
207 INDEX_TO_CODE_POINT. . */
|
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
diff
changeset
|
208 unsigned char_index_offset;
|
61f981fb213a
(sturct define_charset_arg_index): New enums charset_arg_min_code and
Kenichi Handa <handa@m17n.org>
diff
changeset
|
209
|
88364
|
210 /* Mininum and Maximum character codes of the charset. If the
|
|
211 charset is compatible with ASCII, min_char is a minimum non-ASCII
|
88875
|
212 character of the charset. If the method of charset is
|
|
213 CHARSET_METHOD_OFFSET, even if the charset is unified, min_char
|
|
214 and max_char doesn't change. */
|
88364
|
215 int min_char, max_char;
|
|
216
|
|
217 /* The code returned by ENCODE_CHAR if a character is not encodable
|
|
218 by the charset. */
|
|
219 unsigned invalid_code;
|
17052
|
220
|
88364
|
221 /* If the method of the charset is CHARSET_METHOD_MAP, this is a
|
|
222 table of bits used to quickly and roughly guess if a character
|
|
223 belongs to the charset.
|
17052
|
224
|
88364
|
225 The first 64 elements are 512 bits for characters less than
|
|
226 0x10000. Each bit corresponds to 128-character block. The last
|
|
227 126 elements are 1008 bits for the greater characters
|
|
228 (0x10000..0x3FFFFF). Each bit corresponds to 4096-character
|
|
229 block.
|
17052
|
230
|
88839
|
231 If a bit is 1, at least one character in the corresponding block is
|
88364
|
232 in this charset. */
|
|
233 unsigned char fast_map[190];
|
17052
|
234
|
88364
|
235 /* Offset value to calculate a character code from code-point, and
|
|
236 visa versa. */
|
|
237 int code_offset;
|
17052
|
238
|
88364
|
239 int unified_p;
|
|
240 };
|
17052
|
241
|
88364
|
242 /* Hash table of charset symbols vs. the correponding attribute
|
|
243 vectors. */
|
|
244 extern Lisp_Object Vcharset_hash_table;
|
17052
|
245
|
88364
|
246 /* Table of struct charset. */
|
|
247 extern struct charset *charset_table;
|
|
248 extern int charset_table_used;
|
17052
|
249
|
88364
|
250 #define CHARSET_FROM_ID(id) (charset_table + (id))
|
17052
|
251
|
88875
|
252 extern Lisp_Object Vcharset_ordered_list;
|
|
253
|
|
254 /* Incremented everytime we change the priority of charsets. */
|
89065
|
255 extern unsigned short charset_ordered_list_tick;
|
88875
|
256
|
88364
|
257 extern Lisp_Object Vcharset_list;
|
|
258 extern Lisp_Object Viso_2022_charset_list;
|
|
259 extern Lisp_Object Vemacs_mule_charset_list;
|
|
260
|
|
261 extern struct charset *emacs_mule_charset[256];
|
29004
|
262
|
17052
|
263
|
88364
|
/* Macros to access information about a charset.  */

/* Return the attribute vector of the charset whose symbol is SYMBOL.
   This is a Fgethash lookup in Vcharset_hash_table with Qnil as the
   default, so the result is nil for an unknown symbol.  */
#define CHARSET_SYMBOL_ATTRIBUTES(symbol)	\
  Fgethash ((symbol), Vcharset_hash_table, Qnil)

/* Fetch one slot of the attribute vector ATTRS; the slots are named
   by enum charset_attr_index.  */
#define CHARSET_ATTR_ID(attrs)		AREF ((attrs), charset_id)
#define CHARSET_ATTR_NAME(attrs)	AREF ((attrs), charset_name)
#define CHARSET_ATTR_PLIST(attrs)	AREF ((attrs), charset_plist)
#define CHARSET_ATTR_MAP(attrs)		AREF ((attrs), charset_map)
#define CHARSET_ATTR_DECODER(attrs)	AREF ((attrs), charset_decoder)
#define CHARSET_ATTR_ENCODER(attrs)	AREF ((attrs), charset_encoder)
#define CHARSET_ATTR_SUBSET(attrs)	AREF ((attrs), charset_subset)
#define CHARSET_ATTR_SUPERSET(attrs)	AREF ((attrs), charset_superset)
#define CHARSET_ATTR_UNIFY_MAP(attrs)	AREF ((attrs), charset_unify_map)
#define CHARSET_ATTR_DEUNIFIER(attrs)	AREF ((attrs), charset_deunifier)

/* Return the ID slot of the attribute vector of the charset whose
   symbol is SYMBOL.  */
#define CHARSET_SYMBOL_ID(symbol)	\
  CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol))

/* Return an index to Vcharset_hash_table of the charset whose symbol
   is SYMBOL.  CHARSETP and the CHECK_CHARSET macros treat a negative
   result as "not a charset".  */
#define CHARSET_SYMBOL_HASH_INDEX(symbol)	\
  hash_lookup (XHASH_TABLE (Vcharset_hash_table), (symbol), NULL)

/* Return the attribute vector of CHARSET (a struct charset *).  */
#define CHARSET_ATTRIBUTES(charset)	\
  (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index))
|
21033
|
292
|
88364
|
/* Accessors for the members of CHARSET (a struct charset *).  Each
   one expands to the member itself, so it can also be assigned to.  */
#define CHARSET_ID(charset)		((charset)->id)
#define CHARSET_HASH_INDEX(charset)	((charset)->hash_index)
#define CHARSET_DIMENSION(charset)	((charset)->dimension)
#define CHARSET_CODE_SPACE(charset)	((charset)->code_space)
#define CHARSET_CODE_LINEAR_P(charset)	((charset)->code_linear_p)
#define CHARSET_ISO_CHARS_96(charset)	((charset)->iso_chars_96)
#define CHARSET_ISO_FINAL(charset)	((charset)->iso_final)
/* NOTE(review): struct charset as declared in this header has no
   `iso_plane' member, so any use of this macro fails to compile.  It
   is kept only so that the set of macro names stays unchanged.  */
#define CHARSET_ISO_PLANE(charset)	((charset)->iso_plane)
#define CHARSET_ISO_REVISION(charset)	((charset)->iso_revision)
#define CHARSET_EMACS_MULE_ID(charset)	((charset)->emacs_mule_id)
#define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p)
#define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p)
#define CHARSET_METHOD(charset)		((charset)->method)
#define CHARSET_MIN_CODE(charset)	((charset)->min_code)
#define CHARSET_MAX_CODE(charset)	((charset)->max_code)
#define CHARSET_INVALID_CODE(charset)	((charset)->invalid_code)
#define CHARSET_MIN_CHAR(charset)	((charset)->min_char)
#define CHARSET_MAX_CHAR(charset)	((charset)->max_char)
#define CHARSET_CODE_OFFSET(charset)	((charset)->code_offset)
#define CHARSET_UNIFIED_P(charset)	((charset)->unified_p)
|
25505
|
313
|
88364
|
/* Accessors for the attribute-vector slots of CHARSET (a struct
   charset *).  Each one fetches the attribute vector with
   CHARSET_ATTRIBUTES and then reads the corresponding slot.  */
#define CHARSET_NAME(charset)		\
  (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_MAP(charset)		\
  (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DECODER(charset)	\
  (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_ENCODER(charset)	\
  (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_SUBSET(charset)		\
  (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_SUPERSET(charset)	\
  (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_UNIFY_MAP(charset)	\
  (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset)))
#define CHARSET_DEUNIFIER(charset)	\
  (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset)))
|
|
330
|
|
331
|
|
/* Nonzero iff OBJ is a valid charset symbol.  */
#define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0)

/* Check if X is a valid charset symbol.  If not, signal an error.

   X must be an lvalue: the value returned by wrong_type_argument is
   stored back into it.  The test is repeated after every such store,
   so if wrong_type_argument does return, the replacement is validated
   as well.  X is evaluated more than once.  */
#define CHECK_CHARSET(x)						\
  do {									\
    while (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0)		\
      x = wrong_type_argument (Qcharsetp, (x));				\
  } while (0)


/* Check if X is a valid charset symbol.  If valid, set ID to the id
   number of the charset.  Otherwise, signal an error.

   The hash index is recomputed whenever X is replaced, so ID is never
   derived from an index that belongs to a rejected (or never looked
   up) value of X.  */
#define CHECK_CHARSET_GET_ID(x, id)					\
  do {									\
    int charset_hash_idx_;						\
									\
    while (! SYMBOLP (x)						\
	   || (charset_hash_idx_ = CHARSET_SYMBOL_HASH_INDEX (x)) < 0)	\
      x = wrong_type_argument (Qcharsetp, (x));				\
    id = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table),	\
				 charset_hash_idx_),			\
		     charset_id));					\
  } while (0)


/* Check if X is a valid charset symbol.  If valid, set ATTR to the
   attr vector of the charset.  Otherwise, signal an error.

   The lookup is redone whenever X is replaced, so ATTR always
   corresponds to the final value of X.  */
#define CHECK_CHARSET_GET_ATTR(x, attr)					\
  do {									\
    while (! SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x))) \
      x = wrong_type_argument (Qcharsetp, (x));				\
  } while (0)


/* Check if X is a valid charset symbol.  If valid, set CHARSET to
   the struct charset pointer for it (CHARSET_FROM_ID of its id).
   Otherwise, signal an error.  */
#define CHECK_CHARSET_GET_CHARSET(x, charset)				\
  do {									\
    int checked_charset_id_;						\
    CHECK_CHARSET_GET_ID (x, checked_charset_id_);			\
    charset = CHARSET_FROM_ID (checked_charset_id_);			\
  } while (0)
|
|
371
|
19319
|
372
|
88364
|
/* Look up Vcharset_ordered_list and return the first charset that
   contains the character C.  This is char_charset called with Qnil
   and a null pointer as its second and third arguments.  */
#define CHAR_CHARSET(c)		\
  char_charset ((c), Qnil, NULL)

/* The declarations below are compiled out.  */
#if 0
/* Char-table of charset-sets.  Each element is a bool vector indexed
   by a charset ID.  */
extern Lisp_Object Vchar_charset_set;

/* Charset-bag of character C.  */
#define CHAR_CHARSET_SET(c)	\
  CHAR_TABLE_REF (Vchar_charset_set, c)

/* Check if two characters C1 and C2 belong to the same charset.  */
#define SAME_CHARSET_P(c1, c2)	\
  intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2))

#endif
|
|
392
|
|
393
|
|
/* Return a character corresponding to the code-point CODE of CHARSET
   (a struct charset *).  Try some optimization before calling
   decode_char:

   - an ASCII byte in an ASCII-compatible charset is returned as is;
   - a CODE outside min_code..max_code yields -1;
   - a unified charset always goes through decode_char;
   - for the OFFSET and MAP methods with linear code points the result
     is computed inline (by offset arithmetic, or by indexing the
     decoder vector with CODE - min_code);
   - everything else goes through decode_char.

   CHARSET and CODE are evaluated more than once, so they must not
   have side effects.  */

#define DECODE_CHAR(charset, code)					\
  ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p)		\
   ? (code)								\
   : (((code) < (charset)->min_code || (code) > (charset)->max_code)	\
      ? -1								\
      : ((charset)->unified_p						\
         ? decode_char ((charset), (code))				\
         : ((charset)->method == CHARSET_METHOD_OFFSET			\
            ? ((charset)->code_linear_p					\
               ? (code) - (charset)->min_code + (charset)->code_offset	\
               : decode_char ((charset), (code)))			\
            : ((charset)->method == CHARSET_METHOD_MAP			\
               ? ((charset)->code_linear_p				\
                  ? XINT (AREF (CHARSET_DECODER (charset),		\
                                (code) - (charset)->min_code))		\
                  : decode_char ((charset), (code)))			\
               : decode_char ((charset), (code)))))))
|
|
414
|
|
415
|
88744
|
416 extern Lisp_Object charset_work;
|
|
417
|
88364
|
/* Return a code point of the character C in CHARSET (a struct
   charset *).  Try some optimization before calling encode_char:

   - an ASCII character in an ASCII-compatible charset is returned as
     is;
   - a unified charset, and the SUBSET and SUPERSET methods, always go
     through encode_char;
   - a C outside min_char..max_char yields the charset's invalid_code;
   - the OFFSET method with linear code points is computed inline;
   - the MAP method with compact codes looks C up in the encoder
     char-table (through the global charset_work) and yields
     invalid_code when the entry is nil;
   - everything else goes through encode_char.

   CHARSET and C are evaluated more than once, so they must not have
   side effects.  */

#define ENCODE_CHAR(charset, c)						\
  ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p)			\
   ? (c)								\
   : (((charset)->unified_p						\
       || (charset)->method == CHARSET_METHOD_SUBSET			\
       || (charset)->method == CHARSET_METHOD_SUPERSET)			\
      ? encode_char ((charset), (c))					\
      : (((c) < (charset)->min_char || (c) > (charset)->max_char)	\
         ? (charset)->invalid_code					\
         : ((charset)->method == CHARSET_METHOD_OFFSET			\
            ? ((charset)->code_linear_p					\
               ? (c) - (charset)->code_offset + (charset)->min_code	\
               : encode_char ((charset), (c)))				\
            : ((charset)->method == CHARSET_METHOD_MAP			\
               ? ((charset)->compact_codes_p				\
                  ? (charset_work					\
                       = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \
                     (NILP (charset_work)				\
                      ? (charset)->invalid_code				\
                      : XFASTINT (charset_work)))			\
                  : encode_char ((charset), (c)))			\
               : encode_char ((charset), (c)))))))
|
17052
|
442
|
25505
|
443
|
88364
|
/* Set to 1 when a charset map is loaded, to warn that buffer text
   and string data may have been relocated.  */
extern int charset_map_loaded;


/* Set CHARSET to the highest-priority charset that contains C, and
   CODE to the code-point of C in that charset.  CODE must be an
   lvalue; its address is passed to char_charset.  The value of the
   whole expression is the value assigned to CHARSET.  */
#define SPLIT_CHAR(c, charset, code)	\
  ((charset) = char_charset ((c), Qnil, &(code)))
|
|
453
|
|
454
|
|
/* Extents of iso_charset_table.  */
#define ISO_MAX_DIMENSION 3
#define ISO_MAX_CHARS 2
#define ISO_MAX_FINAL 0x80	/* only 0x30..0x7F are used */

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL_CHAR) to Emacs' charset ID.  Should be accessed by
   macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR).  */
extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];

/* The iso_charset_table entry for the ISO-2022 charset that has
   DIMENSION (1-based; 1 is subtracted to form the first index),
   CHARS_96 (used directly as the second index) and FINAL (final
   character, used directly as the third index).  The expansion is an
   lvalue.  No range check is done on any of the arguments.  */
#define ISO_CHARSET_TABLE(dimension, chars_96, final)	\
  iso_charset_table[(dimension) - 1][(chars_96)][(final)]
|
20589
|
468
|
88364
|
/* Nonzero iff the charset whose fast map is FAST_MAP may contain C.

   For C below 0x10000 the bit for C's 128-character block is tested:
   byte C >> 10, bit (C >> 7) & 7.  For larger C the bit for C's
   4096-character block is tested: byte (C >> 15) + 62, bit
   (C >> 12) & 7.  FAST_MAP may be any expression of pointer or array
   type.  C is evaluated more than once.  */
#define CHARSET_FAST_MAP_REF(c, fast_map)				\
  ((c) < 0x10000							\
   ? (fast_map)[(c) >> 10] & (1 << (((c) >> 7) & 7))			\
   : (fast_map)[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7)))

/* Set the bit of FAST_MAP that corresponds to the block containing
   C, using the same byte/bit layout as CHARSET_FAST_MAP_REF.  */
#define CHARSET_FAST_MAP_SET(c, fast_map)				\
  do {									\
    if ((c) < 0x10000)							\
      (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7);			\
    else								\
      (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7);		\
  } while (0)
|
|
482
|
29004
|
483
|
20531
|
484
|
88364
|
/* Nonzero iff CHARSET (a struct charset *) may contain the
   character C.

   An ASCII character always matches an ASCII-compatible charset.  A
   unified charset is asked through encode_char.  Otherwise the fast
   map must have C's block marked, and then:
   - for the OFFSET method, C must lie in min_char..max_char;
   - for the MAP method with compact codes, the encoder char-table
     must have a non-nil entry for C;
   - in all other cases encode_char must not return invalid_code.

   CHARSET and C are evaluated more than once.  */
#define CHAR_CHARSET_P(c, charset)					\
  ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p)			\
   || (CHARSET_UNIFIED_P (charset)					\
       ? encode_char ((charset), (c)) != (charset)->invalid_code	\
       : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map)		\
          && ((charset)->method == CHARSET_METHOD_OFFSET		\
              ? ((c) >= (charset)->min_char				\
                 && (c) <= (charset)->max_char)				\
              : (((charset)->method == CHARSET_METHOD_MAP		\
                  && (charset)->compact_codes_p)			\
                 ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \
                 : (encode_char ((charset), (c))			\
                    != (charset)->invalid_code))))))
|
29004
|
497
|
88875
|
498
|
|
/* Special macros for emacs-mule encoding.  */

/* Leading-code followed by extended leading-code.    DIMENSION/COLUMN */
#define EMACS_MULE_LEADING_CODE_PRIVATE_11	0x9A /* 1/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_12	0x9B /* 1/2 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_21	0x9C /* 2/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_22	0x9D /* 2/2 */

extern struct charset *emacs_mule_charset[256];
|
|
508
|
|
509
|
29004
|
510
|
88364
|
511 extern Lisp_Object Qcharsetp;
|
29004
|
512
|
88364
|
513 extern Lisp_Object Qascii, Qunicode;
|
88949
|
514 extern int charset_ascii, charset_eight_bit;
|
88364
|
515 extern int charset_iso_8859_1;
|
88682
|
516 extern int charset_jisx0201_roman;
|
|
517 extern int charset_jisx0208_1978;
|
|
518 extern int charset_jisx0208;
|
20932
|
519
|
89064
|
520 extern int charset_unibyte;
|
|
521
|
88364
|
522 extern struct charset *char_charset P_ ((int, Lisp_Object, unsigned *));
|
|
523 extern Lisp_Object charset_attributes P_ ((int));
|
17726
|
524
|
88364
|
525 extern int decode_char P_ ((struct charset *, unsigned));
|
|
526 extern unsigned encode_char P_ ((struct charset *, int));
|
|
527 extern int string_xstring_p P_ ((Lisp_Object));
|
23488
|
528
|
88744
|
529 extern void map_charset_chars P_ ((void (*) (Lisp_Object, Lisp_Object),
|
|
530 Lisp_Object, Lisp_Object,
|
|
531 struct charset *, unsigned, unsigned));
|
|
532
|
88875
|
533 EXFUN (Funify_charset, 3);
|
20719
|
534
|
29570
|
535 #endif /* EMACS_CHARSET_H */
|