26843
|
1 /* Header for multibyte character handler.
|
20708
|
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
|
18341
|
3 Licensed to the Free Software Foundation.
|
17052
|
4
|
17071
|
5 This file is part of GNU Emacs.
|
|
6
|
|
7 GNU Emacs is free software; you can redistribute it and/or modify
|
|
8 it under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 2, or (at your option)
|
|
10 any later version.
|
17052
|
11
|
17071
|
12 GNU Emacs is distributed in the hope that it will be useful,
|
|
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 GNU General Public License for more details.
|
17052
|
16
|
17071
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GNU Emacs; see the file COPYING. If not, write to
|
|
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
20 Boston, MA 02111-1307, USA. */
|
17052
|
21
|
29570
|
22 #ifndef EMACS_CHARSET_H
|
|
23 #define EMACS_CHARSET_H
|
17052
|
24
|
29004
|
25 /* #define BYTE_COMBINING_DEBUG */
|
|
26
|
17052
|
27 /*** GENERAL NOTE on CHARACTER SET (CHARSET) ***
|
|
28
|
|
29 A character set ("charset" hereafter) is a meaningful collection
|
|
30 (i.e. language, culture, functionality, etc) of characters. Emacs
|
|
31 handles multiple charsets at once. Each charset corresponds to one
|
26843
|
32 of ISO charsets. Emacs identifies a charset by a unique
|
|
33 identification number, whereas ISO identifies a charset by a triplet
|
|
34 of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying
|
|
35 "charset" means an identification number (integer value).
|
17052
|
36
|
26843
|
37 The value range of charset is 0x00, 0x81..0xFE. There are four
|
17052
|
38 kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or
|
|
39 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94
|
26843
|
40 characters.
|
17052
|
41
|
|
42 Within Emacs Lisp, a charset is treated as a symbol which has a
|
|
43 property `charset'. The property value is a vector containing
|
|
44 various information about the charset. For readability of C code,
|
26843
|
45 we use the following convention for C variable names:
|
17052
|
46 charset_symbol: Emacs Lisp symbol of a charset
|
|
47 charset_id: Emacs Lisp integer of an identification number of a charset
|
|
48 charset: C integer of an identification number of a charset
|
|
49
|
29004
|
50 Each charset (except for ascii) is assigned a base leading-code
|
|
51 (range 0x80..0x9E). In addition, a charset of 0xA0 or greater
|
17052
|
52 (whose base leading-code is 0x9A..0x9D) is assigned an extended
|
|
53 leading-code (range 0xA0..0xFE). In this case, each base
|
|
54 leading-code specifies the allowable range of extended leading-code as
|
|
55 shown in the table below. A leading-code is used to represent a
|
|
56 character in Emacs' buffer and string.
|
|
57
|
|
58 We call a charset which has extended leading-code as "private
|
26843
|
59 charset" because those are mainly for a charset which is not yet
|
17052
|
60 registered by ISO. On the contrary, we call a charset which does
|
|
61 not have extended leading-code as "official charset".
|
|
62
|
|
63 ---------------------------------------------------------------------------
|
|
64 charset dimension base leading-code extended leading-code
|
|
65 ---------------------------------------------------------------------------
|
|
66 0x00 official dim1 -- none -- -- none --
|
|
67 (ASCII)
|
|
68 0x01..0x7F --never used--
|
29539
|
69 0x80 official dim1 -- none -- -- none --
|
|
70 (eight-bit-graphic)
|
17052
|
71 0x81..0x8F official dim1 same as charset -- none --
|
|
72 0x90..0x99 official dim2 same as charset -- none --
|
29004
|
73 0x9A..0x9D --never used--
|
|
74 0x9E official dim1 same as charset -- none --
|
|
75 (eight-bit-control)
|
29539
|
76 0x9F --never used--
|
17052
|
77 0xA0..0xDF private dim1 0x9A same as charset
|
|
78 of 1-column width
|
|
79 0xE0..0xEF private dim1 0x9B same as charset
|
|
80 of 2-column width
|
|
81 0xF0..0xF4 private dim2 0x9C same as charset
|
|
82 of 1-column width
|
|
83 0xF5..0xFE private dim2 0x9D same as charset
|
|
84 of 2-column width
|
|
85 0xFF --never used--
|
|
86 ---------------------------------------------------------------------------
|
|
87
|
|
88 */
|
|
89
|
|
/* Special leading-codes.  */

/* Base leading-codes that are followed by an extended leading-code.  */
#define LEADING_CODE_PRIVATE_11 0x9A /* private DIMENSION1, 1-column */
#define LEADING_CODE_PRIVATE_12 0x9B /* private DIMENSION1, 2-column */
#define LEADING_CODE_PRIVATE_21 0x9C /* private DIMENSION2, 1-column */
#define LEADING_CODE_PRIVATE_22 0x9D /* private DIMENSION2, 2-column */

/* Leading-code of the `eight-bit-control' charset.  */
#define LEADING_CODE_8_BIT_CONTROL 0x9E

/* Extended leading-codes: first value of the range that may follow
   each of the base leading-codes above.  */
#define LEADING_CODE_EXT_11 0xA0 /* after LEADING_CODE_PRIVATE_11 */
#define LEADING_CODE_EXT_12 0xE0 /* after LEADING_CODE_PRIVATE_12 */
#define LEADING_CODE_EXT_21 0xF0 /* after LEADING_CODE_PRIVATE_21 */
#define LEADING_CODE_EXT_22 0xF5 /* after LEADING_CODE_PRIVATE_22 */

/* Largest extended leading-code.  */
#define LEADING_CODE_EXT_MAX 0xFE
|
|
107
|
|
/* Smallest and largest charset id of each kind.  The private ranges
   start at the corresponding extended leading-code.  */
#define MIN_CHARSET_OFFICIAL_DIMENSION1 0x81
#define MAX_CHARSET_OFFICIAL_DIMENSION1 0x8F
#define MIN_CHARSET_OFFICIAL_DIMENSION2 0x90
#define MAX_CHARSET_OFFICIAL_DIMENSION2 0x99
#define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11
#define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21

/* Largest charset identification number overall.  */
#define MAX_CHARSET 0xFE

/* Charsets with fixed ids; the trailing comment gives the character
   codes each one covers.  */
#define CHARSET_ASCII 0 /* 0x00..0x7F */
#define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */
#define CHARSET_8_BIT_GRAPHIC 0x80 /* 0xA0..0xFF */
|
17052
|
123
|
|
124 extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
|
|
125 extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
|
|
126 extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
|
|
127 extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
|
|
128 extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
|
|
129 extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
|
|
130 extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
|
|
131
|
29004
|
/* Nonzero if CH, taken as an unsigned byte, is below 0xA0, i.e. it is
   an ASCII code or a base leading-code.  The name is historical: any
   byte may now be the first byte of a character in a multibyte
   buffer or string.  */
#define CHAR_HEAD_P(ch) (((unsigned char) (ch)) <= 0x9F)
|
17052
|
136
|
|
137 /*** GENERAL NOTE on CHARACTER REPRESENTATION ***
|
|
138
|
|
139 At first, the term "character" or "char" is used for a multilingual
|
|
140 character (of course, including ASCII character), not for a byte in
|
|
141 computer memory. We use the term "code" or "byte" for the latter
|
|
142 case.
|
|
143
|
|
144 A character is identified by charset and one or two POSITION-CODEs.
|
|
145 POSITION-CODE is the position of the character in the charset. A
|
|
146 character of DIMENSION1 charset has one POSITION-CODE: POSITION-CODE-1.
|
|
147 A character of DIMENSION2 charset has two POSITION-CODEs:
|
|
148 POSITION-CODE-1 and POSITION-CODE-2. The code range of
|
|
149 POSITION-CODE is 0x20..0x7F.
|
|
150
|
|
151 Emacs has two kinds of representation of a character: multi-byte
|
|
152 form (for buffer and string) and single-word form (for character
|
|
153 object in Emacs Lisp). The latter is called "character code" here
|
22529
|
154 after. Both representations encode the information of charset and
|
17052
|
155 POSITION-CODE but in a different way (for instance, MSB of
|
|
156 POSITION-CODE is set in multi-byte form).
|
|
157
|
|
158 For details of multi-byte form, see the section "2. Emacs internal
|
|
159 format handlers" of `coding.c'.
|
|
160
|
|
161 Emacs uses 19 bits for a character code. The bits are divided into
|
|
162 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits).
|
|
163
|
|
164 A character code of DIMENSION1 character uses FIELD2 to hold charset
|
|
165 and FIELD3 to hold POSITION-CODE-1. A character code of DIMENSION2
|
|
166 character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold
|
|
167 POSITION-CODE-1 and POSITION-CODE-2 respectively.
|
|
168
|
|
169 More precisely...
|
|
170
|
29004
|
171 FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control,
|
|
172 and eight-bit-graphic) is "charset - 0x70". This is to make all
|
|
173 character codes except for ASCII and 8-bit codes greater than 256.
|
|
174 So, the range of FIELD2 of DIMENSION1 character is 0, 1, or
|
|
175 0x11..0x7F.
|
17052
|
176
|
|
177 FIELD1 of DIMENSION2 character is "charset - 0x8F" for official
|
|
178 charset and "charset - 0xE0" for private charset. So, the range of
|
|
179 FIELD1 of DIMENSION2 character is 0x01..0x1E.
|
|
180
|
29004
|
181 -----------------------------------------------------------------------------
|
|
182 charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit)
|
|
183 -----------------------------------------------------------------------------
|
|
184 ascii 0 0 0x00..0x7F
|
|
185 eight-bit-control 0 1 0x00..0x1F
|
|
186 eight-bit-graphic 0 1 0x20..0x7F
|
|
187 DIMENSION1 0 charset - 0x70 POSITION-CODE-1
|
|
188 DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2
|
|
189 DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2
|
|
190 -----------------------------------------------------------------------------
|
17052
|
191 "(o)": official, "(p)": private
|
29004
|
192 -----------------------------------------------------------------------------
|
17052
|
193 */
|
|
194
|
|
/* Bit masks selecting the three fields of a character code:
   FIELD3 is the low 7 bits, FIELD2 the next 7 bits and FIELD1 the
   5 bits above those.  */
#define CHAR_FIELD1_MASK (0x1F << 14)
#define CHAR_FIELD2_MASK (0x7F << 7)
#define CHAR_FIELD3_MASK 0x7F

/* Extract each field of character code C, shifted down so that the
   result is a small integer.  */
#define CHAR_FIELD1(c) ((CHAR_FIELD1_MASK & (c)) >> 14)
#define CHAR_FIELD2(c) ((CHAR_FIELD2_MASK & (c)) >> 7)
#define CHAR_FIELD3(c) (CHAR_FIELD3_MASK & (c))
|
|
204
|
|
/* Smallest character code belonging to each kind of charset.
   DIMENSION1 codes keep "charset - 0x70" in FIELD2; DIMENSION2 codes
   keep "charset - 0x8F" (official) or "charset - 0xE0" (private) in
   FIELD1.  */
#define MIN_CHAR_OFFICIAL_DIMENSION1 ((MIN_CHARSET_OFFICIAL_DIMENSION1 - 0x70) << 7)
#define MIN_CHAR_PRIVATE_DIMENSION1 ((MIN_CHARSET_PRIVATE_DIMENSION1 - 0x70) << 7)
#define MIN_CHAR_OFFICIAL_DIMENSION2 ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14)
#define MIN_CHAR_PRIVATE_DIMENSION2 ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)

/* One more than the largest character code currently used.  */
#define MAX_CHAR (0x1F << 14)
|
17052
|
216
|
29004
|
/* 1 if character code C, converted to unsigned, is below 0x100 (a
   single byte character), else 0.  */
#define SINGLE_BYTE_CHAR_P(c) (((unsigned) (c)) <= 0xFF)

/* 1 if BYTE is below 0x80, i.e. it is an ASCII character by itself
   in multibyte mode, else 0.  */
#define ASCII_BYTE_P(byte) ((byte) <= 0x7F)
|
|
222
|
17052
|
223 /* A char-table containing information of each character set.
|
|
224
|
|
225 Unlike ordinary char-tables, this doesn't contain any nested table.
|
|
226 Only the top level elements are used. Each element is a vector of
|
|
227 the following information:
|
|
228 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
|
|
229 LEADING-CODE-BASE, LEADING-CODE-EXT,
|
|
230 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
|
|
231 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
|
|
232 PLIST.
|
|
233
|
|
234 CHARSET-ID (integer) is the identification number of the charset.
|
|
235
|
24779
|
236 BYTES (integer) is the length of multi-byte form of a character in
|
17052
|
237 the charset: one of 1, 2, 3, and 4.
|
|
238
|
|
239 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
|
|
240
|
|
241 CHARS (integer) is the number of characters in a dimension: 94 or 96.
|
|
242
|
|
243 WIDTH (integer) is the number of columns a character in the charset
|
29004
|
244 occupies on the screen: one of 0, 1, and 2.
|
17052
|
245
|
|
246 DIRECTION (integer) is the rendering direction of characters in the
|
24777
|
247 charset when rendering. If 0, render from left to right, else
|
|
248 render from right to left.
|
17052
|
249
|
|
250 LEADING-CODE-BASE (integer) is the base leading-code for the
|
|
251 charset.
|
|
252
|
|
253 LEADING-CODE-EXT (integer) is the extended leading-code for the
|
|
254 charset. All charsets of less than 0xA0 have the value 0.
|
|
255
|
|
256 ISO-FINAL-CHAR (character) is the final character of the
|
29004
|
257 corresponding ISO 2022 charset. It is -1 for a charset
|
|
258 that is used only internally (e.g. `eight-bit-control').
|
17052
|
259
|
|
260 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
|
|
261 while encoding to variants of ISO 2022 coding system, one of the
|
29004
|
262 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It
|
|
263 is -1 for a charset that is used only internally
|
|
264 (e.g. `eight-bit-control').
|
17052
|
265
|
|
266 REVERSE-CHARSET (integer) is the charset which differs only in
|
|
267 LEFT-TO-RIGHT value from the charset. If there's no such
|
|
268 charset, the value is -1.
|
|
269
|
|
270 SHORT-NAME (string) is the short name to refer to the charset.
|
|
271
|
|
272 LONG-NAME (string) is the long name to refer to the charset.
|
|
273
|
|
274 DESCRIPTION (string) is the description string of the charset.
|
|
275
|
|
276 PLIST (property list) may contain any type of information a user
|
|
277 wants to put and get by functions `put-charset-property' and
|
|
278 `get-charset-property' respectively. */
|
|
279 extern Lisp_Object Vcharset_table;
|
|
280
|
|
281 /* Macros to access various information of CHARSET in Vcharset_table.
|
|
282 We provide these macros for efficiency. No range check of CHARSET. */
|
|
283
|
28513
1fec001e68c5
(CHARSET_TABLE_ENTRY): Fix comment -- argument is a C int, not a lisp integer.
Ken Raeburn <raeburn@raeburn.org>
diff
changeset
|
/* Element of Vcharset_table describing CHARSET (a C integer).
   CHARSET_ASCII uses slot 0; every other charset uses slot
   CHARSET + 128.  */
#define CHARSET_TABLE_ENTRY(charset) \
  XCHAR_TABLE (Vcharset_table)->contents[((charset) != CHARSET_ASCII \
                                          ? (charset) + 128 : 0)]

/* Field number INFO_IDX of the vector that describes CHARSET.  */
#define CHARSET_TABLE_INFO(charset, info_idx) \
  XVECTOR (CHARSET_TABLE_ENTRY (charset))->contents[info_idx]
|
|
292
|
|
/* Index of each piece of information inside a charset's vector in
   Vcharset_table.  */
#define CHARSET_ID_IDX (0)
#define CHARSET_BYTES_IDX (1)
#define CHARSET_DIMENSION_IDX (2)
#define CHARSET_CHARS_IDX (3)
#define CHARSET_WIDTH_IDX (4)
#define CHARSET_DIRECTION_IDX (5)
#define CHARSET_LEADING_CODE_BASE_IDX (6)
#define CHARSET_LEADING_CODE_EXT_IDX (7)
#define CHARSET_ISO_FINAL_CHAR_IDX (8)
#define CHARSET_ISO_GRAPHIC_PLANE_IDX (9)
#define CHARSET_REVERSE_CHARSET_IDX (10)
#define CHARSET_SHORT_NAME_IDX (11)
#define CHARSET_LONG_NAME_IDX (12)
#define CHARSET_DESCRIPTION_IDX (13)
#define CHARSET_PLIST_IDX (14)

/* Number of elements in each charset's vector.  */
#define CHARSET_MAX_IDX (15)
|
|
310
|
|
/* Frequently used accessors: each fetches one field of CHARSET's
   vector and converts it to a C integer.  The first seven fields are
   read with XFASTINT.  */
#define CHARSET_BYTES(charset) \
  XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX))
#define CHARSET_DIMENSION(charset) \
  XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX))
#define CHARSET_CHARS(charset) \
  XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX))
#define CHARSET_WIDTH(charset) \
  XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX))
#define CHARSET_DIRECTION(charset) \
  XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX))
#define CHARSET_LEADING_CODE_BASE(charset) \
  XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX))
#define CHARSET_LEADING_CODE_EXT(charset) \
  XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX))

/* The remaining three fields are read with XINT.  */
#define CHARSET_ISO_FINAL_CHAR(charset) \
  XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX))
#define CHARSET_ISO_GRAPHIC_PLANE(charset) \
  XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX))
#define CHARSET_REVERSE_CHARSET(charset) \
  XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX))
|
|
332
|
|
/* Values naming the rendering direction of a charset.  */
#define CHARSET_DIRECTION_LEFT_TO_RIGHT 0
#define CHARSET_DIRECTION_RIGHT_TO_LEFT 1
|
|
336
|
|
337 /* A vector of charset symbol indexed by charset-id. This is used
|
|
338 only for returning charset symbol from C functions. */
|
|
339 extern Lisp_Object Vcharset_symbol_table;
|
|
340
|
|
/* The element of Vcharset_symbol_table at index CHARSET, i.e. the
   symbol of that charset.  */
#define CHARSET_SYMBOL(charset) \
  XVECTOR (Vcharset_symbol_table)->contents[charset]
|
|
344
|
25505
|
/* 1 if CHARSET lies in a value range a charset id may take, else 0.
   Accepted are 0, the values above 0x80 up to
   MAX_CHARSET_OFFICIAL_DIMENSION2, the values from
   MIN_CHARSET_PRIVATE_DIMENSION1 up to MAX_CHARSET, and the two
   eight-bit charsets.  */
#define CHARSET_VALID_P(charset) \
  ((charset) == 0 \
   || (0x80 < (charset) && MAX_CHARSET_OFFICIAL_DIMENSION2 >= (charset)) \
   || (MIN_CHARSET_PRIVATE_DIMENSION1 <= (charset) \
       && MAX_CHARSET >= (charset)) \
   || (CHARSET_8_BIT_CONTROL == (charset)) \
   || (CHARSET_8_BIT_GRAPHIC == (charset)))
|
17052
|
353
|
26843
|
/* 1 if CHARSET is within 0..MAX_CHARSET and its entry in
   Vcharset_table is non-nil, else 0.  The range test comes first so
   that the table is never indexed with an out-of-range value.  */
#define CHARSET_DEFINED_P(charset) \
  ((0 <= (charset)) && (MAX_CHARSET >= (charset)) \
   && !NILP (CHARSET_TABLE_ENTRY (charset)))
|
|
358
|
|
359 /* Since the information CHARSET-BYTES and CHARSET-WIDTH of
|
29004
|
360 Vcharset_table can be retrieved only by the first byte of
|
17052
|
361 multi-byte form (an ASCII code or a base leading-code), we provide
|
|
362 here tables to be used by macros BYTES_BY_CHAR_HEAD and
|
|
363 WIDTH_BY_CHAR_HEAD for faster information retrieval. */
|
|
364 extern int bytes_by_char_head[256];
|
|
365 extern int width_by_char_head[256];
|
|
366
|
29004
|
/* Byte length of the multibyte form whose first byte is CHAR_HEAD:
   1 for an ASCII byte, otherwise the bytes_by_char_head entry.  */
#define BYTES_BY_CHAR_HEAD(char_head) \
  (!ASCII_BYTE_P (char_head) ? bytes_by_char_head[char_head] : 1)

/* Likewise for the column width, using width_by_char_head.  */
#define WIDTH_BY_CHAR_HEAD(char_head) \
  (!ASCII_BYTE_P (char_head) ? width_by_char_head[char_head] : 1)
|
17052
|
371
|
|
/* Charset id of character code C.  Single byte codes map to ASCII,
   eight-bit-control (below 0xA0) or eight-bit-graphic.  Other codes
   below MIN_CHAR_OFFICIAL_DIMENSION2 carry "charset - 0x70" in
   FIELD2; the rest carry "charset - 0x8F" (official) or
   "charset - 0xE0" (private) in FIELD1.  */
#define CHAR_CHARSET(c) \
  (!SINGLE_BYTE_CHAR_P (c) \
   ? ((c) >= MIN_CHAR_OFFICIAL_DIMENSION2 \
      ? ((c) >= MIN_CHAR_PRIVATE_DIMENSION2 \
         ? CHAR_FIELD1 (c) + 0xE0 \
         : CHAR_FIELD1 (c) + 0x8F) \
      : CHAR_FIELD2 (c) + 0x70) \
   : (!ASCII_BYTE_P (c) \
      ? ((c) >= 0xA0 ? CHARSET_8_BIT_GRAPHIC : CHARSET_8_BIT_CONTROL) \
      : CHARSET_ASCII))
|
17052
|
383
|
26843
|
/* Nonzero if characters C1 and C2 belong to the same charset.

   When C1 is below MIN_CHAR_OFFICIAL_DIMENSION2 the FIELD2 bits of
   both codes are compared, otherwise the FIELD1 bits are compared.
   Only C1 selects which field is used, so the answer is meaningful
   only if C2 is a code of the same kind as C1.

   Both arguments are parenthesized so that expression arguments
   (e.g. `a | b') group as the caller wrote them.  C1 may be
   evaluated more than once.  */
#define SAME_CHARSET_P(c1, c2) \
  ((c1) < MIN_CHAR_OFFICIAL_DIMENSION2 \
   ? ((c1) & CHAR_FIELD2_MASK) == ((c2) & CHAR_FIELD2_MASK) \
   : ((c1) & CHAR_FIELD1_MASK) == ((c2) & CHAR_FIELD1_MASK))
|
17052
|
389
|
|
/* Return the character code whose charset is CHARSET and whose
   position-codes are C1 and C2.  A DIMENSION1 character ignores C2.

   ASCII yields the low 7 bits of C1; the two eight-bit charsets
   yield those 7 bits with bit 7 set.  For any other charset the
   dimension is taken from Vcharset_table if the charset is defined;
   an undefined charset is treated as DIMENSION1 when it is below
   MIN_CHARSET_PRIVATE_DIMENSION2.  A position-code that is zero or
   negative contributes zero to the result.

   Every position-code is masked with 0x7F, in the DIMENSION1 branch
   too, so that a code with its MSB set (as in multi-byte form)
   cannot spill into the field that holds the charset.  */
#define MAKE_CHAR(charset, c1, c2) \
  ((charset) == CHARSET_ASCII \
   ? (c1) & 0x7F \
   : (((charset) == CHARSET_8_BIT_CONTROL \
       || (charset) == CHARSET_8_BIT_GRAPHIC) \
      ? ((c1) & 0x7F) | 0x80 \
      : ((CHARSET_DEFINED_P (charset) \
          ? CHARSET_DIMENSION (charset) == 1 \
          : (charset) < MIN_CHARSET_PRIVATE_DIMENSION2) \
         ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : ((c1) & 0x7F)) \
         : ((((charset) \
              - ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
             << 14) \
            | ((c2) <= 0 ? 0 : ((c2) & 0x7F)) \
            | ((c1) <= 0 ? 0 : (((c1) & 0x7F) << 7))))))
|
|
407
|
17052
|
408
|
20151
|
/* Nonzero iff C is a valid character: when GENERICP is nonzero a
   normal or generic character, when GENERICP is zero a normal
   character only.  A negative C is never valid, a single byte C is
   always valid, and anything else is decided by char_valid_p.  */
#define CHAR_VALID_P(c, genericp) \
  (0 <= (c) \
   && (SINGLE_BYTE_CHAR_P (c) ? 1 : char_valid_p (c, genericp)))
|
17833
|
415
|
22184
|
/* Fallback offset used when neither nonascii-translation-table nor
   nonascii-insert-offset converts a unibyte character to a valid
   multibyte character.  The result is a Latin-1 character.  */

#define DEFAULT_NONASCII_INSERT_OFFSET 0x800
|
|
421
|
29004
|
/* Parse multibyte string STR of length LENGTH and set BYTES to the
   byte length of the character at STR, as given for its first byte
   by BYTES_BY_CHAR_HEAD.

   With BYTE_COMBINING_DEBUG defined, the macro also counts the
   leading byte plus the following non-head bytes (at most LENGTH)
   and aborts if BYTES exceeds that count.  The counter has a
   macro-specific name so that it cannot capture an identifier used
   in the caller's STR or LENGTH expression.  */

#ifdef BYTE_COMBINING_DEBUG

#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
  do { \
    int parse_multibyte_seq_i = 1; \
    while (parse_multibyte_seq_i < (length) \
           && ! CHAR_HEAD_P ((str)[parse_multibyte_seq_i])) \
      parse_multibyte_seq_i++; \
    (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \
    if ((bytes) > parse_multibyte_seq_i) \
      abort (); \
  } while (0)

#else /* not BYTE_COMBINING_DEBUG */

/* LENGTH is not used in this variant.  */
#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
  ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]))

#endif /* not BYTE_COMBINING_DEBUG */
|
|
442
|
|
/* Return 1 iff the byte sequence at unibyte string STR (LENGTH bytes)
   is valid as a multibyte form.  As a side effect BYTES is set: to 1
   when the first byte is outside 0x80..0x9F, otherwise to
   BYTES_BY_CHAR_HEAD of the first byte.  In the latter case the
   sequence is valid only if the first byte is not
   LEADING_CODE_8_BIT_CONTROL, BYTES fits in LENGTH, and each of the
   trailing bytes (one, two or three of them) is not a character
   head.  */

#define UNIBYTE_STR_AS_MULTIBYTE_P(str, length, bytes) \
  (((str)[0] >= 0x80 && (str)[0] < 0xA0) \
   ? (((bytes) = BYTES_BY_CHAR_HEAD ((str)[0])), \
      ((str)[0] != LEADING_CODE_8_BIT_CONTROL \
       && (length) >= (bytes) \
       && !CHAR_HEAD_P ((str)[1]) \
       && ((bytes) == 2 \
           || (!CHAR_HEAD_P ((str)[2]) \
               && ((bytes) == 3 \
                   || !CHAR_HEAD_P ((str)[3])))))) \
   : ((bytes) = 1))
|
29004
|
458
|
|
/* Return 1 iff the byte sequence at multibyte string STR is valid as
   a unibyte form, which is the case unless its first byte is
   LEADING_CODE_8_BIT_CONTROL.  As a side effect BYTES is set to the
   byte length of one character at STR.  */

#define MULTIBYTE_STR_AS_UNIBYTE_P(str, bytes) \
  ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]), \
   !((str)[0] == LEADING_CODE_8_BIT_CONTROL))
|
17052
|
466
|
19319
|
/* Decompose character code C: its charset is stored in CHARSET and
   its position-codes in C1 and C2.  C2 receives -1 when the charset
   has dimension 1.  For a single byte C, C1 receives C itself.
   CHARSET, C1 and C2 must be lvalues; C is evaluated several
   times.  */

#define SPLIT_CHAR(c, charset, c1, c2) \
  (!SINGLE_BYTE_CHAR_P (c) \
   ? (!((c) & CHAR_FIELD1_MASK) \
      ? (charset = CHAR_FIELD2 (c) + 0x70, \
         c1 = CHAR_FIELD3 (c), \
         c2 = -1) \
      : (charset = (CHAR_FIELD1 (c) \
                    + ((c) >= MIN_CHAR_PRIVATE_DIMENSION2 ? 0xE0 : 0x8F)), \
         c1 = CHAR_FIELD2 (c), \
         c2 = CHAR_FIELD3 (c))) \
   : ((charset \
       = (!ASCII_BYTE_P (c) \
          ? ((c) >= 0xA0 ? CHARSET_8_BIT_GRAPHIC : CHARSET_8_BIT_CONTROL) \
          : CHARSET_ASCII)), \
      c1 = (c), c2 = -1))
|
17052
|
486
|
25505
|
/* Return 1 iff character C has a valid printable glyph.  An ASCII
   byte is accepted immediately; any other C is decided by
   char_printable_p.  */
#define CHAR_PRINTABLE_P(c) \
  (ASCII_BYTE_P (c) ? 1 : char_printable_p (c) != 0)
|
25505
|
489
|
19319
|
/* The charset of the character at STR is stored in CHARSET, and the
   position-codes are stored in C1 and C2.  C2 is meant to receive -1
   if the character is just 2 bytes; that is left to split_string.

   If the first byte of STR does not start a form of at least 2
   bytes, if that form would be longer than LEN, or if split_string
   returns a negative value, the first byte of STR is stored in C1
   and CHARSET becomes CHARSET_ASCII.

   The value of the whole expression is the resulting CHARSET.  LEN
   and the three output arguments are parenthesized so that
   expression arguments group as the caller wrote them.  */

#define SPLIT_STRING(str, len, charset, c1, c2) \
  ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \
    || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > (len) \
    || split_string (str, (len), &(charset), &(c1), &(c2)) < 0) \
   ? ((c1) = *(str), (charset) = CHARSET_ASCII) \
   : (charset))
|
|
500
|
|
501 /* Mapping table from ISO2022's charset (specified by DIMENSION,
|
|
502 CHARS, and FINAL_CHAR) to Emacs' charset. Should be accessed by
|
|
503 macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
|
|
504 extern int iso_charset_table[2][2][128];
|
|
505
|
|
/* Element of iso_charset_table for DIMENSION, CHARS and FINAL_CHAR,
   each converted with XINT.  The first index is DIMENSION - 1, the
   second is 1 when CHARS is above 94 and 0 otherwise.  */
#define ISO_CHARSET_TABLE(dimension, chars, final_char) \
  iso_charset_table[XINT (dimension) - 1][94 < XINT (chars)][XINT (final_char)]
|
|
508
|
|
/* Nonzero if byte C starts a multi-byte form longer than one byte.  */
#define BASE_LEADING_CODE_P(c) (1 < BYTES_BY_CHAR_HEAD ((unsigned char) (c)))
|
|
510
|
23130
|
/* Return how many bytes C will occupy in a multibyte buffer.  A
   single byte C takes 2 bytes when it is in 0x80..0x9F and 1 byte
   otherwise; any other C is measured by char_bytes.  */
#define CHAR_BYTES(c) \
  (!SINGLE_BYTE_CHAR_P (c) \
   ? char_bytes (c) \
   : ((!ASCII_BYTE_P (c) && (c) < 0xA0) ? 2 : 1))
|
23130
|
516
|
17052
|
517 /* The following two macros CHAR_STRING and STRING_CHAR are the main
|
|
518 entry points to convert between Emacs two types of character
|
|
519 representations: multi-byte form and single-word form (character
|
|
520 code). */
|
|
521
|
26843
|
/* Store multi-byte form of the character C in STR.  The caller should
   allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in
   advance.  Returns the length of the multi-byte form.  If C is an
   invalid character code, signal an error.

   An ASCII byte is stored directly and the length is 1; every other
   character is handed to char_to_string.  STR is parenthesized
   before the cast so that an expression argument such as `buf + n'
   is converted as a whole.  */

#define CHAR_STRING(c, str) \
  (ASCII_BYTE_P (c) \
   ? (*(str) = (unsigned char) (c), 1) \
   : char_to_string (c, (unsigned char *) (str)))
|
17052
|
531
|
|
/* Return the character code of the character whose multi-byte form
   is at STR and whose length is LEN.  When the first byte stands for
   a one-byte form it is returned as is; otherwise string_to_char
   decodes the sequence.  If STR doesn't contain valid multi-byte
   form, only the first byte in STR is returned.  */

#define STRING_CHAR(str, len) \
  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) != 1 \
   ? string_to_char (str, len, 0) \
   : (unsigned char) *(str))
|
17052
|
540
|
25505
|
/* Like STRING_CHAR, but additionally set the third argument
   ACTUAL_LEN to the length of the multi-byte form: 1 for a one-byte
   form, otherwise whatever string_to_char stores through the pointer
   it is given.  To learn only the length, use
   MULTIBYTE_FORM_LENGTH.  */

#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
  (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) != 1 \
   ? string_to_char (str, len, &(actual_len)) \
   : ((actual_len) = 1, (unsigned char) *(str)))
|
22184
|
549
|
29004
|
/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues;
   we increment them past the character fetched.

   For a multibyte STRING the character is decoded with
   STRING_CHAR_AND_LENGTH; for a unibyte STRING a single byte is
   fetched.  The temporaries carry a `fetch_string_char_' prefix so
   that they cannot capture a caller variable named `ptr',
   `space_left' or `actual_len'.  The `if (1) ... else' shape lets
   the caller write a terminating semicolon.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
if (1) \
  { \
    (CHARIDX)++; \
    if (STRING_MULTIBYTE (STRING)) \
      { \
        unsigned char *fetch_string_char_ptr \
          = &XSTRING (STRING)->data[BYTEIDX]; \
        int fetch_string_char_space_left \
          = XSTRING (STRING)->size_byte - (BYTEIDX); \
        int fetch_string_char_actual_len; \
 \
        (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \
                                           fetch_string_char_space_left, \
                                           fetch_string_char_actual_len); \
        (BYTEIDX) += fetch_string_char_actual_len; \
      } \
    else \
      (OUTPUT) = XSTRING (STRING)->data[(BYTEIDX)++]; \
  } \
else
|
|
574
|
|
/* Like FETCH_STRING_CHAR_ADVANCE but assume STRING is multibyte, so
   the character is always decoded with STRING_CHAR_AND_LENGTH.
   BYTEIDX and CHARIDX must be lvalues; both are advanced past the
   character fetched.  Every temporary, including the length, carries
   a `fetch_string_char_' prefix so that none of them can capture a
   caller variable.  */

#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
if (1) \
  { \
    unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \
    int fetch_string_char_space_left \
      = XSTRING (STRING)->size_byte - (BYTEIDX); \
    int fetch_string_char_actual_len; \
 \
    (OUTPUT) \
      = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \
                                fetch_string_char_space_left, \
                                fetch_string_char_actual_len); \
 \
    (BYTEIDX) += fetch_string_char_actual_len; \
    (CHARIDX)++; \
  } \
else
|
|
592
|
29004
|
/* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
   buffer.  CHARIDX and BYTEIDX must be lvalues; both are advanced
   past the character fetched.

   When the current buffer has multibyte characters enabled the
   character is decoded with STRING_CHAR_AND_LENGTH, otherwise one
   byte is read.  The temporaries carry a `fetch_char_' prefix so
   that they cannot capture a caller variable named `ptr',
   `space_left' or `actual_len'.

   NOTE(review): the space computation compares CHARIDX with GPT
   after CHARIDX has already been incremented; confirm that this is
   intended.  */

#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
if (1) \
  { \
    (CHARIDX)++; \
    if (!NILP (current_buffer->enable_multibyte_characters)) \
      { \
        unsigned char *fetch_char_ptr = BYTE_POS_ADDR (BYTEIDX); \
        int fetch_char_space_left \
          = (((CHARIDX) < GPT ? GPT_BYTE : Z_BYTE) - (BYTEIDX)); \
        int fetch_char_actual_len; \
 \
        (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_char_ptr, \
                                           fetch_char_space_left, \
                                           fetch_char_actual_len); \
        (BYTEIDX) += fetch_char_actual_len; \
      } \
    else \
      { \
        (OUTPUT) = *(BYTE_POS_ADDR (BYTEIDX)); \
        (BYTEIDX)++; \
      } \
  } \
else
|
|
616
|
17052
|
/* Return the length of the multi-byte form at string STR of length
   LEN: 1 when the first byte stands for a one-byte form, otherwise
   the value computed by multibyte_form_length.  */

#define MULTIBYTE_FORM_LENGTH(str, len) \
  (BYTES_BY_CHAR_HEAD (*(unsigned char *)(str)) != 1 \
   ? multibyte_form_length (str, len) \
   : 1)
|
|
623
|
|
624 #ifdef emacs
|
|
625
|
25505
|
/* Increase the buffer byte position POS_BYTE of the current buffer to
   the next character boundary.  This macro relies on the fact that
   *GPT_ADDR and *Z_ADDR are always accessible and the values are
   '\0'.  No range checking of POS_BYTE.

   POS_BYTE must be an lvalue.  The temporaries carry an `inc_pos_'
   prefix so that a caller variable named `p', `len' or `bytes' can
   be passed as POS_BYTE.  */

#ifdef BYTE_COMBINING_DEBUG

#define INC_POS(pos_byte) \
  do { \
    unsigned char *inc_pos_p = BYTE_POS_ADDR (pos_byte); \
    if (BASE_LEADING_CODE_P (*inc_pos_p)) \
      { \
        int inc_pos_len, inc_pos_bytes; \
        inc_pos_len = Z_BYTE - (pos_byte); \
        PARSE_MULTIBYTE_SEQ (inc_pos_p, inc_pos_len, inc_pos_bytes); \
        (pos_byte) += inc_pos_bytes; \
      } \
    else \
      (pos_byte)++; \
  } while (0)

#else /* not BYTE_COMBINING_DEBUG */

#define INC_POS(pos_byte) \
  do { \
    unsigned char *inc_pos_p = BYTE_POS_ADDR (pos_byte); \
    (pos_byte) += BYTES_BY_CHAR_HEAD (*inc_pos_p); \
  } while (0)

#endif /* not BYTE_COMBINING_DEBUG */
|
|
656
|
25505
|
/* Decrease the buffer byte position POS_BYTE of the current buffer to
   the previous character boundary.  No range checking of POS_BYTE.

   POS_BYTE is first decremented by one.  If the byte now at POS_BYTE
   is not a character head, the macro scans backward (not past the
   start of the text on that side of the gap) to the nearest head
   byte and, when PARSE_MULTIBYTE_SEQ reports a form exactly as long
   as the scanned span, moves POS_BYTE back to that head.

   POS_BYTE must be an lvalue.  The temporaries carry a `dec_pos_'
   prefix so that a caller variable named `p', `len' or `bytes' can
   be passed as POS_BYTE.  */
#define DEC_POS(pos_byte) \
  do { \
    unsigned char *dec_pos_p, *dec_pos_min; \
 \
    (pos_byte)--; \
    if ((pos_byte) < GPT_BYTE) \
      dec_pos_p = BEG_ADDR + (pos_byte) - 1, dec_pos_min = BEG_ADDR; \
    else \
      dec_pos_p = BEG_ADDR + GAP_SIZE + (pos_byte) - 1, \
        dec_pos_min = GAP_END_ADDR; \
    if (dec_pos_p > dec_pos_min && !CHAR_HEAD_P (*dec_pos_p)) \
      { \
        /* Remember the last byte, then walk back to a head byte.  */ \
        unsigned char *dec_pos_end = dec_pos_p--; \
        int dec_pos_len, dec_pos_bytes; \
        while (dec_pos_p > dec_pos_min && !CHAR_HEAD_P (*dec_pos_p)) \
          dec_pos_p--; \
        dec_pos_len = dec_pos_end + 1 - dec_pos_p; \
        PARSE_MULTIBYTE_SEQ (dec_pos_p, dec_pos_len, dec_pos_bytes); \
        if (dec_pos_bytes == dec_pos_len) \
          (pos_byte) -= dec_pos_len - 1; \
      } \
  } while (0)
|
|
679
|
|
/* Advance CHARPOS by one character and BYTEPOS by the matching
   number of bytes.  When the current buffer has multibyte characters
   disabled BYTEPOS simply grows by one; otherwise INC_POS moves it
   to the next character boundary.  */

#define INC_BOTH(charpos, bytepos) \
do \
  { \
    (charpos)++; \
    if (!NILP (current_buffer->enable_multibyte_characters)) \
      INC_POS ((bytepos)); \
    else \
      (bytepos)++; \
  } \
while (0)
|
|
692
|
|
/* Move CHARPOS back by one character and BYTEPOS back by the matching
   number of bytes.  When the current buffer has multibyte characters
   disabled, BYTEPOS simply moves by one byte; otherwise DEC_POS
   decides how far it moves.  */

#define DEC_BOTH(charpos, bytepos)                                  \
  do {                                                              \
    --(charpos);                                                    \
    if (!NILP (current_buffer->enable_multibyte_characters))        \
      DEC_POS ((bytepos));                                          \
    else                                                            \
      --(bytepos);                                                  \
  } while (0)
|
|
705
|
25505
|
/* Increase the byte position POS_BYTE of buffer BUF to the next
   character boundary.  This macro relies on the fact that
   *GPT_ADDR and *Z_ADDR are always accessible and the values are
   '\0'.  No range checking of POS_BYTE.

   POS_BYTE must be a modifiable lvalue without side effects (it is
   evaluated more than once).  It is parenthesized where it is read or
   modified so that an argument such as `*q' is updated itself rather
   than being expanded to `*q++'.  */

#ifdef BYTE_COMBINING_DEBUG

/* Debug variant: when the byte at POS_BYTE satisfies
   BASE_LEADING_CODE_P, hand the bytes up to BUF_Z_BYTE (BUF) to
   PARSE_MULTIBYTE_SEQ and advance by the length it reports;
   otherwise advance by a single byte.  The argument must not mention
   variables named p, len or bytes, which are declared locally.  */
#define BUF_INC_POS(buf, pos_byte)                              \
  do {                                                          \
    unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte);        \
    if (BASE_LEADING_CODE_P (*p))                               \
      {                                                         \
        int len, bytes;                                         \
        len = BUF_Z_BYTE (buf) - (pos_byte);                    \
        PARSE_MULTIBYTE_SEQ (p, len, bytes);                    \
        (pos_byte) += bytes;                                    \
      }                                                         \
    else                                                        \
      (pos_byte)++;                                             \
  } while (0)

#else /* not BYTE_COMBINING_DEBUG */

/* Normal variant: advance by the byte count that BYTES_BY_CHAR_HEAD
   reports for the byte stored at POS_BYTE.  */
#define BUF_INC_POS(buf, pos_byte)                              \
  do {                                                          \
    unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte);        \
    (pos_byte) += BYTES_BY_CHAR_HEAD (*p);                      \
  } while (0)

#endif /* not BYTE_COMBINING_DEBUG */
|
|
736
|
25505
|
/* Decrease the byte position POS_BYTE of buffer BUF to the previous
   character boundary.  No range checking of POS_BYTE.

   POS_BYTE is first decremented by one.  If the byte found there is
   not a character head (CHAR_HEAD_P), scan backward, no further than
   the start of the text segment containing it (BUF_BEG_ADDR (BUF) for
   positions below BUF_GPT_BYTE (BUF), BUF_GAP_END_ADDR (BUF)
   otherwise), until a character head is found.  The sequence starting
   there is then handed to PARSE_MULTIBYTE_SEQ; only when the parsed
   length covers exactly the bytes scanned is POS_BYTE moved back to
   the start of that sequence.  Otherwise the result is the plain
   one-byte step.

   POS_BYTE must be a modifiable lvalue without side effects (it is
   evaluated several times).  It is parenthesized at every use so that
   an argument such as `*q' is decremented itself rather than being
   expanded to `*q--'.  The arguments must not mention variables named
   p, p_min, pend, len or bytes, which this macro declares locally.  */
#define BUF_DEC_POS(buf, pos_byte)                                          \
  do {                                                                      \
    unsigned char *p, *p_min;                                               \
    (pos_byte)--;                                                           \
    if ((pos_byte) < BUF_GPT_BYTE (buf))                                    \
      {                                                                     \
        p = BUF_BEG_ADDR (buf) + (pos_byte) - 1;                            \
        p_min = BUF_BEG_ADDR (buf);                                         \
      }                                                                     \
    else                                                                    \
      {                                                                     \
        p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + (pos_byte) - 1;       \
        p_min = BUF_GAP_END_ADDR (buf);                                     \
      }                                                                     \
    if (p > p_min && !CHAR_HEAD_P (*p))                                     \
      {                                                                     \
        unsigned char *pend = p--;                                          \
        int len, bytes;                                                     \
        while (p > p_min && !CHAR_HEAD_P (*p)) p--;                         \
        len = pend + 1 - p;                                                 \
        PARSE_MULTIBYTE_SEQ (p, len, bytes);                                \
        if (bytes == len)                                                   \
          (pos_byte) -= len - 1;                                            \
      }                                                                     \
  } while (0)
|
|
764
|
|
765 #endif /* emacs */
|
|
766
|
26843
|
767 /* This is the maximum byte length of a multibyte sequence. */
|
|
768 #define MAX_MULTIBYTE_LENGTH 4
|
17185
|
769
|
20932
|
770 extern void invalid_character P_ ((int));
|
|
771
|
22120
|
772 extern int translate_char P_ ((Lisp_Object, int, int, int, int));
|
26843
|
773 extern int split_string P_ ((const unsigned char *, int, int *,
|
20308
|
774 unsigned char *, unsigned char *));
|
26843
|
775 extern int char_to_string P_ ((int, unsigned char *));
|
|
776 extern int string_to_char P_ ((const unsigned char *, int, int *));
|
25505
|
777 extern int char_printable_p P_ ((int c));
|
21419
|
778 extern int multibyte_form_length P_ ((const unsigned char *, int));
|
29004
|
779 extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
|
|
780 extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
|
|
781 extern int str_to_multibyte P_ ((unsigned char *, int, int));
|
|
782 extern int str_as_unibyte P_ ((unsigned char *, int));
|
20308
|
783 extern int get_charset_id P_ ((Lisp_Object));
|
29004
|
784 extern int find_charset_in_text P_ ((unsigned char *, int, int, int *,
|
|
785 Lisp_Object));
|
20308
|
786 extern int strwidth P_ ((unsigned char *, int));
|
23130
|
787 extern int char_bytes P_ ((int));
|
23810
|
788 extern int char_valid_p P_ ((int, int));
|
17726
|
789
|
22184
|
790 extern Lisp_Object Vtranslation_table_vector;
|
22120
|
791
|
22184
|
/* Fetch the translation table whose id number is ID, i.e. the cdr of
   element ID of Vtranslation_table_vector.  */
#define GET_TRANSLATION_TABLE(id)                                       \
  (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
|
20719
|
795
|
23488
|
796 /* A char-table for characters which may invoke auto-filling. */
|
|
797 extern Lisp_Object Vauto_fill_chars;
|
|
798
|
20719
|
/* Copy LEN bytes from FROM to TO.  This macro should be used only
   when a caller knows that LEN is short and the obvious copy loop is
   faster than calling bcopy which has some overhead.  Copying a
   multibyte sequence of a multibyte character is the typical case.

   Each argument is evaluated exactly once, in the order LEN, FROM,
   TO, and none of the caller's variables is modified.  Bytes are
   copied one at a time from the lowest address upward.  A LEN of zero
   or less copies nothing.  FROM may point to const data.

   The temporaries carry a `bcopy_short_' prefix so that a caller may
   pass variables with everyday names such as `i' or `from_p' without
   the macro's own locals shadowing them.  */

#define BCOPY_SHORT(from, to, len)                              \
  do {                                                          \
    int bcopy_short_n = (len);                                  \
    const unsigned char *bcopy_short_src = (from);              \
    unsigned char *bcopy_short_dst = (to);                      \
    while (bcopy_short_n-- > 0)                                 \
      *bcopy_short_dst++ = *bcopy_short_src++;                  \
  } while (0)
|
|
810
|
29570
|
811 #endif /* EMACS_CHARSET_H */
|