Mercurial > emacs
comparison src/charset.h @ 29004:383e4e21306a
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
CHARSET_8_BIT_GRAPHIC): New macros.
(SINGLE_BYTE_CHAR_P): Make it faster by using casting.
(CHARSET_ISO_GRAPHIC_PLANE): Use XINT instead of XFASTINT.
(CHARSET_REVERSE_CHARSET): Likewise.
(CHARSET_VALID_P): Handle new charsets; eight-bit-control and
eight-bit-graphic.
(BYTES_BY_CHAR_HEAD, WIDTH_BY_CHAR_HEAD): Optimize for ASCII.
(CHAR_CHARSET, MAKE_CHAR, SPLIT_CHAR, CHAR_BYTES): Likewise.
(PARSE_MULTIBYTE_SEQ) [BYTE_COMBINING_DEBUG]: Abort if we
encounter an invalid multibyte sequence.
(PARSE_MULTIBYTE_SEQ) [not BYTE_COMBINING_DEBUG]: Assume multibyte
sequence is always valid.
(MAKE_NON_ASCII_CHAR, SPLIT_NON_ASCII_CHAR): These macros deleted.
(UNIBYTE_STR_AS_MULTIBYTE_P, MULTIBYTE_STR_AS_UNIBYTE_P): New
macros.
(CHAR_STRING): For 8-bit characters, call char_to_string.
(INC_POS) [not BYTE_COMBINING_DEBUG]: Faster version. Assume
multibyte sequence is always valid.
(BUF_INC_POS) [not BYTE_COMBINING_DEBUG]: Likewise.
(parse_str_as_multibyte, str_as_multibyte, str_to_multibyte,
str_as_unibyte): Extern them.
(BCOPY_SHORT): Fix a bug.
(CHAR_LEN): This macro deleted. Callers changed to use
CHAR_BYTES.
(FETCH_STRING_CHAR_ADVANCE): Check multibyteness of STRING.
(FETCH_STRING_CHAR_ADVANCE_NO_CHECK): New macro.
(FETCH_CHAR_ADVANCE): Check multibyteness of the current buffer.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 19 May 2000 23:54:05 +0000 |
parents | 1fec001e68c5 |
children | a6f1d75744d8 |
comparison
equal
deleted
inserted
replaced
29003:72eafb39ec65 | 29004:383e4e21306a |
---|---|
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
20 Boston, MA 02111-1307, USA. */ | 20 Boston, MA 02111-1307, USA. */ |
21 | 21 |
22 #ifndef _CHARSET_H | 22 #ifndef _CHARSET_H |
23 #define _CHARSET_H | 23 #define _CHARSET_H |
24 | |
25 /* #define BYTE_COMBINING_DEBUG */ | |
24 | 26 |
25 /*** GENERAL NOTE on CHARACTER SET (CHARSET) *** | 27 /*** GENERAL NOTE on CHARACTER SET (CHARSET) *** |
26 | 28 |
27 A character set ("charset" hereafter) is a meaningful collection | 29 A character set ("charset" hereafter) is a meaningful collection |
28 (i.e. language, culture, functionality, etc) of characters. Emacs | 30 (i.e. language, culture, functionality, etc) of characters. Emacs |
43 we use the following convention for C variable names: | 45 we use the following convention for C variable names: |
44 charset_symbol: Emacs Lisp symbol of a charset | 46 charset_symbol: Emacs Lisp symbol of a charset |
45 charset_id: Emacs Lisp integer of an identification number of a charset | 47 charset_id: Emacs Lisp integer of an identification number of a charset |
46 charset: C integer of an identification number of a charset | 48 charset: C integer of an identification number of a charset |
47 | 49 |
48 Each charset (except for ASCII) is assigned a base leading-code | 50 Each charset (except for ascii) is assigned a base leading-code |
49 (range 0x80..0x9D). In addition, a charset of greater than 0xA0 | 51 (range 0x80..0x9E). In addition, a charset of greater than 0xA0 |
50 (whose base leading-code is 0x9A..0x9D) is assigned an extended | 52 (whose base leading-code is 0x9A..0x9D) is assigned an extended |
51 leading-code (range 0xA0..0xFE). In this case, each base | 53 leading-code (range 0xA0..0xFE). In this case, each base |
52 leading-code specifies the allowable range of extended leading-code as | 54 leading-code specifies the allowable range of extended leading-code as |
53 shown in the table below. A leading-code is used to represent a | 55 shown in the table below. A leading-code is used to represent a |
54 character in Emacs' buffer and string. | 56 character in Emacs' buffer and string. |
65 (ASCII) | 67 (ASCII) |
66 0x01..0x7F --never used-- | 68 0x01..0x7F --never used-- |
67 0x80 --never used-- | 69 0x80 --never used-- |
68 0x81..0x8F official dim1 same as charset -- none -- | 70 0x81..0x8F official dim1 same as charset -- none -- |
69 0x90..0x99 official dim2 same as charset -- none -- | 71 0x90..0x99 official dim2 same as charset -- none -- |
70 0x9A..0x9F --never used-- | 72 0x9A..0x9D --never used-- |
73 0x9E official dim1 same as charset -- none -- | |
74 (eight-bit-control) | |
75 0x9F official dim1 -- none -- -- none -- | |
76 (eight-bit-graphic) | |
71 0xA0..0xDF private dim1 0x9A same as charset | 77 0xA0..0xDF private dim1 0x9A same as charset |
72 of 1-column width | 78 of 1-column width |
73 0xE0..0xEF private dim1 0x9B same as charset | 79 0xE0..0xEF private dim1 0x9B same as charset |
74 of 2-column width | 80 of 2-column width |
75 0xF0..0xF4 private dim2 0x9C same as charset | 81 0xF0..0xF4 private dim2 0x9C same as charset |
86 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */ | 92 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */ |
87 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */ | 93 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */ |
88 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ | 94 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ |
89 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ | 95 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ |
90 | 96 |
97 #define LEADING_CODE_8_BIT_CONTROL 0x9E /* for `eight-bit-control' */ | |
98 | |
91 /* Extended leading-code. */ | 99 /* Extended leading-code. */ |
92 /* Start of each extended leading-codes. */ | 100 /* Start of each extended leading-codes. */ |
93 #define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */ | 101 #define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */ |
94 #define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */ | 102 #define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */ |
95 #define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */ | 103 #define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */ |
107 | 115 |
108 /* Maximum value of overall charset identification number. */ | 116 /* Maximum value of overall charset identification number. */ |
109 #define MAX_CHARSET 0xFE | 117 #define MAX_CHARSET 0xFE |
110 | 118 |
111 /* Definition of special charsets. */ | 119 /* Definition of special charsets. */ |
112 #define CHARSET_ASCII 0 | 120 #define CHARSET_ASCII 0 /* 0x00..0x7F */ |
113 | 121 #define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */ |
114 extern int charset_ascii; /* ASCII */ | 122 #define CHARSET_8_BIT_GRAPHIC 0x9F /* 0xA0..0xFF */ |
123 | |
115 extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ | 124 extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ |
116 extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ | 125 extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ |
117 extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ | 126 extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ |
118 extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */ | 127 extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */ |
119 extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ | 128 extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ |
120 extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ | 129 extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ |
121 extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | 130 extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ |
122 | 131 |
123 /* Check if CH is the head of multi-byte form, i.e., | 132 /* Check if CH is an ASCII character or a base leading-code. |
124 an ASCII character or a base leading-code. */ | 133 Nowadays, any byte can be the first byte of a character in a |
134 multibyte buffer/string. So this macro name is not appropriate. */ | |
125 #define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0) | 135 #define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0) |
126 | 136 |
127 /*** GENERAL NOTE on CHARACTER REPRESENTATION *** | 137 /*** GENERAL NOTE on CHARACTER REPRESENTATION *** |
128 | 138 |
129 At first, the term "character" or "char" is used for a multilingual | 139 At first, the term "character" or "char" is used for a multilingual |
156 character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold | 166 character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold |
157 POSITION-CODE-1 and POSITION-CODE-2 respectively. | 167 POSITION-CODE-1 and POSITION-CODE-2 respectively. |
158 | 168 |
159 More precisely... | 169 More precisely... |
160 | 170 |
161 FIELD2 of DIMENSION1 character (except for ASCII) is "charset - 0x70". | 171 FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control, |
162 This is to make all character codes except for ASCII greater than | 172 and eight-bit-graphic) is "charset - 0x70". This is to make all |
163 256 (ASCII's FIELD2 is 0). So, the range of FIELD2 of DIMENSION1 | 173 character codes except for ASCII and 8-bit codes greater than 256. |
164 character is 0 or 0x11..0x7F. | 174 So, the range of FIELD2 of DIMENSION1 character is 0, 1, or |
175 0x11..0x7F. | |
165 | 176 |
166 FIELD1 of DIMENSION2 character is "charset - 0x8F" for official | 177 FIELD1 of DIMENSION2 character is "charset - 0x8F" for official |
167 charset and "charset - 0xE0" for private charset. So, the range of | 178 charset and "charset - 0xE0" for private charset. So, the range of |
168 FIELD1 of DIMENSION2 character is 0x01..0x1E. | 179 FIELD1 of DIMENSION2 character is 0x01..0x1E. |
169 | 180 |
170 ----------------------------------------------------------------------- | 181 ----------------------------------------------------------------------------- |
171 charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit) | 182 charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit) |
172 ----------------------------------------------------------------------- | 183 ----------------------------------------------------------------------------- |
173 ASCII 0 0 POSITION-CODE-1 | 184 ascii 0 0 0x00..0x7F |
174 DIMENSION1 0 charset - 0x70 POSITION-CODE-1 | 185 eight-bit-control 0 1 0x00..0x1F |
175 DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2 | 186 eight-bit-graphic 0 1 0x20..0x7F |
176 DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2 | 187 DIMENSION1 0 charset - 0x70 POSITION-CODE-1 |
177 ----------------------------------------------------------------------- | 188 DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2 |
189 DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2 | |
190 ----------------------------------------------------------------------------- | |
178 "(o)": official, "(p)": private | 191 "(o)": official, "(p)": private |
179 ----------------------------------------------------------------------- | 192 ----------------------------------------------------------------------------- |
180 | |
181 */ | 193 */ |
182 | 194 |
183 /* Masks of each field of character code. */ | 195 /* Masks of each field of character code. */ |
184 #define CHAR_FIELD1_MASK (0x1F << 14) | 196 #define CHAR_FIELD1_MASK (0x1F << 14) |
185 #define CHAR_FIELD2_MASK (0x7F << 7) | 197 #define CHAR_FIELD2_MASK (0x7F << 7) |
200 #define MIN_CHAR_PRIVATE_DIMENSION2 \ | 212 #define MIN_CHAR_PRIVATE_DIMENSION2 \ |
201 ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14) | 213 ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14) |
202 /* Maximum character code currently used plus 1. */ | 214 /* Maximum character code currently used plus 1. */ |
203 #define MAX_CHAR (0x1F << 14) | 215 #define MAX_CHAR (0x1F << 14) |
204 | 216 |
205 /* 1 if C is an ASCII character, else 0. */ | 217 /* 1 if C is a single byte character, else 0. */ |
206 #define SINGLE_BYTE_CHAR_P(c) ((c) >= 0 && (c) < 0x100) | 218 #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100) |
207 | 219 |
208 /* 1 if BYTE is a character in itself, in multibyte mode. */ | 220 /* 1 if BYTE is an ASCII character in itself, in multibyte mode. */ |
209 #define ASCII_BYTE_P(byte) ((byte) < 0x80) | 221 #define ASCII_BYTE_P(byte) ((byte) < 0x80) |
210 | 222 |
211 /* A char-table containing information of each character set. | 223 /* A char-table containing information of each character set. |
212 | 224 |
213 Unlike ordinary char-tables, this doesn't contain any nested table. | 225 Unlike ordinary char-tables, this doesn't contain any nested table. |
227 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2. | 239 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2. |
228 | 240 |
229 CHARS (integer) is the number of characters in a dimension: 94 or 96. | 241 CHARS (integer) is the number of characters in a dimension: 94 or 96. |
230 | 242 |
231 WIDTH (integer) is the number of columns a character in the charset | 243 WIDTH (integer) is the number of columns a character in the charset |
232 occupies on the screen: one of 0, 1, and 2. | 244 occupies on the screen: one of 0, 1, and 2. |
233 | 245 |
234 DIRECTION (integer) is the rendering direction of characters in the | 246 DIRECTION (integer) is the rendering direction of characters in the |
235 charset when rendering. If 0, render from left to right, else | 247 charset when rendering. If 0, render from left to right, else |
236 render from right to left. | 248 render from right to left. |
237 | 249 |
240 | 252 |
241 LEADING-CODE-EXT (integer) is the extended leading-code for the | 253 LEADING-CODE-EXT (integer) is the extended leading-code for the |
242 charset. All charsets of less than 0xA0 have the value 0. | 254 charset. All charsets of less than 0xA0 have the value 0. |
243 | 255 |
244 ISO-FINAL-CHAR (character) is the final character of the | 256 ISO-FINAL-CHAR (character) is the final character of the |
245 corresponding ISO 2022 charset. | 257 corresponding ISO 2022 charset. It is -1 for a charset |
258 that is used only internally (e.g. `eight-bit-control'). | |
246 | 259 |
247 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked | 260 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked |
248 while encoding to variants of ISO 2022 coding system, one of the | 261 while encoding to variants of ISO 2022 coding system, one of the |
249 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). | 262 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It |
263 is -1 for a charset that is used only internally | |
264 (e.g. `eight-bit-control'). | |
250 | 265 |
251 REVERSE-CHARSET (integer) is the charset which differs only in | 266 REVERSE-CHARSET (integer) is the charset which differs only in |
252 LEFT-TO-RIGHT value from the charset. If there's no such a | 267 LEFT-TO-RIGHT value from the charset. If there's no such a |
253 charset, the value is -1. | 268 charset, the value is -1. |
254 | 269 |
307 #define CHARSET_LEADING_CODE_BASE(charset) \ | 322 #define CHARSET_LEADING_CODE_BASE(charset) \ |
308 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)) | 323 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)) |
309 #define CHARSET_LEADING_CODE_EXT(charset) \ | 324 #define CHARSET_LEADING_CODE_EXT(charset) \ |
310 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)) | 325 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)) |
311 #define CHARSET_ISO_FINAL_CHAR(charset) \ | 326 #define CHARSET_ISO_FINAL_CHAR(charset) \ |
312 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX)) | 327 XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX)) |
313 #define CHARSET_ISO_GRAPHIC_PLANE(charset) \ | 328 #define CHARSET_ISO_GRAPHIC_PLANE(charset) \ |
314 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)) | 329 XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)) |
315 #define CHARSET_REVERSE_CHARSET(charset) \ | 330 #define CHARSET_REVERSE_CHARSET(charset) \ |
316 XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)) | 331 XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)) |
317 | 332 |
318 /* Macros to specify direction of a charset. */ | 333 /* Macros to specify direction of a charset. */ |
319 #define CHARSET_DIRECTION_LEFT_TO_RIGHT 0 | 334 #define CHARSET_DIRECTION_LEFT_TO_RIGHT 0 |
329 | 344 |
330 /* 1 if CHARSET is in valid value range, else 0. */ | 345 /* 1 if CHARSET is in valid value range, else 0. */ |
331 #define CHARSET_VALID_P(charset) \ | 346 #define CHARSET_VALID_P(charset) \ |
332 ((charset) == 0 \ | 347 ((charset) == 0 \ |
333 || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ | 348 || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ |
334 || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET)) | 349 || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 \ |
350 && (charset) <= MAX_CHARSET) \ | |
351 || ((charset) == CHARSET_8_BIT_CONTROL) \ | |
352 || ((charset) == CHARSET_8_BIT_GRAPHIC)) | |
335 | 353 |
336 /* 1 if CHARSET is already defined, else 0. */ | 354 /* 1 if CHARSET is already defined, else 0. */ |
337 #define CHARSET_DEFINED_P(charset) \ | 355 #define CHARSET_DEFINED_P(charset) \ |
338 (((charset) >= 0) && ((charset) <= MAX_CHARSET) \ | 356 (((charset) >= 0) && ((charset) <= MAX_CHARSET) \ |
339 && !NILP (CHARSET_TABLE_ENTRY (charset))) | 357 && !NILP (CHARSET_TABLE_ENTRY (charset))) |
340 | 358 |
341 /* Since the information CHARSET-BYTES and CHARSET-WIDTH of | 359 /* Since the information CHARSET-BYTES and CHARSET-WIDTH of |
342 Vcharset_table can be retrieved only the first byte of | 360 Vcharset_table can be retrieved only by the first byte of |
343 multi-byte form (an ASCII code or a base leading-code), we provide | 361 multi-byte form (an ASCII code or a base leading-code), we provide |
344 here tables to be used by macros BYTES_BY_CHAR_HEAD and | 362 here tables to be used by macros BYTES_BY_CHAR_HEAD and |
345 WIDTH_BY_CHAR_HEAD for faster information retrieval. */ | 363 WIDTH_BY_CHAR_HEAD for faster information retrieval. */ |
346 extern int bytes_by_char_head[256]; | 364 extern int bytes_by_char_head[256]; |
347 extern int width_by_char_head[256]; | 365 extern int width_by_char_head[256]; |
348 | 366 |
349 #define BYTES_BY_CHAR_HEAD(char_head) bytes_by_char_head[char_head] | 367 #define BYTES_BY_CHAR_HEAD(char_head) \ |
350 #define WIDTH_BY_CHAR_HEAD(char_head) width_by_char_head[char_head] | 368 (ASCII_BYTE_P (char_head) ? 1 : bytes_by_char_head[char_head]) |
369 #define WIDTH_BY_CHAR_HEAD(char_head) \ | |
370 (ASCII_BYTE_P (char_head) ? 1 : width_by_char_head[char_head]) | |
351 | 371 |
352 /* Charset of the character C. */ | 372 /* Charset of the character C. */ |
353 #define CHAR_CHARSET(c) \ | 373 #define CHAR_CHARSET(c) \ |
354 (SINGLE_BYTE_CHAR_P (c) \ | 374 (SINGLE_BYTE_CHAR_P (c) \ |
355 ? CHARSET_ASCII \ | 375 ? (ASCII_BYTE_P (c) \ |
356 : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \ | 376 ? CHARSET_ASCII \ |
357 ? CHAR_FIELD2 (c) + 0x70 \ | 377 : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC) \ |
358 : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ | 378 : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \ |
359 ? CHAR_FIELD1 (c) + 0x8F \ | 379 ? CHAR_FIELD2 (c) + 0x70 \ |
380 : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ | |
381 ? CHAR_FIELD1 (c) + 0x8F \ | |
360 : CHAR_FIELD1 (c) + 0xE0))) | 382 : CHAR_FIELD1 (c) + 0xE0))) |
361 | 383 |
362 /* Return charset at the place pointed by P. */ | |
363 #define CHARSET_AT(p) \ | |
364 (*(p) < 0x80 \ | |
365 ? CHARSET_ASCII \ | |
366 : (*(p) < LEADING_CODE_PRIVATE_11 \ | |
367 ? (int)*(p) \ | |
368 : (*(p) <= LEADING_CODE_PRIVATE_22 \ | |
369 ? (int)*((p) + 1) \ | |
370 : -1))) | |
371 | |
372 /* Same as `CHARSET_AT ()' but perhaps runs faster because of an | |
373 additional argument C which is the code (byte) at P. */ | |
374 #define FIRST_CHARSET_AT(p, c) \ | |
375 ((c) < 0x80 \ | |
376 ? CHARSET_ASCII \ | |
377 : ((c) < LEADING_CODE_PRIVATE_11 \ | |
378 ? (int)(c) \ | |
379 : ((c) <= LEADING_CODE_PRIVATE_22 \ | |
380 ? (int)*((p) + 1) \ | |
381 : -1))) | |
382 | |
383 /* Check if two characters C1 and C2 belong to the same charset. */ | 384 /* Check if two characters C1 and C2 belong to the same charset. */ |
384 #define SAME_CHARSET_P(c1, c2) \ | 385 #define SAME_CHARSET_P(c1, c2) \ |
385 (SINGLE_BYTE_CHAR_P (c1) \ | 386 (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \ |
386 ? SINGLE_BYTE_CHAR_P (c2) \ | 387 ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \ |
387 : (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \ | 388 : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK)) |
388 ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \ | |
389 : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))) | |
390 | |
391 /* Return a non-ASCII character of which charset is CHARSET and | |
392 position-codes are C1 and C2. DIMENSION1 character ignores C2. */ | |
393 #define MAKE_NON_ASCII_CHAR(charset, c1, c2) \ | |
394 (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \ | |
395 ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \ | |
396 : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \ | |
397 ? ((((charset) - 0x8F) << 14) \ | |
398 | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))) \ | |
399 : ((((charset) - 0xE0) << 14) \ | |
400 | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))) | |
401 | 389 |
402 /* Return a character of which charset is CHARSET and position-codes | 390 /* Return a character of which charset is CHARSET and position-codes |
403 are C1 and C2. DIMENSION1 character ignores C2. */ | 391 are C1 and C2. DIMENSION1 character ignores C2. */ |
404 #define MAKE_CHAR(charset, c1, c2) \ | 392 #define MAKE_CHAR(charset, c1, c2) \ |
405 ((charset) == CHARSET_ASCII \ | 393 ((charset) == CHARSET_ASCII \ |
406 ? (c1) \ | 394 ? (c1) & 0x7F \ |
407 : MAKE_NON_ASCII_CHAR ((charset), (c1), (c2))) | 395 : (((charset) == CHARSET_8_BIT_CONTROL \ |
396 || (charset) == CHARSET_8_BIT_GRAPHIC) \ | |
397 ? ((c1) & 0x7F) | 0x80 \ | |
398 : (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \ | |
399 ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \ | |
400 : ((((charset) \ | |
401 - ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \ | |
402 << 14) \ | |
403 | ((c2) <= 0 ? 0 : ((c2) & 0x7F)) \ | |
404 | ((c1) <= 0 ? 0 : (((c1) & 0x7F) << 7)))))) | |
405 | |
408 | 406 |
409 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or | 407 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or |
410 generic character. If GENERICP is zero, return nonzero iff C is a | 408 generic character. If GENERICP is zero, return nonzero iff C is a |
411 valid normal character. */ | 409 valid normal character. */ |
412 #define CHAR_VALID_P(c, genericp) \ | 410 #define CHAR_VALID_P(c, genericp) \ |
417 nonascii-insert-offset fail to convert unibyte character to a valid | 415 nonascii-insert-offset fail to convert unibyte character to a valid |
418 multibyte character. This makes a Latin-1 character. */ | 416 multibyte character. This makes a Latin-1 character. */ |
419 | 417 |
420 #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 | 418 #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 |
421 | 419 |
422 /* Parse string STR of length LENGTH and check if a multibyte | 420 /* Parse multibyte string STR of length LENGTH and set BYTES to the |
423 characters is at STR. If so, set BYTES for that character, else | 421 byte length of a character at STR. */ |
424 set BYTES to 1. */ | 422 |
423 #ifdef BYTE_COMBINING_DEBUG | |
425 | 424 |
426 #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ | 425 #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ |
427 do { \ | 426 do { \ |
428 int i = 1; \ | 427 int i = 1; \ |
429 while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \ | 428 while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \ |
430 if (i == 1) \ | 429 (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \ |
431 (bytes) = 1; \ | 430 if ((bytes) > i) \ |
432 else \ | 431 abort (); \ |
433 { \ | |
434 (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \ | |
435 if ((bytes) > (length)) \ | |
436 (bytes) = (length); \ | |
437 } \ | |
438 } while (0) | 432 } while (0) |
439 | 433 |
440 /* The charset of non-ASCII character C is stored in CHARSET, and the | 434 #else /* not BYTE_COMBINING_DEBUG */ |
441 position-codes of C are stored in C1 and C2. | 435 |
442 We store -1 in C2 if the character is just 2 bytes. | 436 #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ |
443 | 437 (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]) |
444 Do not use this macro for an ASCII character. */ | 438 |
445 | 439 #endif /* not BYTE_COMBINING_DEBUG */ |
446 #define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2) \ | 440 |
447 ((c) & CHAR_FIELD1_MASK \ | 441 /* Return 1 iff the byte sequence at unibyte string STR (LENGTH bytes) |
448 ? (charset = (CHAR_FIELD1 (c) \ | 442 is valid as a multibyte form. If valid, by a side effect, BYTES is |
449 + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \ | 443 set to the byte length of the multibyte form. */ |
450 c1 = CHAR_FIELD2 (c), \ | 444 |
451 c2 = CHAR_FIELD3 (c)) \ | 445 #define UNIBYTE_STR_AS_MULTIBYTE_P(str, length, bytes) \ |
452 : (charset = CHAR_FIELD2 (c) + 0x70, \ | 446 (((bytes) = BYTES_BY_CHAR_HEAD ((str)[0])) == 1 \ |
453 c1 = CHAR_FIELD3 (c), \ | 447 || ((str)[0] != LEADING_CODE_8_BIT_CONTROL \ |
454 c2 = -1)) | 448 && (bytes) <= (length) \ |
449 && !CHAR_HEAD_P ((str)[1]) \ | |
450 && ((bytes) == 2 \ | |
451 || (!CHAR_HEAD_P ((str)[2]) \ | |
452 && ((bytes) == 3 \ | |
453 || !CHAR_HEAD_P ((str)[3])))))) | |
454 | |
455 /* Return 1 iff the byte sequence at multibyte string STR is valid as | |
456 a unibyte form. By a side effect, BYTES is set to the byte length | |
457 of one character at STR. */ | |
458 | |
459 #define MULTIBYTE_STR_AS_UNIBYTE_P(str, bytes) \ | |
460 ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]), \ | |
461 (str)[0] != LEADING_CODE_8_BIT_CONTROL) | |
455 | 462 |
456 /* The charset of character C is stored in CHARSET, and the | 463 /* The charset of character C is stored in CHARSET, and the |
457 position-codes of C are stored in C1 and C2. | 464 position-codes of C are stored in C1 and C2. |
458 We store -1 in C2 if the dimension of the charset is 1. */ | 465 We store -1 in C2 if the dimension of the charset is 1. */ |
459 | 466 |
460 #define SPLIT_CHAR(c, charset, c1, c2) \ | 467 #define SPLIT_CHAR(c, charset, c1, c2) \ |
461 (SINGLE_BYTE_CHAR_P (c) \ | 468 (SINGLE_BYTE_CHAR_P (c) \ |
462 ? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \ | 469 ? ((charset = ASCII_BYTE_P (c) \ |
463 : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2)) | 470 ? CHARSET_ASCII \ |
471 : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC), \ | |
472 c1 = (c), c2 = -1) \ | |
473 : ((c) & CHAR_FIELD1_MASK \ | |
474 ? (charset = (CHAR_FIELD1 (c) \ | |
475 + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \ | |
476 c1 = CHAR_FIELD2 (c), \ | |
477 c2 = CHAR_FIELD3 (c)) \ | |
478 : (charset = CHAR_FIELD2 (c) + 0x70, \ | |
479 c1 = CHAR_FIELD3 (c), \ | |
480 c2 = -1))) | |
464 | 481 |
465 /* Return 1 iff character C has valid printable glyph. */ | 482 /* Return 1 iff character C has valid printable glyph. */ |
466 #define CHAR_PRINTABLE_P(c) \ | 483 #define CHAR_PRINTABLE_P(c) (ASCII_BYTE_P (c) || char_printable_p (c)) |
467 (SINGLE_BYTE_CHAR_P (c) \ | |
468 || char_printable_p (c)) | |
469 | 484 |
470 /* The charset of the character at STR is stored in CHARSET, and the | 485 /* The charset of the character at STR is stored in CHARSET, and the |
471 position-codes are stored in C1 and C2. | 486 position-codes are stored in C1 and C2. |
472 We store -1 in C2 if the character is just 2 bytes. */ | 487 We store -1 in C2 if the character is just 2 bytes. */ |
473 | 488 |
487 iso_charset_table[XINT (dimension) - 1][XINT (chars) > 94][XINT (final_char)] | 502 iso_charset_table[XINT (dimension) - 1][XINT (chars) > 94][XINT (final_char)] |
488 | 503 |
489 #define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1) | 504 #define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1) |
490 | 505 |
491 /* Return how many bytes C will occupy in a multibyte buffer. */ | 506 /* Return how many bytes C will occupy in a multibyte buffer. */ |
492 #define CHAR_BYTES(c) \ | 507 #define CHAR_BYTES(c) \ |
493 ((SINGLE_BYTE_CHAR_P ((c)) || ((c) & ~((1 << CHARACTERBITS) - 1))) \ | 508 (SINGLE_BYTE_CHAR_P (c) \ |
494 ? 1 : char_bytes (c)) | 509 ? ((ASCII_BYTE_P (c) || (c) >= 0xA0) ? 1 : 2) \ |
510 : char_bytes (c)) | |
495 | 511 |
496 /* The following two macros CHAR_STRING and STRING_CHAR are the main | 512 /* The following two macros CHAR_STRING and STRING_CHAR are the main |
497 entry points to convert between Emacs two types of character | 513 entry points to convert between Emacs two types of character |
498 representations: multi-byte form and single-word form (character | 514 representations: multi-byte form and single-word form (character |
499 code). */ | 515 code). */ |
500 | 516 |
501 /* Store multi-byte form of the character C in STR. The caller should | 517 /* Store multi-byte form of the character C in STR. The caller should |
502 allocate at least 4-byte area at STR in advance. Returns the | 518 allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in |
503 length of the multi-byte form. If C is an invalid character code, | 519 advance. Returns the length of the multi-byte form. If C is an |
504 signal an error. */ | 520 invalid character code, signal an error. */ |
505 | 521 |
506 #define CHAR_STRING(c, str) \ | 522 #define CHAR_STRING(c, str) \ |
507 (SINGLE_BYTE_CHAR_P (c) \ | 523 (ASCII_BYTE_P (c) \ |
508 ? *(str) = (unsigned char)(c), 1 \ | 524 ? (*(str) = (unsigned char)(c), 1) \ |
509 : char_to_string (c, (unsigned char *)str)) | 525 : char_to_string (c, (unsigned char *) str)) |
510 | 526 |
511 /* Return a character code of the character of which multi-byte form | 527 /* Return a character code of the character of which multi-byte form |
512 is at STR and the length is LEN. If STR doesn't contain valid | 528 is at STR and the length is LEN. If STR doesn't contain valid |
513 multi-byte form, only the first byte in STR is returned. */ | 529 multi-byte form, only the first byte in STR is returned. */ |
514 | 530 |
524 #define STRING_CHAR_AND_LENGTH(str, len, actual_len) \ | 540 #define STRING_CHAR_AND_LENGTH(str, len, actual_len) \ |
525 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ | 541 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ |
526 ? ((actual_len) = 1), (unsigned char) *(str) \ | 542 ? ((actual_len) = 1), (unsigned char) *(str) \ |
527 : string_to_char (str, len, &(actual_len))) | 543 : string_to_char (str, len, &(actual_len))) |
528 | 544 |
529 /* Fetch the "next" multibyte character from Lisp string STRING | 545 /* Fetch the "next" character from Lisp string STRING at byte position |
530 at byte position BYTEIDX, character position CHARIDX. | 546 BYTEIDX, character position CHARIDX. Store it into OUTPUT. |
531 Store it into OUTPUT. | |
532 | 547 |
533 All the args must be side-effect-free. | 548 All the args must be side-effect-free. |
534 BYTEIDX and CHARIDX must be lvalues; | 549 BYTEIDX and CHARIDX must be lvalues; |
535 we increment them past the character fetched. */ | 550 we increment them past the character fetched. */ |
536 | 551 |
537 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ | 552 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ |
553 if (1) \ | |
554 { \ | |
555 CHARIDX++; \ | |
556 if (STRING_MULTIBYTE (STRING)) \ | |
557 { \ | |
558 unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \ | |
559 int space_left = XSTRING (STRING)->size_byte - BYTEIDX; \ | |
560 int actual_len; \ | |
561 \ | |
562 OUTPUT = STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \ | |
563 BYTEIDX += actual_len; \ | |
564 } \ | |
565 else \ | |
566 OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \ | |
567 } \ | |
568 else | |
569 | |
570 /* Like FETCH_STRING_CHAR_ADVANCE but assume STRING is multibyte. */ | |
571 | |
572 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \ | |
538 if (1) \ | 573 if (1) \ |
539 { \ | 574 { \ |
540 unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \ | 575 unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \ |
541 int fetch_string_char_space_left = XSTRING (STRING)->size_byte - BYTEIDX; \ | 576 int fetch_string_char_space_left = XSTRING (STRING)->size_byte - BYTEIDX; \ |
542 int actual_len; \ | 577 int actual_len; \ |
548 BYTEIDX += actual_len; \ | 583 BYTEIDX += actual_len; \ |
549 CHARIDX++; \ | 584 CHARIDX++; \ |
550 } \ | 585 } \ |
551 else | 586 else |
552 | 587 |
553 /* Like FETCH_STRING_CHAR_SPACE_LEFT but fetch character from the | 588 /* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current |
554 current buffer. */ | 589 buffer. */ |
555 | 590 |
556 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ | 591 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ |
557 if (1) \ | 592 if (1) \ |
558 { \ | 593 { \ |
559 unsigned char *fetch_buf_char_ptr = BYTE_POS_ADDR (BYTEIDX); \ | |
560 int fetch_buf_char_space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) \ | |
561 - BYTEIDX); \ | |
562 int actual_len; \ | |
563 \ | |
564 OUTPUT \ | |
565 = STRING_CHAR_AND_LENGTH (fetch_buf_char_ptr, \ | |
566 fetch_buf_char_space_left, actual_len); \ | |
567 \ | |
568 BYTEIDX += actual_len; \ | |
569 CHARIDX++; \ | 594 CHARIDX++; \ |
595 if (!NILP (current_buffer->enable_multibyte_characters)) \ | |
596 { \ | |
597 unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \ | |
598 int space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) - BYTEIDX); \ | |
599 int actual_len; \ | |
600 \ | |
601 OUTPUT= STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \ | |
602 BYTEIDX += actual_len; \ | |
603 } \ | |
604 else \ | |
605 { \ | |
606 OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \ | |
607 BYTEIDX++; \ | |
608 } \ | |
570 } \ | 609 } \ |
571 else | 610 else |
572 | 611 |
573 /* Return the length of the multi-byte form at string STR of length LEN. */ | 612 /* Return the length of the multi-byte form at string STR of length LEN. */ |
574 | 613 |
581 | 620 |
582 /* Increase the buffer byte position POS_BYTE of the current buffer to | 621 /* Increase the buffer byte position POS_BYTE of the current buffer to |
583 the next character boundary. This macro relies on the fact that | 622 the next character boundary. This macro relies on the fact that |
584 *GPT_ADDR and *Z_ADDR are always accessible and the values are | 623 *GPT_ADDR and *Z_ADDR are always accessible and the values are |
585 '\0'. No range checking of POS. */ | 624 '\0'. No range checking of POS. */ |
625 | |
626 #ifdef BYTE_COMBINING_DEBUG | |
627 | |
586 #define INC_POS(pos_byte) \ | 628 #define INC_POS(pos_byte) \ |
587 do { \ | 629 do { \ |
588 unsigned char *p = BYTE_POS_ADDR (pos_byte); \ | 630 unsigned char *p = BYTE_POS_ADDR (pos_byte); \ |
589 if (BASE_LEADING_CODE_P (*p)) \ | 631 if (BASE_LEADING_CODE_P (*p)) \ |
590 { \ | 632 { \ |
594 pos_byte += bytes; \ | 636 pos_byte += bytes; \ |
595 } \ | 637 } \ |
596 else \ | 638 else \ |
597 pos_byte++; \ | 639 pos_byte++; \ |
598 } while (0) | 640 } while (0) |
641 | |
642 #else /* not BYTE_COMBINING_DEBUG */ | |
643 | |
644 #define INC_POS(pos_byte) \ | |
645 do { \ | |
646 unsigned char *p = BYTE_POS_ADDR (pos_byte); \ | |
647 pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | |
648 } while (0) | |
649 | |
650 #endif /* not BYTE_COMBINING_DEBUG */ | |
599 | 651 |
600 /* Decrease the buffer byte position POS_BYTE of the current buffer to | 652 /* Decrease the buffer byte position POS_BYTE of the current buffer to |
601 the previous character boundary. No range checking of POS. */ | 653 the previous character boundary. No range checking of POS. */ |
602 #define DEC_POS(pos_byte) \ | 654 #define DEC_POS(pos_byte) \ |
603 do { \ | 655 do { \ |
648 | 700 |
649 /* Increase the buffer byte position POS_BYTE of the current buffer to | 701 /* Increase the buffer byte position POS_BYTE of the current buffer to |
650 the next character boundary. This macro relies on the fact that | 702 the next character boundary. This macro relies on the fact that |
651 *GPT_ADDR and *Z_ADDR are always accessible and the values are | 703 *GPT_ADDR and *Z_ADDR are always accessible and the values are |
652 '\0'. No range checking of POS_BYTE. */ | 704 '\0'. No range checking of POS_BYTE. */ |
705 | |
706 #ifdef BYTE_COMBINING_DEBUG | |
707 | |
653 #define BUF_INC_POS(buf, pos_byte) \ | 708 #define BUF_INC_POS(buf, pos_byte) \ |
654 do { \ | 709 do { \ |
655 unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ | 710 unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ |
656 if (BASE_LEADING_CODE_P (*p)) \ | 711 if (BASE_LEADING_CODE_P (*p)) \ |
657 { \ | 712 { \ |
661 pos_byte += bytes; \ | 716 pos_byte += bytes; \ |
662 } \ | 717 } \ |
663 else \ | 718 else \ |
664 pos_byte++; \ | 719 pos_byte++; \ |
665 } while (0) | 720 } while (0) |
721 | |
722 #else /* not BYTE_COMBINING_DEBUG */ | |
723 | |
724 #define BUF_INC_POS(buf, pos_byte) \ | |
725 do { \ | |
726 unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ | |
727 pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | |
728 } while (0) | |
729 | |
730 #endif /* not BYTE_COMBINING_DEBUG */ | |
666 | 731 |
667 /* Decrease the buffer byte position POS_BYTE of the current buffer to | 732 /* Decrease the buffer byte position POS_BYTE of the current buffer to |
668 the previous character boundary. No range checking of POS_BYTE. */ | 733 the previous character boundary. No range checking of POS_BYTE. */ |
669 #define BUF_DEC_POS(buf, pos_byte) \ | 734 #define BUF_DEC_POS(buf, pos_byte) \ |
670 do { \ | 735 do { \ |
704 unsigned char *, unsigned char *)); | 769 unsigned char *, unsigned char *)); |
705 extern int char_to_string P_ ((int, unsigned char *)); | 770 extern int char_to_string P_ ((int, unsigned char *)); |
706 extern int string_to_char P_ ((const unsigned char *, int, int *)); | 771 extern int string_to_char P_ ((const unsigned char *, int, int *)); |
707 extern int char_printable_p P_ ((int c)); | 772 extern int char_printable_p P_ ((int c)); |
708 extern int multibyte_form_length P_ ((const unsigned char *, int)); | 773 extern int multibyte_form_length P_ ((const unsigned char *, int)); |
774 extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *)); | |
775 extern int str_as_multibyte P_ ((unsigned char *, int, int, int *)); | |
776 extern int str_to_multibyte P_ ((unsigned char *, int, int)); | |
777 extern int str_as_unibyte P_ ((unsigned char *, int)); | |
709 extern int get_charset_id P_ ((Lisp_Object)); | 778 extern int get_charset_id P_ ((Lisp_Object)); |
710 extern int find_charset_in_str P_ ((unsigned char *, int, int *, | 779 extern int find_charset_in_text P_ ((unsigned char *, int, int, int *, |
711 Lisp_Object, int)); | 780 Lisp_Object)); |
712 extern int strwidth P_ ((unsigned char *, int)); | 781 extern int strwidth P_ ((unsigned char *, int)); |
713 extern int char_bytes P_ ((int)); | 782 extern int char_bytes P_ ((int)); |
714 extern int char_valid_p P_ ((int, int)); | 783 extern int char_valid_p P_ ((int, int)); |
715 | 784 |
716 extern Lisp_Object Vtranslation_table_vector; | 785 extern Lisp_Object Vtranslation_table_vector; |
722 /* A char-table for characters which may invoke auto-filling. */ | 791 /* A char-table for characters which may invoke auto-filling. */ |
723 extern Lisp_Object Vauto_fill_chars; | 792 extern Lisp_Object Vauto_fill_chars; |
724 | 793 |
725 /* Copy LEN bytes from FROM to TO. This macro should be used only | 794 /* Copy LEN bytes from FROM to TO. This macro should be used only |
726 when a caller knows that LEN is short and the obvious copy loop is | 795 when a caller knows that LEN is short and the obvious copy loop is |
727 faster than calling bcopy which has some overhead. */ | 796 faster than calling bcopy which has some overhead. Copying a |
797 multibyte sequence of a multibyte character is the typical case. */ | |
728 | 798 |
729 #define BCOPY_SHORT(from, to, len) \ | 799 #define BCOPY_SHORT(from, to, len) \ |
730 do { \ | 800 do { \ |
731 int i = len; \ | 801 int i = len; \ |
732 unsigned char *from_p = from, *to_p = to; \ | 802 unsigned char *from_p = from, *to_p = to; \ |
733 while (i--) *from_p++ = *to_p++; \ | 803 while (i--) *to_p++ = *from_p++; \ |
734 } while (0) | 804 } while (0) |
735 | 805 |
736 /* Length of C in bytes. */ | |
737 | |
738 #define CHAR_LEN(C) CHARSET_BYTES (CHAR_CHARSET ((C))) | |
739 | |
740 #endif /* _CHARSET_H */ | 806 #endif /* _CHARSET_H */ |