Mercurial > emacs
annotate src/character.h @ 99492:ee792794d888
(isearch-search-fun): Compare the length of the
current search string with the length of the string from the
previous search state to detect the situation when the user
adds or removes characters in the search string.
Use word-search-forward-lax and word-search-backward-lax in this
case, and otherwise word-search-forward and word-search-backward.
author | Juri Linkov <juri@jurta.org> |
---|---|
date | Tue, 11 Nov 2008 19:43:09 +0000 |
parents | d4e07000ca4c |
children | 33f97c9e6889 |
rev | line source |
---|---|
88363 | 1 /* Header for multibyte character handler. |
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. | |
89483 | 3 Licensed to the Free Software Foundation. |
91444
1d067b286f05
Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents:
90798
diff
changeset
|
4 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 |
88363 | 5 National Institute of Advanced Industrial Science and Technology (AIST) |
6 Registration Number H13PRO009 | |
7 | |
8 This file is part of GNU Emacs. | |
9 | |
94994
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91805
diff
changeset
|
10 GNU Emacs is free software: you can redistribute it and/or modify |
88363 | 11 it under the terms of the GNU General Public License as published by |
94994
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91805
diff
changeset
|
12 the Free Software Foundation, either version 3 of the License, or |
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91805
diff
changeset
|
13 (at your option) any later version. |
88363 | 14 |
15 GNU Emacs is distributed in the hope that it will be useful, | |
16 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 GNU General Public License for more details. | |
19 | |
20 You should have received a copy of the GNU General Public License | |
94994
29adfc9354e7
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91805
diff
changeset
|
21 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
88363 | 22 |
23 #ifndef EMACS_CHARACTER_H | |
24 #define EMACS_CHARACTER_H | |
25 | |
88428
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
/*      character code  1st byte   byte sequence
        --------------  --------   -------------
             0-7F       00..7F     0xxxxxxx
            80-7FF      C2..DF     110xxxxx 10xxxxxx
           800-FFFF     E0..EF     1110xxxx 10xxxxxx 10xxxxxx
         10000-1FFFFF   F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        200000-3FFF7F   F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
        3FFF80-3FFFFF   C0..C1     1100000x 10xxxxxx  (for eight-bit-char)
        400000-...      invalid

        invalid 1st byte   80..BF  10xxxxxx
                           F9..FF  11111xxx (xxx != 000)
*/
39 | |
88428
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
/* Maximum character code ((1 << CHARACTERBITS) - 1).  */
#define MAX_CHAR  0x3FFFFF

/* Maximum Unicode character code.  */
#define MAX_UNICODE_CHAR 0x10FFFF

/* Maximum N-byte character codes, i.e. the largest character code
   whose multibyte form occupies N bytes.  Codes above
   MAX_5_BYTE_CHAR (up to MAX_CHAR) are the eight-bit characters,
   which use a 2-byte form.  */
#define MAX_1_BYTE_CHAR 0x7F
#define MAX_2_BYTE_CHAR 0x7FF
#define MAX_3_BYTE_CHAR 0xFFFF
#define MAX_4_BYTE_CHAR 0x1FFFFF
#define MAX_5_BYTE_CHAR 0x3FFF7F

/* Minimum leading code of multibyte characters.  */
#define MIN_MULTIBYTE_LEADING_CODE 0xC0
/* Maximum leading code of multibyte characters.  */
#define MAX_MULTIBYTE_LEADING_CODE 0xF8
0603ad3252c9
(MIN_MULTIBYTE_LEADING_CODE)
Kenichi Handa <handa@m17n.org>
parents:
90533
diff
changeset
|
57 |
88946
233c080b5756
(CHAR_TO_BYTE8): If C is not eight-bit char, call
Kenichi Handa <handa@m17n.org>
parents:
88915
diff
changeset
|
/* Nonzero iff C is a character that corresponds to a raw 8-bit
   byte, i.e. its code is above MAX_5_BYTE_CHAR.  */
#define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR)

/* Return the character code for raw 8-bit byte BYTE.  The result is
   BYTE offset by 0x3FFF00, so bytes 0x80..0xFF map to 0x3FFF80..0x3FFFFF.  */
#define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00)
88428
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
64 |
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
/* Return the raw 8-bit byte for character C.  If C is an eight-bit
   character, the byte is C - 0x3FFF00; otherwise the conversion is
   delegated to multibyte_char_to_unibyte.  C is evaluated more than
   once.  */
#define CHAR_TO_BYTE8(c)                        \
  (CHAR_BYTE8_P (c)                             \
   ? (c) - 0x3FFF00                             \
   : multibyte_char_to_unibyte ((c), Qnil))

/* Return the raw 8-bit byte for character C,
   or -1 if C doesn't correspond to a byte.  Like CHAR_TO_BYTE8, but
   non-eight-bit characters go through multibyte_char_to_unibyte_safe.
   C is evaluated more than once.  */
#define CHAR_TO_BYTE_SAFE(c)                    \
  (CHAR_BYTE8_P (c)                             \
   ? (c) - 0x3FFF00                             \
   : multibyte_char_to_unibyte_safe (c))
95856
f13a77e0e34f
* character.h (CHAR_TO_BYTE_SAFE): New macro.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
95576
diff
changeset
|
77 |
88428
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
/* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
   that corresponds to a raw 8-bit byte.  BYTE is evaluated twice.  */
#define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
81 | |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
/* Mapping table from unibyte chars to multibyte chars.  */
extern int unibyte_to_multibyte_table[256];

/* Convert the unibyte character C to the corresponding multibyte
   character.  If C can't be converted, return C.  Only values in
   0..255 are looked up in the table; anything else (including a
   negative C, which would otherwise index before the table) is
   returned unchanged.  C is evaluated more than once.  */
#define unibyte_char_to_multibyte(c)    \
  ((c) >= 0 && (c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
89 |
90019
1987dfad4543
(unibyte_has_multibyte_table): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
89911
diff
changeset
|
/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
   char.  */
extern char unibyte_has_multibyte_table[256];

/* Look up C in unibyte_has_multibyte_table.  C is used directly as the
   index, so it must be in the range 0..255.  */
#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
1987dfad4543
(unibyte_has_multibyte_table): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
89911
diff
changeset
|
95 |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
/* If C is not ASCII, make it unibyte.  C must be an lvalue; it is
   replaced in place by CHAR_TO_BYTE8 (C) and is evaluated more than
   once.  ASCII values are left untouched.  */
#define MAKE_CHAR_UNIBYTE(c)            \
  do {                                  \
    if (! ASCII_CHAR_P (c))             \
      (c) = CHAR_TO_BYTE8 (c);          \
  } while (0)
89018
a9f683a73092
(MAKE_CHAR_UNIBYTE, MAKE_CHAR_MULTIBYTE): New macros.
Kenichi Handa <handa@m17n.org>
parents:
88946
diff
changeset
|
102 |
a9f683a73092
(MAKE_CHAR_UNIBYTE, MAKE_CHAR_MULTIBYTE): New macros.
Kenichi Handa <handa@m17n.org>
parents:
88946
diff
changeset
|
103 |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
/* If C is not ASCII, make it multibyte.  Assumes C < 256.
   C must be an lvalue holding a value in 0..255 (checked with
   eassert); it is replaced in place by its entry in
   unibyte_to_multibyte_table.  C is evaluated more than once.  */
#define MAKE_CHAR_MULTIBYTE(c)  \
  (eassert ((c) >= 0 && (c) < 256), (c) = unibyte_to_multibyte_table[(c)])
89018
a9f683a73092
(MAKE_CHAR_UNIBYTE, MAKE_CHAR_MULTIBYTE): New macros.
Kenichi Handa <handa@m17n.org>
parents:
88946
diff
changeset
|
107 |
88428
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
/* This is the maximum byte length of multibyte form.  */
#define MAX_MULTIBYTE_LENGTH 5

/* Return a Lisp character whose character code is C.  Assumes C is
   a valid character code.  */
#define make_char(c) make_number (c)

/* Nonzero iff C is an ASCII byte.  C is converted to unsigned first,
   so negative values are rejected.  */
#define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80)

/* Nonzero iff X is a character, i.e. a natural-number Lisp object
   whose value does not exceed MAX_CHAR.  X is evaluated twice.  */
#define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR)

/* Nonzero iff C is valid as a character code.  GENERICP is not used.
   C is converted to unsigned first, so negative values are rejected.  */
#define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR)
88363 | 123 |
/* Check if Lisp object X is a character or not.  The check itself is
   CHECK_TYPE with the predicate symbol Qcharacterp.  */
#define CHECK_CHARACTER(x) \
  CHECK_TYPE (CHARACTERP (x), Qcharacterp, x)

/* Apply CHECK_CHARACTER to the car of X, then store the checked value
   back with XSETCAR.  Declares a local named `tmp', so X must not be
   an expression that mentions a variable of that name.  */
#define CHECK_CHARACTER_CAR(x)          \
  do {                                  \
    Lisp_Object tmp = XCAR (x);         \
    CHECK_CHARACTER (tmp);              \
    XSETCAR ((x), tmp);                 \
  } while (0)

/* Apply CHECK_CHARACTER to the cdr of X, then store the checked value
   back with XSETCDR.  Declares a local named `tmp', so X must not be
   an expression that mentions a variable of that name.  */
#define CHECK_CHARACTER_CDR(x)          \
  do {                                  \
    Lisp_Object tmp = XCDR (x);         \
    CHECK_CHARACTER (tmp);              \
    XSETCDR ((x), tmp);                 \
  } while (0)
141 | |
/* Nonzero iff C is an ASCII character.  */
#define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)

/* Nonzero iff C is a character of code less than 0x100.  */
#define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)

/* Nonzero if character C has a printable glyph: either C is in the
   range 32..126, or its entry in the char-table Vprintable_chars is
   non-nil.  C is evaluated more than once.  */
#define CHAR_PRINTABLE_P(c)                                     \
  (((c) >= 32 && (c) < 127)                                     \
   || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))
152 | |
88428
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
/* Return byte length of multibyte form for character C.  Codes above
   MAX_5_BYTE_CHAR (the eight-bit characters) use the 2-byte form.
   C is evaluated more than once.  */
#define CHAR_BYTES(c)                   \
  ( (c) <= MAX_1_BYTE_CHAR ? 1          \
    : (c) <= MAX_2_BYTE_CHAR ? 2        \
    : (c) <= MAX_3_BYTE_CHAR ? 3        \
    : (c) <= MAX_4_BYTE_CHAR ? 4        \
    : (c) <= MAX_5_BYTE_CHAR ? 5        \
    : 2)
161 | |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
162 |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
/* Return the leading code of multibyte form of C, i.e. the first byte
   of its multibyte form.  For codes above MAX_5_BYTE_CHAR (the
   eight-bit characters) the result is 0xC0 or 0xC1.  C is evaluated
   more than once.  */
#define CHAR_LEADING_CODE(c)                            \
  ((c) <= MAX_1_BYTE_CHAR ? (c)                         \
   : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))       \
   : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))      \
   : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))      \
   : (c) <= MAX_5_BYTE_CHAR ? 0xF8                      \
   : (0xC0 | (((c) >> 6) & 0x01)))
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
171 |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
172 |
88428
5eaa8c11ab45
(CHAR_VALID_P): Don't call CHARACTERP.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
/* Store multibyte form of the character C in P.  The caller should
   allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
   Returns the length of the multibyte form.

   Forms of up to three bytes are written inline; larger codes are
   handed to char_string.  C and P are evaluated more than once.  */

#define CHAR_STRING(c, p)                               \
  ((unsigned) (c) <= MAX_1_BYTE_CHAR                    \
   ? ((p)[0] = (c),                                     \
      1)                                                \
   : (unsigned) (c) <= MAX_2_BYTE_CHAR                  \
   ? ((p)[0] = (0xC0 | ((c) >> 6)),                     \
      (p)[1] = (0x80 | ((c) & 0x3F)),                   \
      2)                                                \
   : (unsigned) (c) <= MAX_3_BYTE_CHAR                  \
   ? ((p)[0] = (0xE0 | ((c) >> 12)),                    \
      (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),            \
      (p)[2] = (0x80 | ((c) & 0x3F)),                   \
      3)                                                \
   : char_string ((unsigned) (c), (p)))
88363 | 191 |
89180
1d29c2b108e6
(CHAR_STRING): Call char_string if C is greater than
Kenichi Handa <handa@m17n.org>
parents:
89053
diff
changeset
|
/* Store multibyte form of byte B in P.  The caller should allocate at
   least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
   length of the multibyte form, which is always 2: a leading byte of
   0xC0 or 0xC1 (carrying bit 6 of B) followed by 0x80 plus the low
   six bits of B.  B and P are evaluated more than once.  */

#define BYTE8_STRING(b, p)                      \
  ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),       \
   (p)[1] = (0x80 | ((b) & 0x3F)),              \
   2)
f60ed671d6e4
(BYTE8_STRING): New macro.
Kenichi Handa <handa@m17n.org>
parents:
89018
diff
changeset
|
200 |
88363 | 201 |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
/* Store multibyte form of the character C in P and advance P to the
   end of the multibyte form.  The caller should allocate at least
   MAX_MULTIBYTE_LENGTH bytes area at P in advance.

   P must be an lvalue.  Forms of up to three bytes are written
   inline; larger codes are handed to char_string.  C and P are
   evaluated more than once.
   NOTE(review): unlike CHAR_STRING, C is compared without a cast to
   unsigned, so a negative C takes the 1-byte branch -- confirm that
   callers never pass one.  */

#define CHAR_STRING_ADVANCE(c, p)                       \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += char_string ((c), (p));                    \
  } while (0)
88363 | 220 |
89180
1d29c2b108e6
(CHAR_STRING): Call char_string if C is greater than
Kenichi Handa <handa@m17n.org>
parents:
89053
diff
changeset
|
221 |
/* Nonzero iff BYTE starts a non-ASCII character in a multibyte
   form.  */
#define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)

/* Nonzero iff BYTE is a trailing code of a non-ASCII character in a
   multibyte form.  */
#define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80)

/* Nonzero iff BYTE starts a character in a multibyte form.
   This is equivalent to:
        (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte))  */
#define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80)

/* Kept for backward compatibility.  This macro will be removed in the
   future.  */
#define BASE_LEADING_CODE_P LEADING_CODE_P
238 | |
/* How many bytes a character that starts with BYTE occupies in a
   multibyte form.  Only the high bits of BYTE are inspected; BYTE is
   evaluated more than once.  */
#define BYTES_BY_CHAR_HEAD(byte)        \
  (!((byte) & 0x80) ? 1                 \
   : !((byte) & 0x20) ? 2               \
   : !((byte) & 0x10) ? 3               \
   : !((byte) & 0x08) ? 4               \
   : 5)


/* Return the length of the multi-byte form at string STR of length
   LEN while assuming that STR points a valid multi-byte form.  As
   this macro isn't necessary anymore, all callers will be changed to
   use BYTES_BY_CHAR_HEAD directly in the future.  LEN is ignored.  */

#define MULTIBYTE_FORM_LENGTH(str, len)         \
  BYTES_BY_CHAR_HEAD (*(str))

/* Parse multibyte string STR of length LENGTH and set BYTES to the
   byte length of a character at STR while assuming that STR points a
   valid multibyte form.  As this macro isn't necessary anymore, all
   callers will be changed to use BYTES_BY_CHAR_HEAD directly in the
   future.  LENGTH is ignored.  The expansion is fully parenthesized
   so that it can be used safely inside a larger expression.  */

#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
  ((bytes) = BYTES_BY_CHAR_HEAD (*(str)))
265 | |
/* The byte length of multibyte form at unibyte string P ending at
   PEND.  If P doesn't point to a valid multibyte form, return 0.
   No byte at or beyond PEND is examined.  P and PEND are evaluated
   more than once.  */

#define MULTIBYTE_LENGTH(p, pend)                                       \
  ((p) >= (pend) ? 0                                                    \
   : !((p)[0] & 0x80) ? 1                                               \
   : (((p) + 1 >= (pend)) || (((p)[1] & 0xC0) != 0x80)) ? 0             \
   : ((p)[0] & 0xE0) == 0xC0 ? 2                                        \
   : (((p) + 2 >= (pend)) || (((p)[2] & 0xC0) != 0x80)) ? 0             \
   : ((p)[0] & 0xF0) == 0xE0 ? 3                                        \
   : (((p) + 3 >= (pend)) || (((p)[3] & 0xC0) != 0x80)) ? 0             \
   : ((p)[0] & 0xF8) == 0xF0 ? 4                                        \
   : (((p) + 4 >= (pend)) || (((p)[4] & 0xC0) != 0x80)) ? 0             \
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5                      \
   : 0)
281 | |
282 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
/* Like MULTIBYTE_LENGTH, but don't check the ending address.  The
   caller must therefore make sure that every byte this macro may
   examine (up to P[4]) is readable.  Returns 0 if P doesn't point to
   a valid multibyte form.  P is evaluated more than once.  */

#define MULTIBYTE_LENGTH_NO_CHECK(p)                    \
  (!((p)[0] & 0x80) ? 1                                 \
   : ((p)[1] & 0xC0) != 0x80 ? 0                        \
   : ((p)[0] & 0xE0) == 0xC0 ? 2                        \
   : ((p)[2] & 0xC0) != 0x80 ? 0                        \
   : ((p)[0] & 0xF0) == 0xE0 ? 3                        \
   : ((p)[3] & 0xC0) != 0x80 ? 0                        \
   : ((p)[0] & 0xF8) == 0xF0 ? 4                        \
   : ((p)[4] & 0xC0) != 0x80 ? 0                        \
   : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5      \
   : 0)
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
/* If P is before LIMIT, advance P to the next character boundary.
   Assumes that P is already at a character boundary of the same
   multibyte form whose end address is LIMIT.  P must be an lvalue
   and is evaluated more than once.  */

#define NEXT_CHAR_BOUNDARY(p, limit)            \
  do {                                          \
    if ((p) < (limit))                          \
      (p) += BYTES_BY_CHAR_HEAD (*(p));         \
  } while (0)
306 | |
307 | |
/* If P is after LIMIT, move P back to the previous character boundary.
   Assumes that P is already at a character boundary of the same
   multibyte form whose beginning address is LIMIT.

   The macro scans backward from P for a character head, never
   examining a byte before LIMIT.  If a head is found and the length
   it announces reaches exactly up to P, P is set to that head;
   otherwise (including when no head exists at or after LIMIT) P just
   steps back one byte.  P must be an lvalue; P and LIMIT are
   evaluated more than once.  */

#define PREV_CHAR_BOUNDARY(p, limit)                                    \
  do {                                                                  \
    if ((p) > (limit))                                                  \
      {                                                                 \
        const unsigned char *p0 = (p);                                  \
        do {                                                            \
          p0--;                                                         \
        } while (p0 >= (limit) && ! CHAR_HEAD_P (*p0));                 \
        (p) = (p0 >= (limit)                                            \
               && BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1; \
      }                                                                 \
  } while (0)
88363 | 323 |
/* Return the character code of character whose multibyte form is at
   P.  The argument LEN is ignored.  It will be removed in the
   future.

   Forms of one to three bytes are decoded inline; a 2-byte form
   whose leading byte is below 0xC2 denotes an eight-bit character
   and is offset by 0x3FFF80.  Longer forms are handed to
   string_char.  P is evaluated more than once.  */

#define STRING_CHAR(p, len)                                     \
  (!((p)[0] & 0x80)                                             \
   ? (p)[0]                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? (((((p)[0] & 0x1F) << 6)                                   \
       | ((p)[1] & 0x3F))                                       \
      + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))       \
   : ! ((p)[0] & 0x10)                                          \
   ? ((((p)[0] & 0x0F) << 12)                                   \
      | (((p)[1] & 0x3F) << 6)                                  \
      | ((p)[2] & 0x3F))                                        \
   : string_char ((p), NULL, NULL))
88363 | 340 |
341 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
   form.  The argument LEN is ignored.  It will be removed in the
   future.

   ACTUAL_LEN must be an int lvalue: for forms longer than three
   bytes its address is passed to string_char.  P is evaluated more
   than once.  */

#define STRING_CHAR_AND_LENGTH(p, len, actual_len)              \
  (!((p)[0] & 0x80)                                             \
   ? ((actual_len) = 1, (p)[0])                                 \
   : ! ((p)[0] & 0x20)                                          \
   ? ((actual_len) = 2,                                         \
      (((((p)[0] & 0x1F) << 6)                                  \
        | ((p)[1] & 0x3F))                                      \
       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))     \
   : ! ((p)[0] & 0x10)                                          \
   ? ((actual_len) = 3,                                         \
      ((((p)[0] & 0x0F) << 12)                                  \
       | (((p)[1] & 0x3F) << 6)                                 \
       | ((p)[2] & 0x3F)))                                      \
   : string_char ((p), NULL, &(actual_len)))
88363 | 360 |
361 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
/* Like STRING_CHAR, but advance P to the end of multibyte form.

   P must be an lvalue.  Forms of one to three bytes are decoded
   inline (P is advanced first and the bytes are then read at negative
   offsets); longer forms are handed to string_char, which receives
   the address of P.  P is evaluated more than once.  */

#define STRING_CHAR_ADVANCE(p)                                  \
  (!((p)[0] & 0x80)                                             \
   ? *(p)++                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? ((p) += 2,                                                 \
      ((((p)[-2] & 0x1F) << 6)                                  \
       | ((p)[-1] & 0x3F)                                       \
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
   : ! ((p)[0] & 0x10)                                          \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : string_char ((p), &(p), NULL))
88363 | 378 |
379 | |
/* Fetch the "next" character from Lisp string STRING at byte position
   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.

   All the args must be side-effect-free.
   BYTEIDX and CHARIDX must be lvalues;
   we increment them past the character fetched.

   For a multibyte STRING the character is decoded with
   STRING_CHAR_AND_LENGTH and BYTEIDX advances by its byte length;
   otherwise the single byte at BYTEIDX is fetched with SREF.  The
   macro's own temporaries carry a `fetch_' prefix and trailing
   underscore so that they cannot capture a caller's variable that
   happens to be called `ptr' or `len'.  */

#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)     \
  do                                                                    \
    {                                                                   \
      (CHARIDX)++;                                                      \
      if (STRING_MULTIBYTE (STRING))                                    \
        {                                                               \
          unsigned char *fetch_chp_ = &SDATA (STRING)[(BYTEIDX)];       \
          int fetch_chlen_;                                             \
                                                                        \
          (OUTPUT) = STRING_CHAR_AND_LENGTH (fetch_chp_, 0,             \
                                             fetch_chlen_);             \
          (BYTEIDX) += fetch_chlen_;                                    \
        }                                                               \
      else                                                              \
        {                                                               \
          (OUTPUT) = SREF (STRING, BYTEIDX);                            \
          (BYTEIDX)++;                                                  \
        }                                                               \
    }                                                                   \
  while (0)
88363 | 406 |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
407 /* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character |
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
408 even if STRING is unibyte. */ |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
409 |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
410 #define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
411 do \ |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
412 { \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
413 CHARIDX++; \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
414 if (STRING_MULTIBYTE (STRING)) \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
415 { \ |
91805
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
416 unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \ |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
417 int len; \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
418 \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
419 OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
420 BYTEIDX += len; \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
421 } \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
422 else \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
423 { \ |
91805
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
424 OUTPUT = SREF (STRING, BYTEIDX); \ |
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
425 BYTEIDX++; \ |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
426 MAKE_CHAR_MULTIBYTE (OUTPUT); \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
427 } \ |
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
428 } \ |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
429 while (0) |
89053
06a2cbbeaee9
(LEADING_CODE_LATIN_1_MIN)
Kenichi Handa <handa@m17n.org>
parents:
89038
diff
changeset
|
430 |
88363 | 431 |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
432 /* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte. */ |
88363 | 433 |
434 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \ | |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
435 do \ |
88363 | 436 { \ |
91805
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
437 unsigned char *ptr = &SDATA (STRING)[BYTEIDX]; \ |
88363 | 438 int len; \ |
439 \ | |
440 OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \ | |
441 BYTEIDX += len; \ | |
442 CHARIDX++; \ | |
443 } \ | |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
444 while (0) |
88363 | 445 |
446 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
447 /* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current |
88363 | 448 buffer. */ |
449 | |
450 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ | |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
451 do \ |
88363 | 452 { \ |
453 CHARIDX++; \ | |
454 if (!NILP (current_buffer->enable_multibyte_characters)) \ | |
455 { \ | |
456 unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \ | |
457 int len; \ | |
458 \ | |
459 OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \ | |
460 BYTEIDX += len; \ | |
461 } \ | |
462 else \ | |
463 { \ | |
464 OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \ | |
465 BYTEIDX++; \ | |
466 } \ | |
467 } \ | |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
468 while (0) |
88363 | 469 |
470 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
471 /* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte. */ |
88363 | 472 |
473 #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \ | |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
474 do \ |
88363 | 475 { \ |
476 unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \ | |
477 int len; \ | |
478 \ | |
479 OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \ | |
480 BYTEIDX += len; \ | |
481 CHARIDX++; \ | |
482 } \ | |
91620
acb32817f0e8
Use "do...while (0)", not "if (1)..else" in macro definitions.
Jan Djärv <jan.h.d@swipnet.se>
parents:
91444
diff
changeset
|
483 while (0) |
88363 | 484 |
485 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
486 /* Increment the buffer byte position POS_BYTE of the current buffer to |
88363 | 487 the next character boundary. No range checking of POS_BYTE. */ |
488 | |
489 #define INC_POS(pos_byte) \ | |
490 do { \ | |
491 unsigned char *p = BYTE_POS_ADDR (pos_byte); \ | |
492 pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | |
493 } while (0) | |
494 | |
495 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
496 /* Decrement the buffer byte position POS_BYTE of the current buffer to |
88363 | 497 the previous character boundary. No range checking of POS_BYTE. */ |
498 | |
499 #define DEC_POS(pos_byte) \ | |
500 do { \ | |
501 unsigned char *p; \ | |
502 \ | |
503 pos_byte--; \ | |
504 if (pos_byte < GPT_BYTE) \ | |
91805
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
505 p = BEG_ADDR + pos_byte - BEG_BYTE; \ |
88363 | 506 else \ |
91805
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
507 p = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE;\ |
88363 | 508 while (!CHAR_HEAD_P (*p)) \ |
509 { \ | |
510 p--; \ | |
511 pos_byte--; \ | |
512 } \ | |
513 } while (0) | |
514 | |
515 /* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */ | |
516 | |
517 #define INC_BOTH(charpos, bytepos) \ | |
518 do \ | |
519 { \ | |
520 (charpos)++; \ | |
521 if (NILP (current_buffer->enable_multibyte_characters)) \ | |
522 (bytepos)++; \ | |
523 else \ | |
524 INC_POS ((bytepos)); \ | |
525 } \ | |
526 while (0) | |
527 | |
528 | |
529 /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */ | |
530 | |
531 #define DEC_BOTH(charpos, bytepos) \ | |
532 do \ | |
533 { \ | |
534 (charpos)--; \ | |
535 if (NILP (current_buffer->enable_multibyte_characters)) \ | |
536 (bytepos)--; \ | |
537 else \ | |
538 DEC_POS ((bytepos)); \ | |
539 } \ | |
540 while (0) | |
541 | |
542 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
543 /* Increment the buffer byte position POS_BYTE of buffer BUF to |
88363 | 544 the next character boundary. This macro relies on the fact that |
545 *GPT_ADDR and *Z_ADDR are always accessible and the values are | |
546 '\0'. No range checking of POS_BYTE. */ | |
547 | |
548 #define BUF_INC_POS(buf, pos_byte) \ | |
549 do { \ | |
550 unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ | |
551 pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | |
552 } while (0) | |
553 | |
554 | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
555 /* Decrement the buffer byte position POS_BYTE of buffer BUF to |
88363 | 556 the previous character boundary. No range checking of POS_BYTE. */ |
557 | |
558 #define BUF_DEC_POS(buf, pos_byte) \ | |
559 do { \ | |
560 unsigned char *p; \ | |
561 pos_byte--; \ | |
562 if (pos_byte < BUF_GPT_BYTE (buf)) \ | |
91805
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
563 p = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE; \ |
88363 | 564 else \ |
91805
c330bf7419fd
(FETCH_STRING_CHAR_ADVANCE, FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE)
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91620
diff
changeset
|
565 p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\ |
88363 | 566 while (!CHAR_HEAD_P (*p)) \ |
567 { \ | |
568 p--; \ | |
569 pos_byte--; \ | |
570 } \ | |
571 } while (0) | |
572 | |
573 | |
89887
028a1f06f612
(LEADING_CODE_LATIN_1_MIN) (LEADING_CODE_LATIN_1_MAX): Delete these
Kenichi Handa <handa@m17n.org>
parents:
89692
diff
changeset
|
574 /* If C is a character to be unified with a Unicode character, return |
028a1f06f612
(LEADING_CODE_LATIN_1_MIN) (LEADING_CODE_LATIN_1_MAX): Delete these
Kenichi Handa <handa@m17n.org>
parents:
89692
diff
changeset
|
575 the unified Unicode character. */ |
028a1f06f612
(LEADING_CODE_LATIN_1_MIN) (LEADING_CODE_LATIN_1_MAX): Delete these
Kenichi Handa <handa@m17n.org>
parents:
89692
diff
changeset
|
576 |
88742
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
577 #define MAYBE_UNIFY_CHAR(c) \ |
89180
1d29c2b108e6
(CHAR_STRING): Call char_string if C is greater than
Kenichi Handa <handa@m17n.org>
parents:
89053
diff
changeset
|
578 if (c > MAX_UNICODE_CHAR \ |
1d29c2b108e6
(CHAR_STRING): Call char_string if C is greater than
Kenichi Handa <handa@m17n.org>
parents:
89053
diff
changeset
|
579 && CHAR_TABLE_P (Vchar_unify_table)) \ |
88742
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
580 { \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
581 Lisp_Object val; \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
582 int unified; \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
583 \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
584 val = CHAR_TABLE_REF (Vchar_unify_table, c); \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
585 if (! NILP (val)) \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
586 { \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
587 if (SYMBOLP (val)) \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
588 { \ |
88873
7d441bc35e9b
(TRAILING_CODE_P): New macro.
Kenichi Handa <handa@m17n.org>
parents:
88832
diff
changeset
|
589 Funify_charset (val, Qnil, Qnil); \ |
88742
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
590 val = CHAR_TABLE_REF (Vchar_unify_table, c); \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
591 } \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
592 if ((unified = XINT (val)) >= 0) \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
593 c = unified; \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
594 } \ |
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
595 } \ |
88363 | 596 else |
597 | |
88742
55e36a0cf0ee
(MAYBE_UNIFY_CHAR): Adjusted for the change of
Kenichi Handa <handa@m17n.org>
parents:
88545
diff
changeset
|
598 |
88363 | 599 /* Return the width of ASCII character C. The width is measured by |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
600 how many columns C will occupy on the screen when displayed in the |
88363 | 601 current buffer. */ |
602 | |
603 #define ASCII_CHAR_WIDTH(c) \ | |
604 (c < 0x20 \ | |
605 ? (c == '\t' \ | |
606 ? XFASTINT (current_buffer->tab_width) \ | |
607 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \ | |
608 : (c < 0x7f \ | |
609 ? 1 \ | |
610 : ((NILP (current_buffer->ctl_arrow) ? 4 : 2)))) | |
611 | |
612 /* Return the width of character C. The width is measured by how many | |
98025
d4e07000ca4c
Fix spelling and wording of comments.
Eli Zaretskii <eliz@gnu.org>
parents:
97817
diff
changeset
|
613 columns C will occupy on the screen when displayed in the current |
88363 | 614 buffer. */ |
615 | |
616 #define CHAR_WIDTH(c) \ | |
617 (ASCII_CHAR_P (c) \ | |
618 ? ASCII_CHAR_WIDTH (c) \ | |
619 : XINT (CHAR_TABLE_REF (Vchar_width_table, c))) | |
620 | |
89180
1d29c2b108e6
(CHAR_STRING): Call char_string if C is greater than
Kenichi Handa <handa@m17n.org>
parents:
89053
diff
changeset
|
621 extern int char_resolve_modifier_mask P_ ((int)); |
90798
ad70c7654800
(CHAR_STRING): Cast C to unsigned on calling
Kenichi Handa <handa@m17n.org>
parents:
90791
diff
changeset
|
622 extern int char_string P_ ((unsigned, unsigned char *)); |
89180
1d29c2b108e6
(CHAR_STRING): Call char_string if C is greater than
Kenichi Handa <handa@m17n.org>
parents:
89053
diff
changeset
|
623 extern int string_char P_ ((const unsigned char *, |
1d29c2b108e6
(CHAR_STRING): Call char_string if C is greater than
Kenichi Handa <handa@m17n.org>
parents:
89053
diff
changeset
|
624 const unsigned char **, int *)); |
88363 | 625 |
626 extern int translate_char P_ ((Lisp_Object, int c)); | |
627 extern int char_printable_p P_ ((int c)); | |
89483 | 628 extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *, |
629 int *)); | |
88363 | 630 extern int parse_str_to_multibyte P_ ((unsigned char *, int)); |
631 extern int str_as_multibyte P_ ((unsigned char *, int, int, int *)); | |
632 extern int str_to_multibyte P_ ((unsigned char *, int, int)); | |
633 extern int str_as_unibyte P_ ((unsigned char *, int)); | |
96250
423373db93ef
(str_to_unibyte): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
95858
diff
changeset
|
634 extern EMACS_INT str_to_unibyte P_ ((const unsigned char *, unsigned char *, |
423373db93ef
(str_to_unibyte): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
95858
diff
changeset
|
635 EMACS_INT, int)); |
88363 | 636 extern int strwidth P_ ((unsigned char *, int)); |
89483 | 637 extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *)); |
88363 | 638 extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *)); |
639 | |
640 extern Lisp_Object Vprintable_chars; | |
641 | |
642 extern Lisp_Object Qcharacterp, Qauto_fill_chars; | |
643 extern Lisp_Object Vtranslation_table_vector; | |
644 extern Lisp_Object Vchar_width_table; | |
645 extern Lisp_Object Vchar_direction_table; | |
646 extern Lisp_Object Vchar_unify_table; | |
97817
f15876e66681
(Vunicode_category_table): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
96250
diff
changeset
|
647 extern Lisp_Object Vunicode_category_table; |
88363 | 648 |
88545 | 649 extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object)); |
650 | |
88363 | 651 /* Return the translation table whose id number is ID. */ |
652 #define GET_TRANSLATION_TABLE(id) \ | |
653 (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)])) | |
654 | |
655 /* A char-table for characters which may invoke auto-filling. */ | |
656 extern Lisp_Object Vauto_fill_chars; | |
657 | |
88915
94184802d0cc
(Vchar_script_table): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
88873
diff
changeset
|
658 extern Lisp_Object Vchar_script_table; |
90402
69ac9cbd4be5
(Vscript_representative_chars): Extern it.
Kenichi Handa <handa@m17n.org>
parents:
90051
diff
changeset
|
659 extern Lisp_Object Vscript_representative_chars; |
88873
7d441bc35e9b
(TRAILING_CODE_P): New macro.
Kenichi Handa <handa@m17n.org>
parents:
88832
diff
changeset
|
660 |
88363 | 661 /* Copy LEN bytes from FROM to TO. This macro should be used only |
662 when a caller knows that LEN is short and the obvious copy loop is | |
663 faster than calling bcopy which has some overhead. Copying a | |
664 multibyte sequence of a character is the typical case. */ | |
665 | |
666 #define BCOPY_SHORT(from, to, len) \ | |
667 do { \ | |
668 int i = len; \ | |
669 unsigned char *from_p = from, *to_p = to; \ | |
670 while (i--) *to_p++ = *from_p++; \ | |
671 } while (0) | |
672 | |
673 #define DEFSYM(sym, name) \ | |
674 do { (sym) = intern ((name)); staticpro (&(sym)); } while (0) | |
675 | |
676 #endif /* EMACS_CHARACTER_H */ | |
89911 | 677 |
678 /* arch-tag: 4ef86004-2eff-4073-8cea-cfcbcf7188ac | |
679 (do not change this comment) */ |