Mercurial > emacs
annotate src/character.c @ 97528:184bb2071e3f
mail/: Add new (temporary) libraries with which to test Rmail/mbox such
that Rmail/babyl is not affected. This creates a facility/feature
called "pmail" (analogous to "rmail") that can be used independently
from Rmail for testing purposes. The plan is to replace the "rmail"
files eventually and remove "pmail" entirely at that point. In the
interim, interested developers can use either Rmail or Pmail or both
(which is not recommended for the casual User or the faint of heart).
author | Paul Reilly <pmr@pajato.com> |
---|---|
date | Mon, 18 Aug 2008 04:51:28 +0000 |
parents | 919775e9f3bf |
children | 9999d9194509 |
rev | line source |
---|---|
88363 | 1 /* Basic character support. |
2 Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN. | |
89483 | 3 Licensed to the Free Software Foundation. |
91443
541da5d81b14
Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents:
90942
diff
changeset
|
4 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
541da5d81b14
Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents:
90942
diff
changeset
|
5 Free Software Foundation, Inc. |
541da5d81b14
Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents:
90942
diff
changeset
|
6 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 |
88363 | 7 National Institute of Advanced Industrial Science and Technology (AIST) |
8 Registration Number H13PRO009 | |
9 | |
10 This file is part of GNU Emacs. | |
11 | |
94963
8971ddf55736
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94146
diff
changeset
|
12 GNU Emacs is free software: you can redistribute it and/or modify |
88363 | 13 it under the terms of the GNU General Public License as published by |
94963
8971ddf55736
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94146
diff
changeset
|
14 the Free Software Foundation, either version 3 of the License, or |
8971ddf55736
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94146
diff
changeset
|
15 (at your option) any later version. |
88363 | 16 |
17 GNU Emacs is distributed in the hope that it will be useful, | |
18 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 GNU General Public License for more details. | |
21 | |
22 You should have received a copy of the GNU General Public License | |
94963
8971ddf55736
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
94146
diff
changeset
|
23 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
88363 | 24 |
25 /* At first, see the document in `character.h' to understand the code | |
26 in this file. */ | |
27 | |
28 #ifdef emacs | |
29 #include <config.h> | |
30 #endif | |
31 | |
32 #include <stdio.h> | |
33 | |
34 #ifdef emacs | |
35 | |
36 #include <sys/types.h> | |
37 #include "lisp.h" | |
38 #include "character.h" | |
39 #include "buffer.h" | |
40 #include "charset.h" | |
41 #include "composite.h" | |
42 #include "disptab.h" | |
43 | |
44 #else /* not emacs */ | |
45 | |
46 #include "mulelib.h" | |
47 | |
48 #endif /* emacs */ | |
49 | |
50 Lisp_Object Qcharacterp; | |
51 | |
52 /* Vector of all translation tables ever defined. | |
53 The ID of a translation table is used to index this vector. */ | |
54 Lisp_Object Vtranslation_table_vector; | |
55 | |
56 /* A char-table for characters which may invoke auto-filling. */ | |
57 Lisp_Object Vauto_fill_chars; | |
58 | |
59 Lisp_Object Qauto_fill_chars; | |
60 | |
89888 | 61 /* Char-table of information about which character to unify to which |
62 Unicode character. */ | |
88363 | 63 Lisp_Object Vchar_unify_table; |
64 | |
65 /* A char-table. An element is non-nil iff the corresponding | |
66 character has a printable glyph. */ | |
67 Lisp_Object Vprintable_chars; | |
68 | |
69 /* A char-table. An element is a column-width of the corresponding | |
70 character. */ | |
71 Lisp_Object Vchar_width_table; | |
72 | |
73 /* A char-table. An element is a symbol indicating the direction | |
74 property of corresponding character. */ | |
75 Lisp_Object Vchar_direction_table; | |
76 | |
89020
b0277093a5f2
(_fetch_multibyte_char_len): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88990
diff
changeset
|
77 /* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */ |
88363 | 78 unsigned char *_fetch_multibyte_char_p; |
79 | |
88913
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
80 /* Char table of scripts. */ |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
81 Lisp_Object Vchar_script_table; |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
82 |
90403
81f1bbab281f
(Vscript_representative_chars): New variable.
Kenichi Handa <handa@m17n.org>
parents:
90256
diff
changeset
|
83 /* Alist of scripts vs representative characters. */ |
81f1bbab281f
(Vscript_representative_chars): New variable.
Kenichi Handa <handa@m17n.org>
parents:
90256
diff
changeset
|
84 Lisp_Object Vscript_representative_chars; |
81f1bbab281f
(Vscript_representative_chars): New variable.
Kenichi Handa <handa@m17n.org>
parents:
90256
diff
changeset
|
85 |
88913
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
86 static Lisp_Object Qchar_script_table; |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
87 |
89054
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
88 /* Mapping table from unibyte chars to multibyte chars. */ |
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
89 int unibyte_to_multibyte_table[256]; |
88878
a0ae1a5876c7
(Vscript_alist): New variable.
Kenichi Handa <handa@m17n.org>
parents:
88853
diff
changeset
|
90 |
90020
680f4ba0cdc9
(unibyte_has_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89911
diff
changeset
|
91 /* Nth element is 1 iff unibyte char N can be mapped to a multibyte |
680f4ba0cdc9
(unibyte_has_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89911
diff
changeset
|
92 char. */ |
680f4ba0cdc9
(unibyte_has_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89911
diff
changeset
|
93 char unibyte_has_multibyte_table[256]; |
680f4ba0cdc9
(unibyte_has_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89911
diff
changeset
|
94 |
88363 | 95 |
96 | |
92483
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
97 /* If character code C has modifier masks, reflect them to the |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
98 character code if possible. Return the resulting code. */ |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
99 |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
100 int |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
101 char_resolve_modifier_mask (c) |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
102 int c; |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
103 { |
92494
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
104 /* A non-ASCII character can't reflect modifier bits to the code. */ |
92483
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
105 if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
106 return c; |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
107 |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
108 /* For Meta, Shift, and Control modifiers, we need special care. */ |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
109 if (c & CHAR_SHIFT) |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
110 { |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
111 /* Shift modifier is valid only with [A-Za-z]. */ |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
112 if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
113 c &= ~CHAR_SHIFT; |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
114 else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
115 c = (c & ~CHAR_SHIFT) - ('a' - 'A'); |
92499
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
116 /* Shift modifier for control characters and SPC is ignored. */ |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
117 else if ((c & ~CHAR_MODIFIER_MASK) <= 0x20) |
92494
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
118 c &= ~CHAR_SHIFT; |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
119 } |
92483
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
120 if (c & CHAR_CTL) |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
121 { |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
122 /* Simulate the code in lread.c. */ |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
123 /* Allow `\C- ' and `\C-?'. */ |
92499
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
124 if ((c & 0377) == ' ') |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
125 c &= ~0177 & ~ CHAR_CTL; |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
126 else if ((c & 0377) == '?') |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
127 c = 0177 | (c & ~0177 & ~CHAR_CTL); |
92483
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
128 /* ASCII control chars are made from letters (both cases), |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
129 as well as the non-letters within 0100...0137. */ |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
130 else if ((c & 0137) >= 0101 && (c & 0137) <= 0132) |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
131 c &= (037 | (~0177 & ~CHAR_CTL)); |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
132 else if ((c & 0177) >= 0100 && (c & 0177) <= 0137) |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
133 c &= (037 | (~0177 & ~CHAR_CTL)); |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
134 } |
92499
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
135 if (c & CHAR_META) |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
136 { |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
137 /* Move the meta bit to the right place for a string. */ |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
138 c = (c & ~CHAR_META) | 0x80; |
88d1a8b0752e
(char_resolve_modifier_mask): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents:
92494
diff
changeset
|
139 } |
92483
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
140 |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
141 return c; |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
142 } |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
143 |
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
144 |
89888 | 145 /* Store multibyte form of character C at P. If C has modifier bits, |
146 handle them appropriately. */ | |
147 | |
88363 | 148 int |
89181
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
149 char_string (c, p) |
90797
b74794d7f11a
(char_string): Type of arg C changed to unsigned.
Kenichi Handa <handa@m17n.org>
parents:
90761
diff
changeset
|
150 unsigned c; |
88427
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
151 unsigned char *p; |
88363 | 152 { |
153 int bytes; | |
154 | |
89181
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
155 if (c & CHAR_MODIFIER_MASK) |
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
156 { |
92483
95fab6416567
(char_resolve_modifier_mask): New function.
Kenichi Handa <handa@m17n.org>
parents:
91807
diff
changeset
|
157 c = (unsigned) char_resolve_modifier_mask ((int) c); |
89181
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
158 /* If C still has any modifier bits, just ignore it. */ |
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
159 c &= ~CHAR_MODIFIER_MASK; |
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
160 } |
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
161 |
88363 | 162 MAYBE_UNIFY_CHAR (c); |
163 | |
89181
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
164 if (c <= MAX_3_BYTE_CHAR) |
88363 | 165 { |
166 bytes = CHAR_STRING (c, p); | |
167 } | |
168 else if (c <= MAX_4_BYTE_CHAR) | |
169 { | |
170 p[0] = (0xF0 | (c >> 18)); | |
171 p[1] = (0x80 | ((c >> 12) & 0x3F)); | |
172 p[2] = (0x80 | ((c >> 6) & 0x3F)); | |
173 p[3] = (0x80 | (c & 0x3F)); | |
174 bytes = 4; | |
175 } | |
89181
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
176 else if (c <= MAX_5_BYTE_CHAR) |
88363 | 177 { |
178 p[0] = 0xF8; | |
179 p[1] = (0x80 | ((c >> 18) & 0x0F)); | |
180 p[2] = (0x80 | ((c >> 12) & 0x3F)); | |
181 p[3] = (0x80 | ((c >> 6) & 0x3F)); | |
182 p[4] = (0x80 | (c & 0x3F)); | |
183 bytes = 5; | |
184 } | |
90797
b74794d7f11a
(char_string): Type of arg C changed to unsigned.
Kenichi Handa <handa@m17n.org>
parents:
90761
diff
changeset
|
185 else if (c <= MAX_CHAR) |
89181
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
186 { |
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
187 c = CHAR_TO_BYTE8 (c); |
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
188 bytes = BYTE8_STRING (c, p); |
be75d5934738
(char_string): Renamed from
Kenichi Handa <handa@m17n.org>
parents:
89054
diff
changeset
|
189 } |
90797
b74794d7f11a
(char_string): Type of arg C changed to unsigned.
Kenichi Handa <handa@m17n.org>
parents:
90761
diff
changeset
|
190 else |
b74794d7f11a
(char_string): Type of arg C changed to unsigned.
Kenichi Handa <handa@m17n.org>
parents:
90761
diff
changeset
|
191 error ("Invalid character: %d", c); |
88427
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
192 |
88363 | 193 return bytes; |
194 } | |
195 | |
196 | |
/* Return the character whose multibyte form starts at P.

   If LEN is not NULL, it must point to an int, and *LEN is set to the
   byte length of the multibyte form.  If ADVANCED is not NULL, it
   must point to a `const unsigned char *', and *ADVANCED is set to
   the address just past the multibyte form (i.e. the starting address
   of the next character).  */

int
string_char (p, advanced, len)
     const unsigned char *p;
     const unsigned char **advanced;
     int *len;
{
  const unsigned char *start = p;
  int c;

  if (*p >= 0x80 && (*p & 0x20) && (*p & 0x10))
    {
      /* Lead byte has bits 0x80, 0x20 and 0x10 all set: one of the
         long forms, told apart by bit 0x08.  */
      if (*p & 0x08)
        {
          /* Five-byte form; the lead byte carries no code bits.  */
          c = (((p[1] & 0x3F) << 18)
               | ((p[2] & 0x3F) << 12)
               | ((p[3] & 0x3F) << 6)
               | (p[4] & 0x3F));
          p += 5;
        }
      else
        {
          /* Four-byte form.  */
          c = (((p[0] & 0xF) << 18)
               | ((p[1] & 0x3F) << 12)
               | ((p[2] & 0x3F) << 6)
               | (p[3] & 0x3F));
          p += 4;
        }
    }
  else
    {
      /* Shorter forms are decoded by the generic macro, which also
         advances P.  */
      c = STRING_CHAR_ADVANCE (p);
    }

  MAYBE_UNIFY_CHAR (c);

  if (len)
    *len = p - start;
  if (advanced)
    *advanced = p;

  return c;
}
242 | |
243 | |
244 /* Translate character C by translation table TABLE. If C is | |
245 negative, translate a character specified by CHARSET and CODE. If | |
246 no translation is found in TABLE, return the untranslated | |
89757
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
247 character. If TABLE is a list, elements are char tables. In this |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
248 case, translace C by all tables. */ |
88363 | 249 |
250 int | |
251 translate_char (table, c) | |
252 Lisp_Object table; | |
253 int c; | |
254 { | |
89757
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
255 if (CHAR_TABLE_P (table)) |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
256 { |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
257 Lisp_Object ch; |
88363 | 258 |
89757
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
259 ch = CHAR_TABLE_REF (table, c); |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
260 if (CHARACTERP (ch)) |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
261 c = XINT (ch); |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
262 } |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
263 else |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
264 { |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
265 for (; CONSP (table); table = XCDR (table)) |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
266 c = translate_char (XCAR (table), c); |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
267 } |
482c15940000
(translate_char): Accept list of translation tables.
Kenichi Handa <handa@m17n.org>
parents:
89538
diff
changeset
|
268 return c; |
88363 | 269 } |
270 | |
271 /* Convert the multibyte character C to unibyte 8-bit character based | |
88983
2d504d707ce6
(unibyte_char_to_multibyte): Refer to
Kenichi Handa <handa@m17n.org>
parents:
88947
diff
changeset
|
272 on the current value of charset_unibyte. If dimension of |
2d504d707ce6
(unibyte_char_to_multibyte): Refer to
Kenichi Handa <handa@m17n.org>
parents:
88947
diff
changeset
|
273 charset_unibyte is more than one, return (C & 0xFF). |
88363 | 274 |
275 The argument REV_TBL is now ignored. It will be removed in the | |
276 future. */ | |
277 | |
278 int | |
279 multibyte_char_to_unibyte (c, rev_tbl) | |
280 int c; | |
281 Lisp_Object rev_tbl; | |
282 { | |
89054
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
283 struct charset *charset; |
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
284 unsigned c1; |
88363 | 285 |
89054
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
286 if (CHAR_BYTE8_P (c)) |
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
287 return CHAR_TO_BYTE8 (c); |
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
288 charset = CHARSET_FROM_ID (charset_unibyte); |
8a6da305ac40
(unibyte_to_multibyte_table): New variable.
Kenichi Handa <handa@m17n.org>
parents:
89029
diff
changeset
|
289 c1 = ENCODE_CHAR (charset, c); |
88363 | 290 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF); |
291 } | |
292 | |
90761
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
293 /* Like multibyte_char_to_unibyte, but return -1 if C is not supported |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
294 by charset_unibyte. */ |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
295 |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
296 int |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
297 multibyte_char_to_unibyte_safe (c) |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
298 int c; |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
299 { |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
300 struct charset *charset; |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
301 unsigned c1; |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
302 |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
303 if (CHAR_BYTE8_P (c)) |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
304 return CHAR_TO_BYTE8 (c); |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
305 charset = CHARSET_FROM_ID (charset_unibyte); |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
306 c1 = ENCODE_CHAR (charset, c); |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
307 return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1); |
2951f3d44eba
(multibyte_char_to_unibyte_safe): New function.
Kenichi Handa <handa@m17n.org>
parents:
90424
diff
changeset
|
308 } |
88363 | 309 |
310 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0, | |
311 doc: /* Return non-nil if OBJECT is a character. */) | |
312 (object, ignore) | |
313 Lisp_Object object, ignore; | |
314 { | |
315 return (CHARACTERP (object) ? Qt : Qnil); | |
316 } | |
317 | |
318 DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0, | |
319 doc: /* Return the character of the maximum code. */) | |
320 () | |
321 { | |
322 return make_number (MAX_CHAR); | |
323 } | |
324 | |
325 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte, | |
326 Sunibyte_char_to_multibyte, 1, 1, 0, | |
94146
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
327 doc: /* Convert the byte CH to multibyte character. */) |
88363 | 328 (ch) |
329 Lisp_Object ch; | |
330 { | |
331 int c; | |
332 struct charset *charset; | |
333 | |
334 CHECK_CHARACTER (ch); | |
335 c = XFASTINT (ch); | |
336 if (c >= 0400) | |
337 error ("Invalid unibyte character: %d", c); | |
88983
2d504d707ce6
(unibyte_char_to_multibyte): Refer to
Kenichi Handa <handa@m17n.org>
parents:
88947
diff
changeset
|
338 charset = CHARSET_FROM_ID (charset_unibyte); |
88363 | 339 c = DECODE_CHAR (charset, c); |
340 if (c < 0) | |
88947
894b8bce3208
(Funibyte_char_to_multibyte): If C can't be decoded
Kenichi Handa <handa@m17n.org>
parents:
88926
diff
changeset
|
341 c = BYTE8_TO_CHAR (XFASTINT (ch)); |
88363 | 342 return make_number (c); |
343 } | |
344 | |
345 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte, | |
346 Smultibyte_char_to_unibyte, 1, 1, 0, | |
94146
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
347 doc: /* Convert the multibyte character CH to a byte. |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
348 If the multibyte character does not represent a byte, return -1. */) |
88363 | 349 (ch) |
350 Lisp_Object ch; | |
351 { | |
94146
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
352 int cm; |
88363 | 353 |
354 CHECK_CHARACTER (ch); | |
94146
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
355 cm = XFASTINT (ch); |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
356 if (cm < 256) |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
357 /* Can't distinguish a byte read from a unibyte buffer from |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
358 a latin1 char, so let's let it slide. */ |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
359 return ch; |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
360 else |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
361 { |
95856
f13a77e0e34f
* character.h (CHAR_TO_BYTE_SAFE): New macro.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94963
diff
changeset
|
362 int cu = CHAR_TO_BYTE_SAFE (cm); |
94146
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
363 return make_number (cu); |
096ad76b9ccf
(Fmultibyte_char_to_unibyte): Return latin1 chars unchanged.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94114
diff
changeset
|
364 } |
88363 | 365 } |
366 | |
367 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0, | |
368 doc: /* Return 1 regardless of the argument CHAR. | |
94114
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
369 This is now an obsolete function. We keep it just for backward compatibility. |
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
370 usage: (char-bytes CHAR) */) |
88363 | 371 (ch) |
372 Lisp_Object ch; | |
373 { | |
374 CHECK_CHARACTER (ch); | |
375 return make_number (1); | |
376 } | |
377 | |
378 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0, | |
379 doc: /* Return width of CHAR when displayed in the current buffer. | |
380 The width is measured by how many columns it occupies on the screen. | |
94114
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
381 Tab is taken to occupy `tab-width' columns. |
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
382 usage: (char-width CHAR) */) |
88363 | 383 (ch) |
384 Lisp_Object ch; | |
385 { | |
386 Lisp_Object disp; | |
387 int c, width; | |
388 struct Lisp_Char_Table *dp = buffer_display_table (); | |
389 | |
390 CHECK_CHARACTER (ch); | |
391 c = XINT (ch); | |
392 | |
393 /* Get the way the display table would display it. */ | |
394 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil; | |
395 | |
396 if (VECTORP (disp)) | |
397 width = ASIZE (disp); | |
398 else | |
399 width = CHAR_WIDTH (c); | |
400 | |
401 return make_number (width); | |
402 } | |
403 | |
404 /* Return width of string STR of length LEN when displayed in the | |
405 current buffer. The width is measured by how many columns it | |
406 occupies on the screen. If PRECISION > 0, return the width of | |
407 longest substring that doesn't exceed PRECISION, and set number of | |
408 characters and bytes of the substring in *NCHARS and *NBYTES | |
409 respectively. */ | |
410 | |
88427
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
411 int |
88363 | 412 c_string_width (str, len, precision, nchars, nbytes) |
89483 | 413 const unsigned char *str; |
88363 | 414 int precision, *nchars, *nbytes; |
415 { | |
416 int i = 0, i_byte = 0; | |
417 int width = 0; | |
418 struct Lisp_Char_Table *dp = buffer_display_table (); | |
419 | |
420 while (i_byte < len) | |
421 { | |
422 int bytes, thiswidth; | |
423 Lisp_Object val; | |
424 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); | |
425 | |
426 if (dp) | |
427 { | |
428 val = DISP_CHAR_VECTOR (dp, c); | |
429 if (VECTORP (val)) | |
430 thiswidth = XVECTOR (val)->size; | |
431 else | |
432 thiswidth = CHAR_WIDTH (c); | |
433 } | |
434 else | |
435 { | |
436 thiswidth = CHAR_WIDTH (c); | |
437 } | |
438 | |
439 if (precision > 0 | |
440 && (width + thiswidth > precision)) | |
441 { | |
442 *nchars = i; | |
443 *nbytes = i_byte; | |
444 return width; | |
445 } | |
446 i++; | |
447 i_byte += bytes; | |
448 width += thiswidth; | |
449 } | |
450 | |
451 if (precision > 0) | |
452 { | |
453 *nchars = i; | |
454 *nbytes = i_byte; | |
455 } | |
456 | |
457 return width; | |
458 } | |
459 | |
88427
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
460 /* Return width of string STR of length LEN when displayed in the |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
461 current buffer. The width is measured by how many columns it |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
462 occupies on the screen. */ |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
463 |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
464 int |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
465 strwidth (str, len) |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
466 unsigned char *str; |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
467 int len; |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
468 { |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
469 return c_string_width (str, len, -1, NULL, NULL); |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
470 } |
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
471 |
88363 | 472 /* Return width of Lisp string STRING when displayed in the current |
473 buffer. The width is measured by how many columns it occupies on | |
474 the screen while paying attention to compositions. If PRECISION > | |
475 0, return the width of longest substring that doesn't exceed | |
476 PRECISION, and set number of characters and bytes of the substring | |
477 in *NCHARS and *NBYTES respectively. */ | |
478 | |
479 int | |
480 lisp_string_width (string, precision, nchars, nbytes) | |
481 Lisp_Object string; | |
482 int precision, *nchars, *nbytes; | |
483 { | |
89483 | 484 int len = SCHARS (string); |
90256
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
485 /* This set multibyte to 0 even if STRING is multibyte when it |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
486 contains only ascii and eight-bit-graphic, but that's |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
487 intentional. */ |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
488 int multibyte = len < SBYTES (string); |
89483 | 489 unsigned char *str = SDATA (string); |
88363 | 490 int i = 0, i_byte = 0; |
491 int width = 0; | |
492 struct Lisp_Char_Table *dp = buffer_display_table (); | |
493 | |
494 while (i < len) | |
495 { | |
496 int chars, bytes, thiswidth; | |
497 Lisp_Object val; | |
498 int cmp_id; | |
89511
bae1254b8517
(lisp_string_width): Make ignore and end EMACS_INT.
Dave Love <fx@gnu.org>
parents:
89483
diff
changeset
|
499 EMACS_INT ignore, end; |
88363 | 500 |
501 if (find_composition (i, -1, &ignore, &end, &val, string) | |
502 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string)) | |
503 >= 0)) | |
504 { | |
505 thiswidth = composition_table[cmp_id]->width; | |
506 chars = end - i; | |
507 bytes = string_char_to_byte (string, end) - i_byte; | |
508 } | |
509 else | |
510 { | |
90256
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
511 int c; |
88363 | 512 |
90256
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
513 if (multibyte) |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
514 c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
515 else |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
516 c = str[i_byte], bytes = 1; |
88363 | 517 chars = 1; |
90256
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
518 if (dp) |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
519 { |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
520 val = DISP_CHAR_VECTOR (dp, c); |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
521 if (VECTORP (val)) |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
522 thiswidth = XVECTOR (val)->size; |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
523 else |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
524 thiswidth = CHAR_WIDTH (c); |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
525 } |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
526 else |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
527 { |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
528 thiswidth = CHAR_WIDTH (c); |
d4755c5aa57d
(lisp_string_width): Check multibyteness of STRING.
Kenichi Handa <handa@m17n.org>
parents:
90159
diff
changeset
|
529 } |
88363 | 530 } |
531 | |
532 if (precision > 0 | |
533 && (width + thiswidth > precision)) | |
534 { | |
535 *nchars = i; | |
536 *nbytes = i_byte; | |
537 return width; | |
538 } | |
539 i += chars; | |
540 i_byte += bytes; | |
541 width += thiswidth; | |
542 } | |
543 | |
544 if (precision > 0) | |
545 { | |
546 *nchars = i; | |
547 *nbytes = i_byte; | |
548 } | |
549 | |
550 return width; | |
551 } | |
552 | |
553 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0, | |
554 doc: /* Return width of STRING when displayed in the current buffer. | |
555 Width is measured by how many columns it occupies on the screen. | |
556 When calculating width of a multibyte character in STRING, | |
557 only the base leading-code is considered; the validity of | |
558 the following bytes is not checked. Tabs in STRING are always | |
94114
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
559 taken to occupy `tab-width' columns. |
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
560 usage: (string-width STRING) */) |
88363 | 561 (str) |
562 Lisp_Object str; | |
563 { | |
564 Lisp_Object val; | |
565 | |
566 CHECK_STRING (str); | |
567 XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL)); | |
568 return val; | |
569 } | |
570 | |
571 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0, | |
572 doc: /* Return the direction of CHAR. | |
94114
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
573 The returned value is 0 for left-to-right and 1 for right-to-left. |
327f7207ff7e
(Fchar_bytes, Fchar_width, Fstring_width, Fchar_direction):
Juanma Barranquero <lekktu@gmail.com>
parents:
92499
diff
changeset
|
574 usage: (char-direction CHAR) */) |
88363 | 575 (ch) |
576 Lisp_Object ch; | |
577 { | |
578 int c; | |
579 | |
580 CHECK_CHARACTER (ch); | |
581 c = XINT (ch); | |
582 return CHAR_TABLE_REF (Vchar_direction_table, c); | |
583 } | |
584 | |
585 /* Return the number of characters in the NBYTES bytes at PTR. | |
586 This works by looking at the contents and checking for multibyte | |
587 sequences while assuming that there's no invalid sequence. | |
588 However, if the current buffer has enable-multibyte-characters = | |
589 nil, we treat each byte as a character. */ | |
590 | |
91807
507bcfb4342c
* coding.c (coding_set_destination): Use BEG_BYTE rather than hardcoding 1.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91443
diff
changeset
|
591 EMACS_INT |
88363 | 592 chars_in_text (ptr, nbytes) |
89483 | 593 const unsigned char *ptr; |
91807
507bcfb4342c
* coding.c (coding_set_destination): Use BEG_BYTE rather than hardcoding 1.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91443
diff
changeset
|
594 EMACS_INT nbytes; |
88363 | 595 { |
596 /* current_buffer is null at early stages of Emacs initialization. */ | |
597 if (current_buffer == 0 | |
598 || NILP (current_buffer->enable_multibyte_characters)) | |
599 return nbytes; | |
600 | |
601 return multibyte_chars_in_text (ptr, nbytes); | |
602 } | |
603 | |
604 /* Return the number of characters in the NBYTES bytes at PTR. | |
605 This works by looking at the contents and checking for multibyte | |
606 sequences while assuming that there's no invalid sequence. It | |
607 ignores enable-multibyte-characters. */ | |
608 | |
91807
507bcfb4342c
* coding.c (coding_set_destination): Use BEG_BYTE rather than hardcoding 1.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91443
diff
changeset
|
609 EMACS_INT |
88363 | 610 multibyte_chars_in_text (ptr, nbytes) |
89483 | 611 const unsigned char *ptr; |
91807
507bcfb4342c
* coding.c (coding_set_destination): Use BEG_BYTE rather than hardcoding 1.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
91443
diff
changeset
|
612 EMACS_INT nbytes; |
88363 | 613 { |
89483 | 614 const unsigned char *endp = ptr + nbytes; |
88363 | 615 int chars = 0; |
616 | |
617 while (ptr < endp) | |
618 { | |
619 int len = MULTIBYTE_LENGTH (ptr, endp); | |
620 | |
621 if (len == 0) | |
622 abort (); | |
623 ptr += len; | |
624 chars++; | |
625 } | |
626 | |
627 return chars; | |
628 } | |
629 | |
630 /* Parse unibyte text at STR of LEN bytes as a multibyte text, count | |
631 characters and bytes in it, and store them in *NCHARS and *NBYTES | |
632 respectively. On counting bytes, pay attention to that 8-bit | |
633 characters not constructing a valid multibyte sequence are | |
634 represented by 2-byte in a multibyte text. */ | |
635 | |
636 void | |
637 parse_str_as_multibyte (str, len, nchars, nbytes) | |
89483 | 638 const unsigned char *str; |
88363 | 639 int len, *nchars, *nbytes; |
640 { | |
89483 | 641 const unsigned char *endp = str + len; |
88363 | 642 int n, chars = 0, bytes = 0; |
643 | |
644 if (len >= MAX_MULTIBYTE_LENGTH) | |
645 { | |
89483 | 646 const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; |
88363 | 647 while (str < adjusted_endp) |
648 { | |
649 if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0) | |
650 str += n, bytes += n; | |
651 else | |
652 str++, bytes += 2; | |
653 chars++; | |
654 } | |
655 } | |
656 while (str < endp) | |
657 { | |
658 if ((n = MULTIBYTE_LENGTH (str, endp)) > 0) | |
659 str += n, bytes += n; | |
660 else | |
661 str++, bytes += 2; | |
662 chars++; | |
663 } | |
664 | |
665 *nchars = chars; | |
666 *nbytes = bytes; | |
667 return; | |
668 } | |
669 | |
670 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text. | |
671 It actually converts only such 8-bit characters that don't contruct | |
672 a multibyte sequence to multibyte forms of Latin-1 characters. If | |
673 NCHARS is nonzero, set *NCHARS to the number of characters in the | |
674 text. It is assured that we can use LEN bytes at STR as a work | |
675 area and that is enough. Return the number of bytes of the | |
676 resulting text. */ | |
677 | |
678 int | |
679 str_as_multibyte (str, len, nbytes, nchars) | |
680 unsigned char *str; | |
681 int len, nbytes, *nchars; | |
682 { | |
683 unsigned char *p = str, *endp = str + nbytes; | |
684 unsigned char *to; | |
685 int chars = 0; | |
686 int n; | |
687 | |
688 if (nbytes >= MAX_MULTIBYTE_LENGTH) | |
689 { | |
690 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; | |
691 while (p < adjusted_endp | |
692 && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) | |
693 p += n, chars++; | |
694 } | |
695 while ((n = MULTIBYTE_LENGTH (p, endp)) > 0) | |
696 p += n, chars++; | |
697 if (nchars) | |
698 *nchars = chars; | |
699 if (p == endp) | |
700 return nbytes; | |
701 | |
702 to = p; | |
703 nbytes = endp - p; | |
704 endp = str + len; | |
705 safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes); | |
706 p = endp - nbytes; | |
707 | |
708 if (nbytes >= MAX_MULTIBYTE_LENGTH) | |
709 { | |
710 unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; | |
711 while (p < adjusted_endp) | |
712 { | |
713 if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) | |
714 { | |
715 while (n--) | |
716 *to++ = *p++; | |
717 } | |
718 else | |
719 { | |
720 int c = *p++; | |
721 c = BYTE8_TO_CHAR (c); | |
722 to += CHAR_STRING (c, to); | |
723 } | |
724 } | |
725 chars++; | |
726 } | |
727 while (p < endp) | |
728 { | |
729 if ((n = MULTIBYTE_LENGTH (p, endp)) > 0) | |
730 { | |
731 while (n--) | |
732 *to++ = *p++; | |
89483 | 733 } |
88363 | 734 else |
735 { | |
736 int c = *p++; | |
737 c = BYTE8_TO_CHAR (c); | |
738 to += CHAR_STRING (c, to); | |
739 } | |
740 chars++; | |
741 } | |
742 if (nchars) | |
743 *nchars = chars; | |
744 return (to - str); | |
745 } | |
746 | |
/* Scan the LEN bytes of the unibyte string at STR and return how many
   bytes it may occupy once converted to a multibyte string by
   `str_to_multibyte': one byte for each byte below 0x80 and two
   bytes for every other byte.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      if (str[i] >= 0x80)
	total += 2;
      else
	total += 1;
    }

  return total;
}
763 | |
764 | |
/* Rewrite the BYTES bytes of unibyte text at STR as multibyte text
   holding the same single-byte characters: every byte of 0x80 or
   above is replaced by its multibyte form.  The caller guarantees
   that LEN bytes at STR are usable as a work area and that this is
   enough.  The value is the byte length of the resulting text.  */

int
str_to_multibyte (str, len, bytes)
     unsigned char *str;
     int len, bytes;
{
  unsigned char *src = str;
  unsigned char *limit = str + bytes;
  unsigned char *dst;

  /* The leading run of bytes below 0x80 stays where it is.  */
  for (; src < limit; src++)
    if (*src >= 0x80)
      break;
  if (src == limit)
    return bytes;

  /* Move the rest to the end of the work area, then convert it back
     into place starting at DST.  */
  dst = src;
  bytes = limit - src;
  limit = str + len;
  safe_bcopy ((char *) src, (char *) (limit - bytes), bytes);
  src = limit - bytes;

  while (src < limit)
    {
      int c = *src;

      src++;
      if (c >= 0x80)
	c = BYTE8_TO_CHAR (c);
      dst += CHAR_STRING (c, dst);
    }

  return (dst - str);
}
797 | |
/* Rewrite, in place, the BYTES bytes of multibyte text at STR as
   unibyte text.  Only the characters whose leading byte satisfies
   CHAR_BYTE8_HEAD_P are converted (each becomes a single byte); all
   other sequences are copied unchanged.  The value is the byte
   length of the resulting text.  */

int
str_as_unibyte (str, bytes)
     unsigned char *str;
     int bytes;
{
  const unsigned char *src = str, *limit = str + bytes;
  unsigned char *dst;
  int c, len;

  /* Everything before the first eight-bit character stays as is, so
     just step over it.  */
  for (; src < limit; src += len)
    {
      c = *src;
      len = BYTES_BY_CHAR_HEAD (c);
      if (CHAR_BYTE8_HEAD_P (c))
	break;
    }

  /* From here on, output may lag behind input.  */
  dst = str + (src - str);
  while (src < limit)
    {
      c = *src;
      len = BYTES_BY_CHAR_HEAD (c);
      if (! CHAR_BYTE8_HEAD_P (c))
	{
	  for (; len > 0; len--)
	    *dst++ = *src++;
	  continue;
	}
      c = STRING_CHAR_ADVANCE (src);
      *dst++ = CHAR_TO_BYTE8 (c);
    }

  return (dst - str);
}
836 | |
96249
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
837 /* Convert eight-bit chars in SRC (in multibyte form) to the |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
838 corresponding byte and store in DST. CHARS is the number of |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
839 characters in SRC. The value is the number of bytes stored in DST. |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
840 Usually, the value is the same as CHARS, but is less than it if SRC |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
841 contains a non-ASCII, non-eight-bit characater. If ACCEPT_LATIN_1 |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
842 is nonzero, a Latin-1 character is accepted and converted to a byte |
96501
919775e9f3bf
(str_to_unibyte): Modify the comment.
Kenichi Handa <handa@m17n.org>
parents:
96249
diff
changeset
|
843 of that character code. |
919775e9f3bf
(str_to_unibyte): Modify the comment.
Kenichi Handa <handa@m17n.org>
parents:
96249
diff
changeset
|
844 Note: Currently the arg ACCEPT_LATIN_1 is not used. */ |
96249
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
845 |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
846 EMACS_INT |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
847 str_to_unibyte (src, dst, chars, accept_latin_1) |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
848 const unsigned char *src; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
849 unsigned char *dst; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
850 EMACS_INT chars; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
851 int accept_latin_1; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
852 { |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
853 EMACS_INT i; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
854 |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
855 for (i = 0; i < chars; i++) |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
856 { |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
857 int c = STRING_CHAR_ADVANCE (src); |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
858 |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
859 if (CHAR_BYTE8_P (c)) |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
860 c = CHAR_TO_BYTE8 (c); |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
861 else if (! ASCII_CHAR_P (c) |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
862 && (! accept_latin_1 || c >= 0x100)) |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
863 return i; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
864 *dst++ = c; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
865 } |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
866 return i; |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
867 } |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
868 |
71c216bc3b71
(str_to_unibyte): New function.
Kenichi Handa <handa@m17n.org>
parents:
95856
diff
changeset
|
869 |
88363 | 870 int |
871 string_count_byte8 (string) | |
872 Lisp_Object string; | |
873 { | |
874 int multibyte = STRING_MULTIBYTE (string); | |
89483 | 875 int nbytes = SBYTES (string); |
876 unsigned char *p = SDATA (string); | |
88363 | 877 unsigned char *pend = p + nbytes; |
878 int count = 0; | |
879 int c, len; | |
880 | |
881 if (multibyte) | |
882 while (p < pend) | |
883 { | |
884 c = *p; | |
885 len = BYTES_BY_CHAR_HEAD (c); | |
886 | |
887 if (CHAR_BYTE8_HEAD_P (c)) | |
888 count++; | |
889 p += len; | |
890 } | |
891 else | |
892 while (p < pend) | |
893 { | |
894 if (*p++ >= 0x80) | |
895 count++; | |
896 } | |
897 return count; | |
898 } | |
899 | |
900 | |
901 Lisp_Object | |
902 string_escape_byte8 (string) | |
903 Lisp_Object string; | |
904 { | |
89483 | 905 int nchars = SCHARS (string); |
906 int nbytes = SBYTES (string); | |
88363 | 907 int multibyte = STRING_MULTIBYTE (string); |
908 int byte8_count; | |
88878
a0ae1a5876c7
(Vscript_alist): New variable.
Kenichi Handa <handa@m17n.org>
parents:
88853
diff
changeset
|
909 const unsigned char *src, *src_end; |
a0ae1a5876c7
(Vscript_alist): New variable.
Kenichi Handa <handa@m17n.org>
parents:
88853
diff
changeset
|
910 unsigned char *dst; |
88363 | 911 Lisp_Object val; |
912 int c, len; | |
913 | |
914 if (multibyte && nchars == nbytes) | |
915 return string; | |
916 | |
917 byte8_count = string_count_byte8 (string); | |
918 | |
919 if (byte8_count == 0) | |
920 return string; | |
921 | |
922 if (multibyte) | |
923 /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ | |
88526
6bac4ae1c326
(string_escape_byte8): Make multibyte string with correct size.
Kenichi Handa <handa@m17n.org>
parents:
88427
diff
changeset
|
924 val = make_uninit_multibyte_string (nchars + byte8_count * 3, |
88363 | 925 nbytes + byte8_count * 2); |
926 else | |
927 /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ | |
928 val = make_uninit_string (nbytes + byte8_count * 3); | |
929 | |
89483 | 930 src = SDATA (string); |
88363 | 931 src_end = src + nbytes; |
89483 | 932 dst = SDATA (val); |
88363 | 933 if (multibyte) |
934 while (src < src_end) | |
935 { | |
936 c = *src; | |
937 len = BYTES_BY_CHAR_HEAD (c); | |
938 | |
939 if (CHAR_BYTE8_HEAD_P (c)) | |
940 { | |
941 c = STRING_CHAR_ADVANCE (src); | |
942 c = CHAR_TO_BYTE8 (c); | |
88427
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
943 sprintf ((char *) dst, "\\%03o", c); |
88363 | 944 dst += 4; |
945 } | |
946 else | |
947 while (len--) *dst++ = *src++; | |
948 } | |
949 else | |
950 while (src < src_end) | |
951 { | |
952 c = *src++; | |
953 if (c >= 0x80) | |
954 { | |
88427
1a3aec316071
(c_string_width): Add return type `int'.
Kenichi Handa <handa@m17n.org>
parents:
88363
diff
changeset
|
955 sprintf ((char *) dst, "\\%03o", c); |
88363 | 956 dst += 4; |
957 } | |
958 else | |
959 *dst++ = c; | |
960 } | |
961 return val; | |
962 } | |
963 | |
964 | |
89483 | 965 DEFUN ("string", Fstring, Sstring, 0, MANY, 0, |
88363 | 966 doc: /* |
88640 | 967 Concatenate all the argument characters and make the result a string. |
968 usage: (string &rest CHARACTERS) */) | |
88363 | 969 (n, args) |
970 int n; | |
971 Lisp_Object *args; | |
972 { | |
973 int i; | |
974 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n); | |
975 unsigned char *p = buf; | |
976 int c; | |
977 | |
978 for (i = 0; i < n; i++) | |
979 { | |
980 CHECK_CHARACTER (args[i]); | |
981 c = XINT (args[i]); | |
982 p += CHAR_STRING (c, p); | |
983 } | |
984 | |
985 return make_string_from_bytes ((char *) buf, n, p - buf); | |
986 } | |
987 | |
90841
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
988 DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0, |
90942
ba60bd18bc0b
(Funibyte_string): Add "usage: ..." to the docstring.
Kenichi Handa <handa@m17n.org>
parents:
90841
diff
changeset
|
989 doc: /* Concatenate all the argument bytes and make the result a unibyte string. |
ba60bd18bc0b
(Funibyte_string): Add "usage: ..." to the docstring.
Kenichi Handa <handa@m17n.org>
parents:
90841
diff
changeset
|
990 usage: (unibyte-string &rest BYTES) */) |
90841
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
991 (n, args) |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
992 int n; |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
993 Lisp_Object *args; |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
994 { |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
995 int i; |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
996 unsigned char *buf = (unsigned char *) alloca (n); |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
997 unsigned char *p = buf; |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
998 unsigned c; |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
999 |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1000 for (i = 0; i < n; i++) |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1001 { |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1002 CHECK_NATNUM (args[i]); |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1003 c = XFASTINT (args[i]); |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1004 if (c >= 256) |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1005 args_out_of_range_3 (args[i], make_number (0), make_number (255)); |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1006 *p++ = c; |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1007 } |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1008 |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1009 return make_string_from_bytes ((char *) buf, n, p - buf); |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1010 } |
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1011 |
92494
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1012 DEFUN ("char-resolve-modifers", Fchar_resolve_modifiers, |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1013 Schar_resolve_modifiers, 1, 1, 0, |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1014 doc: /* Resolve modifiers in the character CHAR. |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1015 The value is a character with modifiers resolved into the character |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1016 code. Unresolved modifiers are kept in the value. |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1017 usage: (char-resolve-modifers CHAR) */) |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1018 (character) |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1019 Lisp_Object character; |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1020 { |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1021 int c; |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1022 |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1023 CHECK_NUMBER (character); |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1024 c = XINT (character); |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1025 return make_number (char_resolve_modifier_mask (c)); |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1026 } |
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1027 |
/* One-time initialization entry point for this file.  The body is
   empty: no state is set up here.  */
void
init_character_once ()
{
}
1032 | |
1033 #ifdef emacs | |
1034 | |
1035 void | |
1036 syms_of_character () | |
1037 { | |
1038 DEFSYM (Qcharacterp, "characterp"); | |
1039 DEFSYM (Qauto_fill_chars, "auto-fill-chars"); | |
1040 | |
1041 staticpro (&Vchar_unify_table); | |
1042 Vchar_unify_table = Qnil; | |
1043 | |
1044 defsubr (&Smax_char); | |
1045 defsubr (&Scharacterp); | |
1046 defsubr (&Sunibyte_char_to_multibyte); | |
1047 defsubr (&Smultibyte_char_to_unibyte); | |
1048 defsubr (&Schar_bytes); | |
1049 defsubr (&Schar_width); | |
1050 defsubr (&Sstring_width); | |
1051 defsubr (&Schar_direction); | |
1052 defsubr (&Sstring); | |
90841
fa02baec53ff
(Funibyte_string): New function.
Kenichi Handa <handa@m17n.org>
parents:
90797
diff
changeset
|
1053 defsubr (&Sunibyte_string); |
92494
b5a82292599c
(char_resolve_modifier_mask): Fix previous change
Kenichi Handa <handa@m17n.org>
parents:
92483
diff
changeset
|
1054 defsubr (&Schar_resolve_modifiers); |
88363 | 1055 |
1056 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, | |
1057 doc: /* | |
88926
f026b7e4fc61
(syms_of_character) <translation-table-vector>: Doc
Dave Love <fx@gnu.org>
parents:
88913
diff
changeset
|
1058 Vector recording all translation tables ever defined. |
f026b7e4fc61
(syms_of_character) <translation-table-vector>: Doc
Dave Love <fx@gnu.org>
parents:
88913
diff
changeset
|
1059 Each element is a pair (SYMBOL . TABLE) relating the table to the |
f026b7e4fc61
(syms_of_character) <translation-table-vector>: Doc
Dave Love <fx@gnu.org>
parents:
88913
diff
changeset
|
1060 symbol naming it. The ID of a translation table is an index into this vector. */); |
88363 | 1061 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); |
1062 | |
1063 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, | |
1064 doc: /* | |
1065 A char-table for characters which invoke auto-filling. | |
1066 Such characters have value t in this table. */); | |
1067 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil); | |
88853
503c60238368
(syms_of_character): Fix CHAR_TABLE_SET call.
Dave Love <fx@gnu.org>
parents:
88743
diff
changeset
|
1068 CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt); |
503c60238368
(syms_of_character): Fix CHAR_TABLE_SET call.
Dave Love <fx@gnu.org>
parents:
88743
diff
changeset
|
1069 CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt); |
88363 | 1070 |
1071 DEFVAR_LISP ("char-width-table", &Vchar_width_table, | |
1072 doc: /* | |
1073 A char-table for width (columns) of each character. */); | |
1074 Vchar_width_table = Fmake_char_table (Qnil, make_number (1)); | |
88743
4570f1a4c391
(syms_of_character): Setup Vchar_width_table for
Kenichi Handa <handa@m17n.org>
parents:
88716
diff
changeset
|
1075 char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4)); |
4570f1a4c391
(syms_of_character): Setup Vchar_width_table for
Kenichi Handa <handa@m17n.org>
parents:
88716
diff
changeset
|
1076 char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, |
4570f1a4c391
(syms_of_character): Setup Vchar_width_table for
Kenichi Handa <handa@m17n.org>
parents:
88716
diff
changeset
|
1077 make_number (4)); |
88363 | 1078 |
1079 DEFVAR_LISP ("char-direction-table", &Vchar_direction_table, | |
1080 doc: /* A char-table for direction of each character. */); | |
1081 Vchar_direction_table = Fmake_char_table (Qnil, make_number (1)); | |
1082 | |
1083 DEFVAR_LISP ("printable-chars", &Vprintable_chars, | |
1084 doc: /* A char-table for each printable character. */); | |
88716
f617b1bf301b
(syms_of_character): Set the default value of
Kenichi Handa <handa@m17n.org>
parents:
88640
diff
changeset
|
1085 Vprintable_chars = Fmake_char_table (Qnil, Qnil); |
89029
ad8f56efa7c6
(syms_of_character): Setup Vprintable_chars.
Kenichi Handa <handa@m17n.org>
parents:
89020
diff
changeset
|
1086 Fset_char_table_range (Vprintable_chars, |
ad8f56efa7c6
(syms_of_character): Setup Vprintable_chars.
Kenichi Handa <handa@m17n.org>
parents:
89020
diff
changeset
|
1087 Fcons (make_number (32), make_number (126)), Qt); |
ad8f56efa7c6
(syms_of_character): Setup Vprintable_chars.
Kenichi Handa <handa@m17n.org>
parents:
89020
diff
changeset
|
1088 Fset_char_table_range (Vprintable_chars, |
ad8f56efa7c6
(syms_of_character): Setup Vprintable_chars.
Kenichi Handa <handa@m17n.org>
parents:
89020
diff
changeset
|
1089 Fcons (make_number (160), |
ad8f56efa7c6
(syms_of_character): Setup Vprintable_chars.
Kenichi Handa <handa@m17n.org>
parents:
89020
diff
changeset
|
1090 make_number (MAX_5_BYTE_CHAR)), Qt); |
88878
a0ae1a5876c7
(Vscript_alist): New variable.
Kenichi Handa <handa@m17n.org>
parents:
88853
diff
changeset
|
1091 |
88913
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1092 DEFVAR_LISP ("char-script-table", &Vchar_script_table, |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1093 doc: /* Char table of script symbols. |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1094 It has one extra slot whose value is a list of script symbols. */); |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1095 |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1096 /* Intern this now in case it isn't already done. |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1097 Setting this variable twice is harmless. |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1098 But don't staticpro it here--that is done in alloc.c. */ |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1099 Qchar_table_extra_slots = intern ("char-table-extra-slots"); |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1100 DEFSYM (Qchar_script_table, "char-script-table"); |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1101 Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1)); |
76074dea1258
(Vscript_alist): This variable deleted.
Kenichi Handa <handa@m17n.org>
parents:
88878
diff
changeset
|
1102 Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil); |
90403
81f1bbab281f
(Vscript_representative_chars): New variable.
Kenichi Handa <handa@m17n.org>
parents:
90256
diff
changeset
|
1103 |
81f1bbab281f
(Vscript_representative_chars): New variable.
Kenichi Handa <handa@m17n.org>
parents:
90256
diff
changeset
|
1104 DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars, |
81f1bbab281f
(Vscript_representative_chars): New variable.
Kenichi Handa <handa@m17n.org>
parents:
90256
diff
changeset
|
1105 doc: /* Alist of scripts vs the representative characters. */); |
81f1bbab281f
(Vscript_representative_chars): New variable.
Kenichi Handa <handa@m17n.org>
parents:
90256
diff
changeset
|
1106 Vscript_representative_chars = Qnil; |
88363 | 1107 } |
1108 | |
1109 #endif /* emacs */ | |
89911 | 1110 |
1111 /* arch-tag: b6665960-3c3d-4184-85cd-af4318197999 | |
1112 (do not change this comment) */ |