comparison src/charset.c @ 26844:9e9a0735c824

In this entry, just `Modified' means that the code for composite characters is deleted. (Qcomposition) (leading_code_composition) (charset_composition) (min_composite_char) (cmpchar_table) (cmpchar_table_size) (n_cmpchars): Deleted. (SPLIT_COMPOSITE_SEQ): Deleted. (SPLIT_MULTIBYTE_SEQ): Modified. (char_to_string): Renamed from non_ascii_char_to_string. Modified. (string_to_char): Renamed from string_to_non_ascii_char. (split_string): Renamed from split_non_ascii_string. (char_printable_p) (Fsplit_char) (Ffind_charset_region) (Ffind_charset_string) (char_valid_p) (char_bytes) (Fchar_width) (strwidth): Modified. (find_charset_in_str): Argument CMPCHARP deleted. Modified. (Fstring): Adjusted for the change of CHAR_STRING. Modified. (hash_string) (CMPCHAR_HASH_TABLE_SIZE) (cmpchar_hash_table) (CMPCHAR_HASH_SIZE) (CMPCHAR_HASH_USED) (CMPCHAR_HASH_CMPCHAR_ID) (str_cmpchar_id) (cmpchar_component) (Fcmpcharp) (Fcmpchar_component) (Fcmpchar_cmp_rule) (Fcmpchar_cmp_rule_p) (Fcmpchar_cmp_count): Deleted. (Fcompose_string): Now implemented in Emacs Lisp in composite.el. (init_charset_once): Modified. (syms_of_charset): Modified.
author Kenichi Handa <handa@m17n.org>
date Wed, 15 Dec 1999 00:04:59 +0000
parents 493539fc5bb2
children 5409f70f1c23
comparison
equal deleted inserted replaced
26843:0aadeca4a4a7 26844:9e9a0735c824
41 41
42 #include "mulelib.h" 42 #include "mulelib.h"
43 43
44 #endif /* emacs */ 44 #endif /* emacs */
45 45
46 Lisp_Object Qcharset, Qascii, Qcomposition; 46 Lisp_Object Qcharset, Qascii;
47 Lisp_Object Qunknown; 47 Lisp_Object Qunknown;
48 48
49 /* Declaration of special leading-codes. */ 49 /* Declaration of special leading-codes. */
50 int leading_code_composition; /* for composite characters */
51 int leading_code_private_11; /* for private DIMENSION1 of 1-column */ 50 int leading_code_private_11; /* for private DIMENSION1 of 1-column */
52 int leading_code_private_12; /* for private DIMENSION1 of 2-column */ 51 int leading_code_private_12; /* for private DIMENSION1 of 2-column */
53 int leading_code_private_21; /* for private DIMENSION2 of 1-column */ 52 int leading_code_private_21; /* for private DIMENSION2 of 1-column */
54 int leading_code_private_22; /* for private DIMENSION2 of 2-column */ 53 int leading_code_private_22; /* for private DIMENSION2 of 2-column */
55 54
56 /* Declaration of special charsets. */ 55 /* Declaration of special charsets. */
57 int charset_ascii; /* ASCII */ 56 int charset_ascii; /* ASCII */
58 int charset_composition; /* for a composite character */
59 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ 57 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
60 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ 58 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
61 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ 59 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
62 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */ 60 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
63 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ 61 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
64 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ 62 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
65 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ 63 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
66 64
67 int min_composite_char;
68
69 Lisp_Object Qcharset_table; 65 Lisp_Object Qcharset_table;
70 66
71 /* A char-table containing information of each character set. */ 67 /* A char-table containing information of each character set. */
72 Lisp_Object Vcharset_table; 68 Lisp_Object Vcharset_table;
73 69
92 int width_by_char_head[256]; 88 int width_by_char_head[256];
93 89
94 /* Mapping table from ISO2022's charset (specified by DIMENSION, 90 /* Mapping table from ISO2022's charset (specified by DIMENSION,
95 CHARS, and FINAL-CHAR) to Emacs' charset. */ 91 CHARS, and FINAL-CHAR) to Emacs' charset. */
96 int iso_charset_table[2][2][128]; 92 int iso_charset_table[2][2][128];
97
98 /* Table of pointers to the structure `cmpchar_info' indexed by
99 CMPCHAR-ID. */
100 struct cmpchar_info **cmpchar_table;
101 /* The current size of `cmpchar_table'. */
102 static int cmpchar_table_size;
103 /* Number of the current composite characters. */
104 int n_cmpchars;
105 93
106 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */ 94 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
107 unsigned char *_fetch_multibyte_char_p; 95 unsigned char *_fetch_multibyte_char_p;
108 int _fetch_multibyte_char_len; 96 int _fetch_multibyte_char_len;
109 97
125 int c; 113 int c;
126 { 114 {
127 error ("Invalid character: 0%o, %d, 0x%x", c, c, c); 115 error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
128 } 116 }
129 117
130 /* Parse composite character string STR of length LENGTH (>= 2) and 118 /* Parse a multibyte character string STR of length LENGTH (>= 2) and set
131 set BYTES, CHARSET, C1, and C2 as below. 119 BYTES to the length of actual multibyte sequence, CHARSET, C1, and
132 120 C2 to such values that MAKE_CHAR can make the multibyte character
133 It is assumed that *STR is LEADING_CODE_COMPOSITION and the 121 from them.
122
123 It is assumed that *STR is one of base leading codes and the
134 following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P. 124 following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P.
135
136 If there is a valid composite character, set CHARSET, C1, and C2 to
137 such values that MAKE_CHAR can make the composite character from
138 them. Otherwise, set CHARSET to CHARSET_COMPOSITION, set C1 to the
139 second byte of the sequence, C2 to -1 so that MAKE_CHAR can make
140 the invalid multibyte character whose string representation is two
141 bytes of STR[0] and STR[1]. In any case, set BYTES to LENGTH.
142
143 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */
144
145 #define SPLIT_COMPOSITE_SEQ(str, length, bytes, charset, c1, c2) \
146 do { \
147 int cmpchar_id = str_cmpchar_id ((str), (length)); \
148 \
149 (charset) = CHARSET_COMPOSITION; \
150 (bytes) = (length); \
151 if (cmpchar_id >= 0) \
152 { \
153 (c1) = CHAR_FIELD2 (cmpchar_id); \
154 (c2) = CHAR_FIELD3 (cmpchar_id); \
155 } \
156 else \
157 { \
158 (c1) = (str)[1] & 0x7F; \
159 (c2) = -1; \
160 } \
161 } while (0)
162
163 /* Parse non-composite multibyte character string STR of length LENGTH
164 (>= 2) and set BYTES to the length of actual multibyte sequence,
165 CHARSET, C1, and C2 to such values that MAKE_CHAR can make the
166 multibyte character from them.
167
168 It is assumed that *STR is one of base leading codes (excluding
169 LEADING_CODE_COMPOSITION) and the following (LENGTH - 1) bytes
170 satisfy !CHAR_HEAD_P.
171 125
172 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */ 126 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */
173 127
174 #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \ 128 #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \
175 do { \ 129 do { \
189 else \ 143 else \
190 (c1) = (c2) = -1; \ 144 (c1) = (c2) = -1; \
191 } while (0) 145 } while (0)
192 146
193 /* Parse string STR of length LENGTH and check if a multibyte 147 /* Parse string STR of length LENGTH and check if a multibyte
194 character is at STR. set BYTES to the actual length, CHARSET, C1, 148 character is at STR. Set BYTES to the actual length, CHARSET, C1,
195 C2 to proper values for that character. */ 149 C2 to proper values for that character. */
196 150
197 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \ 151 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
198 do { \ 152 do { \
199 int i; \ 153 int i; \
201 i = 1; \ 155 i = 1; \
202 else \ 156 else \
203 for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \ 157 for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \
204 if (i == 1) \ 158 if (i == 1) \
205 (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \ 159 (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \
206 else if ((str)[0] == LEADING_CODE_COMPOSITION) \
207 SPLIT_COMPOSITE_SEQ (str, i, bytes, charset, c1, c2); \
208 else \ 160 else \
209 { \ 161 { \
210 if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \ 162 if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \
211 i = BYTES_BY_CHAR_HEAD ((str)[0]); \ 163 i = BYTES_BY_CHAR_HEAD ((str)[0]); \
212 SPLIT_CHARACTER_SEQ (str, i, bytes, charset, c1, c2); \ 164 SPLIT_CHARACTER_SEQ (str, i, bytes, charset, c1, c2); \
219 ? ((c1) >= 0 && (c1) <= 0x7F) \ 171 ? ((c1) >= 0 && (c1) <= 0x7F) \
220 : (CHARSET_DIMENSION (charset) == 1 \ 172 : (CHARSET_DIMENSION (charset) == 1 \
221 ? ((c1) >= 0x20 && (c1) <= 0x7F) \ 173 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
222 : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F))) 174 : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F)))
223 175
224 /* Set STR a pointer to the multi-byte form of the character C. If C 176 /* Store multi-byte form of the character C in STR. The caller should
225 is not a composite character, the multi-byte form is set in WORKBUF 177 allocate at least 4-byte area at STR in advance. Returns the
226 and STR points WORKBUF. The caller should allocate at least 4-byte 178 length of the multi-byte form. If C is an invalid character code,
227 area at WORKBUF in advance. Returns the length of the multi-byte 179 signal an error.
228 form. If C is an invalid character, store (C & 0xFF) in WORKBUF[0] 180
229 and return 1. 181 Use macro `CHAR_STRING (C, STR)' instead of calling this function
230 182 directly if C can be an ASCII character. */
231 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
232 function directly if C can be an ASCII character. */
233 183
234 int 184 int
235 non_ascii_char_to_string (c, workbuf, str) 185 char_to_string (c, str)
236 int c; 186 int c;
237 unsigned char *workbuf, **str; 187 unsigned char *str;
238 { 188 {
189 unsigned char *p = str;
190
239 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */ 191 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
240 { 192 {
241 /* Multibyte character can't have a modifier bit. */ 193 /* Multibyte character can't have a modifier bit. */
242 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) 194 if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
243 invalid_character (c); 195 invalid_character (c);
274 226
275 /* If C still has any modifier bits, it is an invalid character. */ 227 /* If C still has any modifier bits, it is an invalid character. */
276 if (c & CHAR_MODIFIER_MASK) 228 if (c & CHAR_MODIFIER_MASK)
277 invalid_character (c); 229 invalid_character (c);
278 230
279 *str = workbuf; 231 *p++ = c;
280 *workbuf++ = c; 232 }
281 } 233 else if (c < MAX_CHAR)
282 else
283 { 234 {
284 int charset, c1, c2; 235 int charset, c1, c2;
285 236
286 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); 237 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
287 if (charset == CHARSET_COMPOSITION) 238
288 { 239 if (charset >= LEADING_CODE_EXT_11)
289 if (c >= MAX_CHAR) 240 *p++ = (charset < LEADING_CODE_EXT_12
290 invalid_character (c); 241 ? LEADING_CODE_PRIVATE_11
291 if (c >= MIN_CHAR_COMPOSITION) 242 : (charset < LEADING_CODE_EXT_21
292 { 243 ? LEADING_CODE_PRIVATE_12
293 /* Valid composite character. */ 244 : (charset < LEADING_CODE_EXT_22
294 *str = cmpchar_table[COMPOSITE_CHAR_ID (c)]->data; 245 ? LEADING_CODE_PRIVATE_21
295 workbuf = *str + cmpchar_table[COMPOSITE_CHAR_ID (c)]->len; 246 : LEADING_CODE_PRIVATE_22)));
296 } 247 *p++ = charset;
297 else 248 if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
298 {
299 /* Invalid but can have multibyte form. */
300 *str = workbuf;
301 *workbuf++ = LEADING_CODE_COMPOSITION;
302 *workbuf++ = c1 | 0x80;
303 }
304 }
305 else if (charset > CHARSET_COMPOSITION)
306 {
307 *str = workbuf;
308 if (charset >= LEADING_CODE_EXT_11)
309 *workbuf++ = (charset < LEADING_CODE_EXT_12
310 ? LEADING_CODE_PRIVATE_11
311 : (charset < LEADING_CODE_EXT_21
312 ? LEADING_CODE_PRIVATE_12
313 : (charset < LEADING_CODE_EXT_22
314 ? LEADING_CODE_PRIVATE_21
315 : LEADING_CODE_PRIVATE_22)));
316 *workbuf++ = charset;
317 if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
318 invalid_character (c);
319 if (c1)
320 {
321 *workbuf++ = c1 | 0x80;
322 if (c2 > 0)
323 *workbuf++ = c2 | 0x80;
324 }
325 }
326 else if (charset == CHARSET_ASCII)
327 *workbuf++= c & 0x7F;
328 else
329 invalid_character (c); 249 invalid_character (c);
330 } 250 if (c1)
331 251 {
332 return (workbuf - *str); 252 *p++ = c1 | 0x80;
253 if (c2 > 0)
254 *p++ = c2 | 0x80;
255 }
256 }
257
258 return (p -str);
333 } 259 }
334 260
335 /* Return the non-ASCII character corresponding to multi-byte form at 261 /* Return the non-ASCII character corresponding to multi-byte form at
336 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte 262 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
337 length of the multibyte form in *ACTUAL_LEN. 263 length of the multibyte form in *ACTUAL_LEN.
338 264
339 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function 265 Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
340 directly if you want to handle ASCII characters as well. */ 266 this function directly if you want to handle ASCII characters as
267 well. */
341 268
342 int 269 int
343 string_to_non_ascii_char (str, len, actual_len) 270 string_to_char (str, len, actual_len)
344 const unsigned char *str; 271 const unsigned char *str;
345 int len, *actual_len; 272 int len, *actual_len;
346 { 273 {
347 int c, bytes, charset, c1, c2; 274 int c, bytes, charset, c1, c2;
348 275
371 character at STR, and return 0. If there's no multibyte character, 298 character at STR, and return 0. If there's no multibyte character,
372 return -1. This should be used only in the macro SPLIT_STRING 299 return -1. This should be used only in the macro SPLIT_STRING
373 which checks range of STR in advance. */ 300 which checks range of STR in advance. */
374 301
375 int 302 int
376 split_non_ascii_string (str, len, charset, c1, c2) 303 split_string (str, len, charset, c1, c2)
377 const unsigned char *str; 304 const unsigned char *str;
378 unsigned char *c1, *c2; 305 unsigned char *c1, *c2;
379 int len, *charset; 306 int len, *charset;
380 { 307 {
381 register int bytes, cs, code1, code2 = -1; 308 register int bytes, cs, code1, code2 = -1;
397 { 324 {
398 int charset, c1, c2, chars; 325 int charset, c1, c2, chars;
399 326
400 if (SINGLE_BYTE_CHAR_P (c)) 327 if (SINGLE_BYTE_CHAR_P (c))
401 return 1; 328 return 1;
402 if (c >= MIN_CHAR_COMPOSITION) 329 if (c >= MAX_CHAR)
403 return (c < MAX_CHAR); 330 return 0;
404 331
405 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); 332 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
406 if (! CHARSET_DEFINED_P (charset)) 333 if (! CHARSET_DEFINED_P (charset))
407 return 0; 334 return 0;
408 if (CHARSET_CHARS (charset) == 94 335 if (CHARSET_CHARS (charset) == 94
833 /* Return number of different charsets in STR of length LEN. In 760 /* Return number of different charsets in STR of length LEN. In
834 addition, for each found charset N, CHARSETS[N] is set 1. The 761 addition, for each found charset N, CHARSETS[N] is set 1. The
835 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance. 762 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
836 It may lookup a translation table TABLE if supplied. 763 It may lookup a translation table TABLE if supplied.
837 764
838 If CMPCHARP is nonzero and some composite character is found,
839 CHARSETS[128] is also set 1 and the returned number is incremented
840 by 1.
841
842 If MULTIBYTE is zero, do not check multibyte characters, i.e. if 765 If MULTIBYTE is zero, do not check multibyte characters, i.e. if
843 any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any 766 any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any
844 8-bit codes are found CHARSET[1] is set to 1. */ 767 8-bit codes are found CHARSET[1] is set to 1. */
845 768
846 int 769 int
847 find_charset_in_str (str, len, charsets, table, cmpcharp, multibyte) 770 find_charset_in_str (str, len, charsets, table, multibyte)
848 unsigned char *str; 771 unsigned char *str;
849 int len, *charsets; 772 int len, *charsets;
850 Lisp_Object table; 773 Lisp_Object table;
851 int cmpcharp;
852 int multibyte; 774 int multibyte;
853 { 775 {
854 register int num = 0, c; 776 register int num = 0, c;
855 777
856 if (! multibyte) 778 if (! multibyte)
876 if (! CHAR_TABLE_P (table)) 798 if (! CHAR_TABLE_P (table))
877 table = Qnil; 799 table = Qnil;
878 800
879 while (len > 0) 801 while (len > 0)
880 { 802 {
881 int bytes, charset; 803 int bytes, charset, c1, c2;
882 c = *str; 804
883 805 SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
884 if (c == LEADING_CODE_COMPOSITION) 806
885 { 807 if (! NILP (table))
886 int cmpchar_id = str_cmpchar_id (str, len); 808 {
887 GLYPH *glyph; 809 int c1 = translate_char (table, -1, charset, c1, c2);
888 810 if (c1 >= 0)
889 if (cmpchar_id >= 0) 811 charset = CHAR_CHARSET (c);
890 {
891 struct cmpchar_info *cmp_p = cmpchar_table[cmpchar_id];
892 int i;
893
894 for (i = 0; i < cmp_p->glyph_len; i++)
895 {
896 c = cmp_p->glyph[i];
897 if (!NILP (table))
898 {
899 if ((c = translate_char (table, c, 0, 0, 0)) < 0)
900 c = cmp_p->glyph[i];
901 }
902 if ((charset = CHAR_CHARSET (c)) < 0)
903 charset = CHARSET_ASCII;
904 if (!charsets[charset])
905 {
906 charsets[charset] = 1;
907 num += 1;
908 }
909 }
910 str += cmp_p->len;
911 len -= cmp_p->len;
912 if (cmpcharp && !charsets[CHARSET_COMPOSITION])
913 {
914 charsets[CHARSET_COMPOSITION] = 1;
915 num += 1;
916 }
917 continue;
918 }
919
920 charset = 1; /* This leads to `unknown' charset. */
921 bytes = 1;
922 }
923 else
924 {
925 c = STRING_CHAR_AND_LENGTH (str, len, bytes);
926 if (! NILP (table))
927 {
928 int c1 = translate_char (table, c, 0, 0, 0);
929 if (c1 >= 0)
930 c = c1;
931 }
932 charset = CHAR_CHARSET (c);
933 } 812 }
934 813
935 if (!charsets[charset]) 814 if (!charsets[charset])
936 { 815 {
937 charsets[charset] = 1; 816 charsets[charset] = 1;
945 824
946 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, 825 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
947 2, 3, 0, 826 2, 3, 0,
948 "Return a list of charsets in the region between BEG and END.\n\ 827 "Return a list of charsets in the region between BEG and END.\n\
949 BEG and END are buffer positions.\n\ 828 BEG and END are buffer positions.\n\
950 If the region contains any composite character,\n\
951 `composition' is included in the returned list.\n\
952 Optional arg TABLE if non-nil is a translation table to look up.\n\ 829 Optional arg TABLE if non-nil is a translation table to look up.\n\
953 \n\ 830 \n\
954 If the region contains invalid multiybte characters,\n\ 831 If the region contains invalid multiybte characters,\n\
955 `unknown' is included in the returned list.\n\ 832 `unknown' is included in the returned list.\n\
956 \n\ 833 \n\
982 859
983 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); 860 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
984 while (1) 861 while (1)
985 { 862 {
986 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte, 863 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
987 charsets, table, 1, multibyte); 864 charsets, table, multibyte);
988 if (stop < to) 865 if (stop < to)
989 { 866 {
990 from = stop, from_byte = stop_byte; 867 from = stop, from_byte = stop_byte;
991 stop = to, stop_byte = CHAR_TO_BYTE (stop); 868 stop = to, stop_byte = CHAR_TO_BYTE (stop);
992 } 869 }
997 val = Qnil; 874 val = Qnil;
998 undefined = 0; 875 undefined = 0;
999 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) 876 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
1000 if (charsets[i]) 877 if (charsets[i])
1001 { 878 {
1002 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION) 879 if (CHARSET_DEFINED_P (i))
1003 val = Fcons (CHARSET_SYMBOL (i), val); 880 val = Fcons (CHARSET_SYMBOL (i), val);
1004 else 881 else
1005 undefined = 1; 882 undefined = 1;
1006 } 883 }
1007 if (undefined) 884 if (undefined)
1010 } 887 }
1011 888
1012 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, 889 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1013 1, 2, 0, 890 1, 2, 0,
1014 "Return a list of charsets in STR.\n\ 891 "Return a list of charsets in STR.\n\
1015 If the string contains any composite characters,\n\
1016 `composition' is included in the returned list.\n\
1017 Optional arg TABLE if non-nil is a translation table to look up.\n\ 892 Optional arg TABLE if non-nil is a translation table to look up.\n\
1018 \n\ 893 \n\
1019 If the region contains invalid multiybte characters,\n\ 894 If the region contains invalid multiybte characters,\n\
1020 `unknown' is included in the returned list.\n\ 895 `unknown' is included in the returned list.\n\
1021 \n\ 896 \n\
1034 CHECK_STRING (str, 0); 909 CHECK_STRING (str, 0);
1035 multibyte = STRING_MULTIBYTE (str); 910 multibyte = STRING_MULTIBYTE (str);
1036 911
1037 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); 912 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
1038 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)), 913 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
1039 charsets, table, 1, multibyte); 914 charsets, table, multibyte);
1040 val = Qnil; 915 val = Qnil;
1041 undefined = 0; 916 undefined = 0;
1042 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) 917 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
1043 if (charsets[i]) 918 if (charsets[i])
1044 { 919 {
1045 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION) 920 if (CHARSET_DEFINED_P (i))
1046 val = Fcons (CHARSET_SYMBOL (i), val); 921 val = Fcons (CHARSET_SYMBOL (i), val);
1047 else 922 else
1048 undefined = 1; 923 undefined = 1;
1049 } 924 }
1050 if (undefined) 925 if (undefined)
1197 if (c < 0) 1072 if (c < 0)
1198 return 0; 1073 return 0;
1199 if (SINGLE_BYTE_CHAR_P (c)) 1074 if (SINGLE_BYTE_CHAR_P (c))
1200 return 1; 1075 return 1;
1201 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); 1076 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
1202 if (charset == CHARSET_COMPOSITION)
1203 return ((c >= MIN_CHAR_COMPOSITION
1204 && c < MIN_CHAR_COMPOSITION + n_cmpchars)
1205 || (genericp && c == GENERIC_COMPOSITION_CHAR));
1206 if (genericp) 1077 if (genericp)
1207 { 1078 {
1208 if (c1) 1079 if (c1)
1209 { 1080 {
1210 if (c2 <= 0) c2 = 0x20; 1081 if (c2 <= 0) c2 = 0x20;
1286 Don't call this function directly, instead use macro CHAR_BYTES. */ 1157 Don't call this function directly, instead use macro CHAR_BYTES. */
1287 int 1158 int
1288 char_bytes (c) 1159 char_bytes (c)
1289 int c; 1160 int c;
1290 { 1161 {
1291 int bytes; 1162 int charset;
1292 1163
1293 if (SINGLE_BYTE_CHAR_P (c) || (c & ~GLYPH_MASK_CHAR)) 1164 if (SINGLE_BYTE_CHAR_P (c) || (c & ~GLYPH_MASK_CHAR))
1294 return 1; 1165 return 1;
1295 1166
1296 if (COMPOSITE_CHAR_P (c)) 1167 charset = CHAR_CHARSET (c);
1297 { 1168 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1298 unsigned int id = COMPOSITE_CHAR_ID (c);
1299
1300 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
1301 }
1302 else
1303 {
1304 int charset = CHAR_CHARSET (c);
1305
1306 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
1307 }
1308
1309 return bytes;
1310 } 1169 }
1311 1170
1312 /* Return the width of character of which multi-byte form starts with 1171 /* Return the width of character of which multi-byte form starts with
1313 C. The width is measured by how many columns occupied on the 1172 C. The width is measured by how many columns occupied on the
1314 screen when displayed in the current buffer. */ 1173 screen when displayed in the current buffer. */
1346 1205
1347 if (VECTORP (disp)) 1206 if (VECTORP (disp))
1348 XSETINT (val, XVECTOR (disp)->size); 1207 XSETINT (val, XVECTOR (disp)->size);
1349 else if (SINGLE_BYTE_CHAR_P (c)) 1208 else if (SINGLE_BYTE_CHAR_P (c))
1350 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c)); 1209 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1351 else if (COMPOSITE_CHAR_P (c))
1352 {
1353 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
1354 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 1));
1355 }
1356 else 1210 else
1357 { 1211 {
1358 int charset = CHAR_CHARSET (c); 1212 int charset = CHAR_CHARSET (c);
1359 1213
1360 XSETFASTINT (val, CHARSET_WIDTH (charset)); 1214 XSETFASTINT (val, CHARSET_WIDTH (charset));
1375 int width = 0; 1229 int width = 0;
1376 struct Lisp_Char_Table *dp = buffer_display_table (); 1230 struct Lisp_Char_Table *dp = buffer_display_table ();
1377 1231
1378 while (str < endp) 1232 while (str < endp)
1379 { 1233 {
1380 if (*str == LEADING_CODE_COMPOSITION) 1234 Lisp_Object disp;
1381 { 1235 int thislen;
1382 int id = str_cmpchar_id (str, endp - str); 1236 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
1383 1237
1384 if (id < 0) 1238 /* Get the way the display table would display it. */
1385 { 1239 if (dp)
1386 width += 4; 1240 disp = DISP_CHAR_VECTOR (dp, c);
1387 str++;
1388 }
1389 else
1390 {
1391 width += cmpchar_table[id]->width;
1392 str += cmpchar_table[id]->len;
1393 }
1394 }
1395 else 1241 else
1396 { 1242 disp = Qnil;
1397 Lisp_Object disp; 1243
1398 int thislen; 1244 if (VECTORP (disp))
1399 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen); 1245 width += XVECTOR (disp)->size;
1400 1246 else
1401 /* Get the way the display table would display it. */ 1247 width += ONE_BYTE_CHAR_WIDTH (*str);
1402 if (dp) 1248
1403 disp = DISP_CHAR_VECTOR (dp, c); 1249 str += thislen;
1404 else
1405 disp = Qnil;
1406
1407 if (VECTORP (disp))
1408 width += XVECTOR (disp)->size;
1409 else
1410 width += ONE_BYTE_CHAR_WIDTH (*str);
1411
1412 str += thislen;
1413 }
1414 } 1250 }
1415 return width; 1251 return width;
1416 } 1252 }
1417 1253
1418 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0, 1254 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1520 (n, args) 1356 (n, args)
1521 int n; 1357 int n;
1522 Lisp_Object *args; 1358 Lisp_Object *args;
1523 { 1359 {
1524 int i; 1360 int i;
1525 unsigned char *buf 1361 unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1526 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
1527 unsigned char *p = buf; 1362 unsigned char *p = buf;
1528 Lisp_Object val; 1363 Lisp_Object val;
1364 int c;
1529 1365
1530 for (i = 0; i < n; i++) 1366 for (i = 0; i < n; i++)
1531 { 1367 {
1532 int c, len;
1533 unsigned char *str;
1534
1535 if (!INTEGERP (args[i])) 1368 if (!INTEGERP (args[i]))
1536 CHECK_NUMBER (args[i], 0); 1369 CHECK_NUMBER (args[i], 0);
1537 c = XINT (args[i]); 1370 c = XINT (args[i]);
1538 len = CHAR_STRING (c, p, str); 1371 p += CHAR_STRING (c, p);
1539 if (p != str)
1540 /* C is a composite character. */
1541 bcopy (str, p, len);
1542 p += len;
1543 } 1372 }
1544 1373
1545 /* Here, we can't use make_string_from_bytes because of byte 1374 /* Here, we can't use make_string_from_bytes because of byte
1546 combining problem. */ 1375 combining problem. */
1547 val = make_string (buf, p - buf); 1376 val = make_string (buf, p - buf);
1548 return val; 1377 return val;
1549 } 1378 }
1550 1379
1551 #endif /* emacs */ 1380 #endif /* emacs */
1552
1553 /*** Composite characters staffs ***/
1554
1555 /* Each composite character is identified by CMPCHAR-ID which is
1556 assigned when Emacs needs the character code of the composite
1557 character (e.g. when displaying it on the screen). See the
1558 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1559 composite character is represented in Emacs. */
1560
1561 /* If `static' is defined, it means that it is defined to null string. */
1562 #ifndef static
1563 /* The following function is copied from lread.c. */
1564 static int
1565 hash_string (ptr, len)
1566 unsigned char *ptr;
1567 int len;
1568 {
1569 register unsigned char *p = ptr;
1570 register unsigned char *end = p + len;
1571 register unsigned char c;
1572 register int hash = 0;
1573
1574 while (p != end)
1575 {
1576 c = *p++;
1577 if (c >= 0140) c -= 40;
1578 hash = ((hash<<3) + (hash>>28) + c);
1579 }
1580 return hash & 07777777777;
1581 }
1582 #endif
1583
1584 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1585
1586 static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1587
1588 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1589 integer, where the 1st element is the size of the array, the 2nd
1590 element is how many elements are actually used in the array, and
1591 the remaining elements are CMPCHAR-IDs of composite characters of
1592 the same hash value. */
1593 #define CMPCHAR_HASH_SIZE(table) table[0]
1594 #define CMPCHAR_HASH_USED(table) table[1]
1595 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1596
1597 /* Return CMPCHAR-ID of the composite character in STR of the length
1598 LEN. If the composite character has not yet been registered,
1599 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1600 is the sole function for assigning CMPCHAR-ID. */
1601 int
1602 str_cmpchar_id (str, len)
1603 const unsigned char *str;
1604 int len;
1605 {
1606 int hash_idx, *hashp;
1607 unsigned char *buf;
1608 int embedded_rule; /* 1 if composition rule is embedded. */
1609 int chars; /* number of components. */
1610 int i;
1611 struct cmpchar_info *cmpcharp;
1612
1613 /* The second byte 0xFF means COMPOSITION rule is embedded. */
1614 embedded_rule = (str[1] == 0xFF);
1615
1616 /* At first, get the actual length of the composite character. */
1617 {
1618 const unsigned char *p, *endp = str + 1, *lastp = str + len;
1619 int bytes;
1620
1621 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
1622 if (endp - str < 5)
1623 /* Any composite char have at least 5-byte length. */
1624 return -1;
1625
1626 chars = 0;
1627 p = str + 1;
1628 while (p < endp)
1629 {
1630 if (embedded_rule)
1631 {
1632 p++;
1633 if (p >= endp)
1634 return -1;
1635 }
1636 /* No need of checking if *P is 0xA0 because
1637 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1638 p += BYTES_BY_CHAR_HEAD (*p - 0x20);
1639 chars++;
1640 }
1641 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1642 /* Invalid components. */
1643 return -1;
1644 len = p - str;
1645 }
1646 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1647 hashp = cmpchar_hash_table[hash_idx];
1648
1649 /* Then, look into the hash table. */
1650 if (hashp != NULL)
1651 /* Find the correct one among composite characters of the same
1652 hash value. */
1653 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1654 {
1655 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1656 if (len == cmpcharp->len
1657 && ! bcmp (str, cmpcharp->data, len))
1658 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1659 }
1660
1661 /* We have to register the composite character in cmpchar_table. */
1662 if (n_cmpchars >= (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1663 /* No, we have no more room for a new composite character. */
1664 return -1;
1665
1666 /* Make the entry in hash table. */
1667 if (hashp == NULL)
1668 {
1669 /* Make a table for 8 composite characters initially. */
1670 hashp = (cmpchar_hash_table[hash_idx]
1671 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1672 CMPCHAR_HASH_SIZE (hashp) = 10;
1673 CMPCHAR_HASH_USED (hashp) = 2;
1674 }
1675 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1676 {
1677 CMPCHAR_HASH_SIZE (hashp) += 8;
1678 hashp = (cmpchar_hash_table[hash_idx]
1679 = (int *) xrealloc (hashp,
1680 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1681 }
1682 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1683 CMPCHAR_HASH_USED (hashp)++;
1684
1685 /* Set information of the composite character in cmpchar_table. */
1686 if (cmpchar_table_size == 0)
1687 {
1688 /* This is the first composite character to be registered. */
1689 cmpchar_table_size = 256;
1690 cmpchar_table
1691 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1692 * cmpchar_table_size);
1693 }
1694 else if (cmpchar_table_size <= n_cmpchars)
1695 {
1696 cmpchar_table_size += 256;
1697 cmpchar_table
1698 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1699 sizeof (cmpchar_table[0])
1700 * cmpchar_table_size);
1701 }
1702
1703 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1704
1705 cmpcharp->len = len;
1706 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1707 bcopy (str, cmpcharp->data, len);
1708 cmpcharp->data[len] = 0;
1709 cmpcharp->glyph_len = chars;
1710 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1711 if (embedded_rule)
1712 {
1713 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1714 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1715 }
1716 else
1717 {
1718 cmpcharp->cmp_rule = NULL;
1719 cmpcharp->col_offset = NULL;
1720 }
1721
1722 /* Setup GLYPH data and composition rules (if any) so as not to make
1723 them every time on displaying. */
1724 {
1725 unsigned char *bufp;
1726 int width;
1727 float leftmost = 0.0, rightmost = 1.0;
1728
1729 if (embedded_rule)
1730 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1731 cmpcharp->col_offset[0] = 0;
1732
1733 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1734 {
1735 if (embedded_rule)
1736 cmpcharp->cmp_rule[i] = *bufp++;
1737
1738 if (*bufp == 0xA0) /* This is an ASCII character. */
1739 {
1740 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1741 width = 1;
1742 bufp++;
1743 }
1744 else /* Multibyte character. */
1745 {
1746 /* Make `bufp' point normal multi-byte form temporarily. */
1747 *bufp -= 0x20;
1748 cmpcharp->glyph[i]
1749 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1750 width = WIDTH_BY_CHAR_HEAD (*bufp);
1751 *bufp += 0x20;
1752 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1753 }
1754
1755 if (embedded_rule && i > 0)
1756 {
1757 /* Reference points (global_ref and new_ref) are
1758 encoded as below:
1759
1760 0--1--2 -- ascent
1761 | |
1762 | |
1763 | 4 -+--- center
1764 -- 3 5 -- baseline
1765 | |
1766 6--7--8 -- descent
1767
1768 Now, we calculate the column offset of the new glyph
1769 from the left edge of the first glyph. This can avoid
1770 the same calculation everytime displaying this
1771 composite character. */
1772
1773 /* Reference points of global glyph and new glyph. */
1774 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1775 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1776 /* Column offset relative to the first glyph. */
1777 float left = (leftmost
1778 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1779 - (new_ref % 3) * width / 2.0);
1780
1781 cmpcharp->col_offset[i] = left;
1782 if (left < leftmost)
1783 leftmost = left;
1784 if (left + width > rightmost)
1785 rightmost = left + width;
1786 }
1787 else
1788 {
1789 if (width > rightmost)
1790 rightmost = width;
1791 }
1792 }
1793 if (embedded_rule)
1794 {
1795 /* Now col_offset[N] are relative to the left edge of the
1796 first component. Make them relative to the left edge of
1797 overall glyph. */
1798 for (i = 0; i < chars; i++)
1799 cmpcharp->col_offset[i] -= leftmost;
1800 /* Make rightmost holds width of overall glyph. */
1801 rightmost -= leftmost;
1802 }
1803
1804 cmpcharp->width = rightmost;
1805 if (cmpcharp->width < rightmost)
1806 /* To get a ceiling integer value. */
1807 cmpcharp->width++;
1808 }
1809
1810 cmpchar_table[n_cmpchars] = cmpcharp;
1811
1812 return n_cmpchars++;
1813 }
1814
1815 /* Return the Nth element of the composite character C. If NOERROR is
1816 nonzero, return 0 on error condition (C is an invalid composite
1817 charcter, or N is out of range). */
1818 int
1819 cmpchar_component (c, n, noerror)
1820 int c, n, noerror;
1821 {
1822 int id = COMPOSITE_CHAR_ID (c);
1823
1824 if (id < 0 || id >= n_cmpchars)
1825 {
1826 /* C is not a valid composite character. */
1827 if (noerror) return 0;
1828 error ("Invalid composite character: %d", c) ;
1829 }
1830 if (n >= cmpchar_table[id]->glyph_len)
1831 {
1832 /* No such component. */
1833 if (noerror) return 0;
1834 args_out_of_range (make_number (c), make_number (n));
1835 }
1836 /* No face data is stored in glyph code. */
1837 return ((int) (cmpchar_table[id]->glyph[n]));
1838 }
1839
1840 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1841 "T if CHAR is a composite character.")
1842 (ch)
1843 Lisp_Object ch;
1844 {
1845 CHECK_NUMBER (ch, 0);
1846 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1847 }
1848
1849 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1850 2, 2, 0,
1851 "Return the Nth component character of composite character CHARACTER.")
1852 (character, n)
1853 Lisp_Object character, n;
1854 {
1855 int id;
1856
1857 CHECK_NUMBER (character, 0);
1858 CHECK_NUMBER (n, 1);
1859
1860 return (make_number (cmpchar_component (XINT (character), XINT (n), 0)));
1861 }
1862
1863 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1864 2, 2, 0,
1865 "Return the Nth composition rule of composite character CHARACTER.\n\
1866 The returned rule is for composing the Nth component\n\
1867 on the (N-1)th component.\n\
1868 If CHARACTER should be composed relatively or N is 0, return 255.")
1869 (character, n)
1870 Lisp_Object character, n;
1871 {
1872 int id;
1873
1874 CHECK_NUMBER (character, 0);
1875 CHECK_NUMBER (n, 1);
1876
1877 id = COMPOSITE_CHAR_ID (XINT (character));
1878 if (id < 0 || id >= n_cmpchars)
1879 error ("Invalid composite character: %d", XINT (character));
1880 if (XINT (n) < 0 || XINT (n) >= cmpchar_table[id]->glyph_len)
1881 args_out_of_range (character, n);
1882
1883 return make_number (cmpchar_table[id]->cmp_rule
1884 ? cmpchar_table[id]->cmp_rule[XINT (n)]
1885 : 255);
1886 }
1887
1888 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1889 Scmpchar_cmp_rule_p, 1, 1, 0,
1890 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1891 (character)
1892 Lisp_Object character;
1893 {
1894 int id;
1895
1896 CHECK_NUMBER (character, 0);
1897 id = COMPOSITE_CHAR_ID (XINT (character));
1898 if (id < 0 || id >= n_cmpchars)
1899 error ("Invalid composite character: %d", XINT (character));
1900
1901 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1902 }
1903
1904 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1905 Scmpchar_cmp_count, 1, 1, 0,
1906 "Return number of compoents of composite character CHARACTER.")
1907 (character)
1908 Lisp_Object character;
1909 {
1910 int id;
1911
1912 CHECK_NUMBER (character, 0);
1913 id = COMPOSITE_CHAR_ID (XINT (character));
1914 if (id < 0 || id >= n_cmpchars)
1915 error ("Invalid composite character: %d", XINT (character));
1916
1917 return (make_number (cmpchar_table[id]->glyph_len));
1918 }
1919
1920 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1921 1, 1, 0,
1922 "Return one char string composed from all characters in STRING.")
1923 (str)
1924 Lisp_Object str;
1925 {
1926 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1927 int len, i;
1928
1929 CHECK_STRING (str, 0);
1930
1931 buf[0] = LEADING_CODE_COMPOSITION;
1932 p = XSTRING (str)->data;
1933 pend = p + STRING_BYTES (XSTRING (str));
1934 i = 1;
1935 while (p < pend)
1936 {
1937 if (*p < 0x20) /* control code */
1938 error ("Invalid component character: %d", *p);
1939 else if (*p < 0x80) /* ASCII */
1940 {
1941 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1942 error ("Too long string to be composed: %s", XSTRING (str)->data);
1943 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1944 code itself. */
1945 buf[i++] = 0xA0;
1946 buf[i++] = *p++ + 0x80;
1947 }
1948 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1949 {
1950 /* Already composed. Eliminate the heading
1951 LEADING_CODE_COMPOSITION, keep the remaining bytes
1952 unchanged. */
1953 p++;
1954 if (*p == 255)
1955 error ("Can't compose a rule-based composition character");
1956 ptemp = p;
1957 while (! CHAR_HEAD_P (*p)) p++;
1958 if (str_cmpchar_id (ptemp - 1, p - ptemp + 1) < 0)
1959 error ("Can't compose an invalid composition character");
1960 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1961 error ("Too long string to be composed: %s", XSTRING (str)->data);
1962 bcopy (ptemp, buf + i, p - ptemp);
1963 i += p - ptemp;
1964 }
1965 else /* multibyte char */
1966 {
1967 /* Add 0x20 to the base leading-code, keep the remaining
1968 bytes unchanged. */
1969 int c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
1970
1971 if (len <= 1 || ! CHAR_VALID_P (c, 0))
1972 error ("Can't compose an invalid character");
1973 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1974 error ("Too long string to be composed: %s", XSTRING (str)->data);
1975 bcopy (p, buf + i, len);
1976 buf[i] += 0x20;
1977 p += len, i += len;
1978 }
1979 }
1980
1981 if (i < 5)
1982 /* STR contains only one character, which can't be composed. */
1983 error ("Too short string to be composed: %s", XSTRING (str)->data);
1984
1985 return make_string_from_bytes (buf, 1, i);
1986 }
1987
1988 1381
1989 int 1382 int
1990 charset_id_internal (charset_name) 1383 charset_id_internal (charset_name)
1991 char *charset_name; 1384 char *charset_name;
1992 { 1385 {
2044 /* Setup tables. */ 1437 /* Setup tables. */
2045 for (i = 0; i < 2; i++) 1438 for (i = 0; i < 2; i++)
2046 for (j = 0; j < 2; j++) 1439 for (j = 0; j < 2; j++)
2047 for (k = 0; k < 128; k++) 1440 for (k = 0; k < 128; k++)
2048 iso_charset_table [i][j][k] = -1; 1441 iso_charset_table [i][j][k] = -1;
2049
2050 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
2051 cmpchar_table_size = n_cmpchars = 0;
2052 1442
2053 for (i = 0; i < 256; i++) 1443 for (i = 0; i < 256; i++)
2054 BYTES_BY_CHAR_HEAD (i) = 1; 1444 BYTES_BY_CHAR_HEAD (i) = 1;
2055 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; 1445 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1;
2056 i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++) 1446 i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++)
2062 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3; 1452 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
2063 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4; 1453 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
2064 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4; 1454 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
2065 /* The followings don't reflect the actual bytes, but just to tell 1455 /* The followings don't reflect the actual bytes, but just to tell
2066 that it is a start of a multibyte character. */ 1456 that it is a start of a multibyte character. */
2067 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2; 1457 BYTES_BY_CHAR_HEAD (0x80) = 2;
2068 BYTES_BY_CHAR_HEAD (0x9E) = 2; 1458 BYTES_BY_CHAR_HEAD (0x9E) = 2;
2069 BYTES_BY_CHAR_HEAD (0x9F) = 2; 1459 BYTES_BY_CHAR_HEAD (0x9F) = 2;
2070 1460
2071 for (i = 0; i < 128; i++) 1461 for (i = 0; i < 128; i++)
2072 WIDTH_BY_CHAR_HEAD (i) = 1; 1462 WIDTH_BY_CHAR_HEAD (i) = 1;
2087 val = Fcons (make_number ((i - 0x8F) << 14), val); 1477 val = Fcons (make_number ((i - 0x8F) << 14), val);
2088 for (i = 0xA0; i < 0xF0; i++) 1478 for (i = 0xA0; i < 0xF0; i++)
2089 val = Fcons (make_number ((i - 0x70) << 7), val); 1479 val = Fcons (make_number ((i - 0x70) << 7), val);
2090 for (; i < 0xFF; i++) 1480 for (; i < 0xFF; i++)
2091 val = Fcons (make_number ((i - 0xE0) << 14), val); 1481 val = Fcons (make_number ((i - 0xE0) << 14), val);
2092 val = Fcons (make_number (GENERIC_COMPOSITION_CHAR), val);
2093 Vgeneric_character_list = Fnreverse (val); 1482 Vgeneric_character_list = Fnreverse (val);
2094 } 1483 }
2095 1484
2096 nonascii_insert_offset = 0; 1485 nonascii_insert_offset = 0;
2097 Vnonascii_translation_table = Qnil; 1486 Vnonascii_translation_table = Qnil;
2118 build_string ("ASCII"), 1507 build_string ("ASCII"),
2119 build_string ("ASCII"), 1508 build_string ("ASCII"),
2120 build_string ("ASCII (ISO646 IRV)")); 1509 build_string ("ASCII (ISO646 IRV)"));
2121 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii; 1510 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
2122 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII)); 1511 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
2123
2124 Qcomposition = intern ("composition");
2125 staticpro (&Qcomposition);
2126 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
2127 1512
2128 Qauto_fill_chars = intern ("auto-fill-chars"); 1513 Qauto_fill_chars = intern ("auto-fill-chars");
2129 staticpro (&Qauto_fill_chars); 1514 staticpro (&Qauto_fill_chars);
2130 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0)); 1515 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
2131 1516
2147 defsubr (&Schar_width); 1532 defsubr (&Schar_width);
2148 defsubr (&Sstring_width); 1533 defsubr (&Sstring_width);
2149 defsubr (&Schar_direction); 1534 defsubr (&Schar_direction);
2150 defsubr (&Schars_in_region); 1535 defsubr (&Schars_in_region);
2151 defsubr (&Sstring); 1536 defsubr (&Sstring);
2152 defsubr (&Scmpcharp);
2153 defsubr (&Scmpchar_component);
2154 defsubr (&Scmpchar_cmp_rule);
2155 defsubr (&Scmpchar_cmp_rule_p);
2156 defsubr (&Scmpchar_cmp_count);
2157 defsubr (&Scompose_string);
2158 defsubr (&Ssetup_special_charsets); 1537 defsubr (&Ssetup_special_charsets);
2159 1538
2160 DEFVAR_LISP ("charset-list", &Vcharset_list, 1539 DEFVAR_LISP ("charset-list", &Vcharset_list,
2161 "List of charsets ever defined."); 1540 "List of charsets ever defined.");
2162 Vcharset_list = Fcons (Qascii, Qnil); 1541 Vcharset_list = Fcons (Qascii, Qnil);
2163 1542
2164 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, 1543 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
2165 "Vector of cons cell of a symbol and translation table ever defined.\n\ 1544 "Vector of cons cell of a symbol and translation table ever defined.\n\
2166 An ID of a translation table is an index of this vector."); 1545 An ID of a translation table is an index of this vector.");
2167 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); 1546 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
2168
2169 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
2170 "Leading-code of composite characters.");
2171 leading_code_composition = LEADING_CODE_COMPOSITION;
2172 1547
2173 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11, 1548 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
2174 "Leading-code of private TYPE9N charset of column-width 1."); 1549 "Leading-code of private TYPE9N charset of column-width 1.");
2175 leading_code_private_11 = LEADING_CODE_PRIVATE_11; 1550 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
2176 1551
2206 to the corresponding Emacs character code.\n\n\ 1581 to the corresponding Emacs character code.\n\n\
2207 If this is nil, `nonascii-insert-offset' is used instead.\n\ 1582 If this is nil, `nonascii-insert-offset' is used instead.\n\
2208 See also the docstring of `make-translation-table'."); 1583 See also the docstring of `make-translation-table'.");
2209 Vnonascii_translation_table = Qnil; 1584 Vnonascii_translation_table = Qnil;
2210 1585
2211 DEFVAR_INT ("min-composite-char", &min_composite_char,
2212 "Minimum character code of a composite character.");
2213 min_composite_char = MIN_CHAR_COMPOSITION;
2214
2215 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, 1586 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
2216 "A char-table for characters which invoke auto-filling.\n\ 1587 "A char-table for characters which invoke auto-filling.\n\
2217 Such characters has value t in this table."); 1588 Such characters has value t in this table.");
2218 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil); 1589 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
2219 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt); 1590 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);