Mercurial > emacs
comparison src/charset.c @ 20933:5ceea9d50194
(invalid_character): New function.
(chars_in_text): Make the behaviour consistent with INC_POS.
(multibyte_chars_in_text): Likewise.
(str_cmpchar_id): Likewise.
(non_ascii_char_to_string): Call invalid_character.
(Fchar_direction): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 20 Feb 1998 01:40:47 +0000 |
parents | c7cfd531cf2b |
children | f3128abaf8aa |
comparison
equal
deleted
inserted
replaced
20932:3c2c8431c51d | 20933:5ceea9d50194 |
---|---|
106 Lisp_Object Vnonascii_translate_table; | 106 Lisp_Object Vnonascii_translate_table; |
107 | 107 |
108 #define min(X, Y) ((X) < (Y) ? (X) : (Y)) | 108 #define min(X, Y) ((X) < (Y) ? (X) : (Y)) |
109 #define max(X, Y) ((X) > (Y) ? (X) : (Y)) | 109 #define max(X, Y) ((X) > (Y) ? (X) : (Y)) |
110 | 110 |
111 void | |
112 invalid_character (c) | |
113 int c; | |
114 { | |
115 error ("Invalid character: %o, %d, 0x%x", c, c, c); | |
116 } | |
117 | |
118 | |
111 /* Set STR to a pointer to the multi-byte form of the character C. If C | 119 /* Set STR to a pointer to the multi-byte form of the character C. If C |
112 is not a composite character, the multi-byte form is set in WORKBUF | 120 is not a composite character, the multi-byte form is set in WORKBUF |
113 and STR points to WORKBUF. The caller should allocate at least a 4-byte | 121 and STR points to WORKBUF. The caller should allocate at least a 4-byte |
114 area at WORKBUF in advance. Returns the length of the multi-byte | 122 area at WORKBUF in advance. Returns the length of the multi-byte |
115 form. If C is an invalid character to have a multi-byte form, | 123 form. If C is an invalid character to have a multi-byte form, |
134 *str = cmpchar_table[cmpchar_id]->data; | 142 *str = cmpchar_table[cmpchar_id]->data; |
135 return cmpchar_table[cmpchar_id]->len; | 143 return cmpchar_table[cmpchar_id]->len; |
136 } | 144 } |
137 else | 145 else |
138 { | 146 { |
139 error ("Invalid character: %d", c); | 147 invalid_character (c); |
140 } | 148 } |
141 } | 149 } |
142 | 150 |
143 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); | 151 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); |
144 if (!charset | 152 if (!charset |
145 || ! CHARSET_DEFINED_P (charset) | 153 || ! CHARSET_DEFINED_P (charset) |
146 || c1 >= 0 && c1 < 32 | 154 || c1 >= 0 && c1 < 32 |
147 || c2 >= 0 && c2 < 32) | 155 || c2 >= 0 && c2 < 32) |
148 error ("Invalid character: %d", c); | 156 invalid_character (c); |
149 | 157 |
150 *str = workbuf; | 158 *str = workbuf; |
151 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset); | 159 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset); |
152 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset)) | 160 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset)) |
153 workbuf++; | 161 workbuf++; |
1015 int charset; | 1023 int charset; |
1016 | 1024 |
1017 CHECK_NUMBER (ch, 0); | 1025 CHECK_NUMBER (ch, 0); |
1018 charset = CHAR_CHARSET (XFASTINT (ch)); | 1026 charset = CHAR_CHARSET (XFASTINT (ch)); |
1019 if (!CHARSET_DEFINED_P (charset)) | 1027 if (!CHARSET_DEFINED_P (charset)) |
1020 error ("Invalid character: %d", XINT (ch)); | 1028 invalid_character (XINT (ch)); |
1021 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX); | 1029 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX); |
1022 } | 1030 } |
1023 | 1031 |
1024 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, | 1032 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, |
1025 "Return number of characters between BEG and END.") | 1033 "Return number of characters between BEG and END.") |
1042 int | 1050 int |
1043 chars_in_text (ptr, nbytes) | 1051 chars_in_text (ptr, nbytes) |
1044 unsigned char *ptr; | 1052 unsigned char *ptr; |
1045 int nbytes; | 1053 int nbytes; |
1046 { | 1054 { |
1047 unsigned char *endp; | 1055 unsigned char *endp, c; |
1048 int chars; | 1056 int chars; |
1049 | 1057 |
1050 /* current_buffer is null at early stages of Emacs initialization. */ | 1058 /* current_buffer is null at early stages of Emacs initialization. */ |
1051 if (current_buffer == 0 | 1059 if (current_buffer == 0 |
1052 || NILP (current_buffer->enable_multibyte_characters)) | 1060 || NILP (current_buffer->enable_multibyte_characters)) |
1055 endp = ptr + nbytes; | 1063 endp = ptr + nbytes; |
1056 chars = 0; | 1064 chars = 0; |
1057 | 1065 |
1058 while (ptr < endp) | 1066 while (ptr < endp) |
1059 { | 1067 { |
1060 if (*ptr == LEADING_CODE_COMPOSITION) | 1068 c = *ptr++; |
1061 { | 1069 |
1062 ptr++; | 1070 if (BASE_LEADING_CODE_P (c)) |
1063 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; | 1071 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; |
1064 } | |
1065 else | |
1066 ptr += BYTES_BY_CHAR_HEAD (*ptr); | |
1067 chars++; | 1072 chars++; |
1068 } | 1073 } |
1069 | 1074 |
1070 return chars; | 1075 return chars; |
1071 } | 1076 } |
1077 int | 1082 int |
1078 multibyte_chars_in_text (ptr, nbytes) | 1083 multibyte_chars_in_text (ptr, nbytes) |
1079 unsigned char *ptr; | 1084 unsigned char *ptr; |
1080 int nbytes; | 1085 int nbytes; |
1081 { | 1086 { |
1082 unsigned char *endp; | 1087 unsigned char *endp, c; |
1083 int chars; | 1088 int chars; |
1084 | 1089 |
1085 endp = ptr + nbytes; | 1090 endp = ptr + nbytes; |
1086 chars = 0; | 1091 chars = 0; |
1087 | 1092 |
1088 while (ptr < endp) | 1093 while (ptr < endp) |
1089 { | 1094 { |
1090 if (*ptr == LEADING_CODE_COMPOSITION) | 1095 c = *ptr++; |
1091 { | 1096 |
1092 ptr++; | 1097 if (BASE_LEADING_CODE_P (c)) |
1093 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; | 1098 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; |
1094 } | |
1095 else | |
1096 ptr += BYTES_BY_CHAR_HEAD (*ptr); | |
1097 chars++; | 1099 chars++; |
1098 } | 1100 } |
1099 | 1101 |
1100 return chars; | 1102 return chars; |
1101 } | 1103 } |
1191 int embedded_rule; /* 1 if composition rule is embedded. */ | 1193 int embedded_rule; /* 1 if composition rule is embedded. */ |
1192 int chars; /* number of components. */ | 1194 int chars; /* number of components. */ |
1193 int i; | 1195 int i; |
1194 struct cmpchar_info *cmpcharp; | 1196 struct cmpchar_info *cmpcharp; |
1195 | 1197 |
1196 if (len < 5) | |
1197 /* Any composite char have at least 3-byte length. */ | |
1198 return -1; | |
1199 | |
1200 /* The second byte 0xFF means composition rule is embedded. */ | 1198 /* The second byte 0xFF means composition rule is embedded. */ |
1201 embedded_rule = (str[1] == 0xFF); | 1199 embedded_rule = (str[1] == 0xFF); |
1202 | 1200 |
1203 /* At first, get the actual length of the composite character. */ | 1201 /* At first, get the actual length of the composite character. */ |
1204 { | 1202 { |
1205 unsigned char *p, *endp = str + 1, *lastp = str + len; | 1203 unsigned char *p, *endp = str + 1, *lastp = str + len; |
1206 int bytes; | 1204 int bytes; |
1207 | 1205 |
1208 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++; | 1206 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++; |
1207 if (endp - str < 5) | |
1208 /* Any composite char has a length of at least 5 bytes. */ | |
1209 return -1; | |
1210 | |
1209 chars = 0; | 1211 chars = 0; |
1210 p = str + 1 + embedded_rule; | 1212 p = str + 1; |
1211 while (p < endp) | 1213 while (p < endp) |
1212 { | 1214 { |
1215 if (embedded_rule) p++; | |
1213 /* No need of checking if *P is 0xA0 because | 1216 /* No need of checking if *P is 0xA0 because |
1214 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */ | 1217 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */ |
1215 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule); | 1218 p += BYTES_BY_CHAR_HEAD (*p - 0x20); |
1216 chars++; | 1219 chars++; |
1217 } | 1220 } |
1218 len = (p -= embedded_rule) - str; | 1221 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT) |
1219 if (p > endp) | 1222 /* Invalid components. */ |
1220 len -= - bytes, chars--; | |
1221 | |
1222 if (chars < 2 || chars > MAX_COMPONENT_COUNT) | |
1223 /* Invalid number of components. */ | |
1224 return -1; | 1223 return -1; |
1224 len = p - str; | |
1225 } | 1225 } |
1226 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE; | 1226 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE; |
1227 hashp = cmpchar_hash_table[hash_idx]; | 1227 hashp = cmpchar_hash_table[hash_idx]; |
1228 | 1228 |
1229 /* Then, look into the hash table. */ | 1229 /* Then, look into the hash table. */ |