comparison src/charset.c @ 20933:5ceea9d50194

(invalid_character): New function. (chars_in_text): Make the behaviour consistent with INC_POS. (multibyte_chars_in_text): Likewise. (str_cmpchar_id): Likewise. (non_ascii_char_to_string): Call invalid_character. (Fchar_direction): Likewise.
author Kenichi Handa <handa@m17n.org>
date Fri, 20 Feb 1998 01:40:47 +0000
parents c7cfd531cf2b
children f3128abaf8aa
comparison
equal deleted inserted replaced
20932:3c2c8431c51d 20933:5ceea9d50194
106 Lisp_Object Vnonascii_translate_table; 106 Lisp_Object Vnonascii_translate_table;
107 107
108 #define min(X, Y) ((X) < (Y) ? (X) : (Y)) 108 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
109 #define max(X, Y) ((X) > (Y) ? (X) : (Y)) 109 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
110 110
/* Report C as an invalid character by calling error.  The message
   shows C in octal, decimal and hexadecimal; the format string has
   three conversions, so C must be passed once for each of them.  */

void
invalid_character (c)
     int c;
{
  error ("Invalid character: %o, %d, 0x%x", c, c, c);
}
117
118
111 /* Set STR a pointer to the multi-byte form of the character C. If C 119 /* Set STR a pointer to the multi-byte form of the character C. If C
112 is not a composite character, the multi-byte form is set in WORKBUF 120 is not a composite character, the multi-byte form is set in WORKBUF
113 and STR points WORKBUF. The caller should allocate at least 4-byte 121 and STR points WORKBUF. The caller should allocate at least 4-byte
114 area at WORKBUF in advance. Returns the length of the multi-byte 122 area at WORKBUF in advance. Returns the length of the multi-byte
115 form. If C is an invalid character to have a multi-byte form, 123 form. If C is an invalid character to have a multi-byte form,
134 *str = cmpchar_table[cmpchar_id]->data; 142 *str = cmpchar_table[cmpchar_id]->data;
135 return cmpchar_table[cmpchar_id]->len; 143 return cmpchar_table[cmpchar_id]->len;
136 } 144 }
137 else 145 else
138 { 146 {
139 error ("Invalid character: %d", c); 147 invalid_character (c);
140 } 148 }
141 } 149 }
142 150
143 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); 151 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
144 if (!charset 152 if (!charset
145 || ! CHARSET_DEFINED_P (charset) 153 || ! CHARSET_DEFINED_P (charset)
146 || c1 >= 0 && c1 < 32 154 || c1 >= 0 && c1 < 32
147 || c2 >= 0 && c2 < 32) 155 || c2 >= 0 && c2 < 32)
148 error ("Invalid character: %d", c); 156 invalid_character (c);
149 157
150 *str = workbuf; 158 *str = workbuf;
151 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset); 159 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
152 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset)) 160 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
153 workbuf++; 161 workbuf++;
1015 int charset; 1023 int charset;
1016 1024
1017 CHECK_NUMBER (ch, 0); 1025 CHECK_NUMBER (ch, 0);
1018 charset = CHAR_CHARSET (XFASTINT (ch)); 1026 charset = CHAR_CHARSET (XFASTINT (ch));
1019 if (!CHARSET_DEFINED_P (charset)) 1027 if (!CHARSET_DEFINED_P (charset))
1020 error ("Invalid character: %d", XINT (ch)); 1028 invalid_character (XINT (ch));
1021 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX); 1029 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1022 } 1030 }
1023 1031
1024 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, 1032 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1025 "Return number of characters between BEG and END.") 1033 "Return number of characters between BEG and END.")
1042 int 1050 int
1043 chars_in_text (ptr, nbytes) 1051 chars_in_text (ptr, nbytes)
1044 unsigned char *ptr; 1052 unsigned char *ptr;
1045 int nbytes; 1053 int nbytes;
1046 { 1054 {
1047 unsigned char *endp; 1055 unsigned char *endp, c;
1048 int chars; 1056 int chars;
1049 1057
1050 /* current_buffer is null at early stages of Emacs initialization. */ 1058 /* current_buffer is null at early stages of Emacs initialization. */
1051 if (current_buffer == 0 1059 if (current_buffer == 0
1052 || NILP (current_buffer->enable_multibyte_characters)) 1060 || NILP (current_buffer->enable_multibyte_characters))
1055 endp = ptr + nbytes; 1063 endp = ptr + nbytes;
1056 chars = 0; 1064 chars = 0;
1057 1065
1058 while (ptr < endp) 1066 while (ptr < endp)
1059 { 1067 {
1060 if (*ptr == LEADING_CODE_COMPOSITION) 1068 c = *ptr++;
1061 { 1069
1062 ptr++; 1070 if (BASE_LEADING_CODE_P (c))
1063 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; 1071 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1064 }
1065 else
1066 ptr += BYTES_BY_CHAR_HEAD (*ptr);
1067 chars++; 1072 chars++;
1068 } 1073 }
1069 1074
1070 return chars; 1075 return chars;
1071 } 1076 }
1077 int 1082 int
1078 multibyte_chars_in_text (ptr, nbytes) 1083 multibyte_chars_in_text (ptr, nbytes)
1079 unsigned char *ptr; 1084 unsigned char *ptr;
1080 int nbytes; 1085 int nbytes;
1081 { 1086 {
1082 unsigned char *endp; 1087 unsigned char *endp, c;
1083 int chars; 1088 int chars;
1084 1089
1085 endp = ptr + nbytes; 1090 endp = ptr + nbytes;
1086 chars = 0; 1091 chars = 0;
1087 1092
1088 while (ptr < endp) 1093 while (ptr < endp)
1089 { 1094 {
1090 if (*ptr == LEADING_CODE_COMPOSITION) 1095 c = *ptr++;
1091 { 1096
1092 ptr++; 1097 if (BASE_LEADING_CODE_P (c))
1093 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; 1098 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1094 }
1095 else
1096 ptr += BYTES_BY_CHAR_HEAD (*ptr);
1097 chars++; 1099 chars++;
1098 } 1100 }
1099 1101
1100 return chars; 1102 return chars;
1101 } 1103 }
1191 int embedded_rule; /* 1 if composition rule is embedded. */ 1193 int embedded_rule; /* 1 if composition rule is embedded. */
1192 int chars; /* number of components. */ 1194 int chars; /* number of components. */
1193 int i; 1195 int i;
1194 struct cmpchar_info *cmpcharp; 1196 struct cmpchar_info *cmpcharp;
1195 1197
1196 if (len < 5)
1197 /* Any composite char have at least 3-byte length. */
1198 return -1;
1199
1200 /* The second byte 0xFF means composition rule is embedded. */ 1198 /* The second byte 0xFF means composition rule is embedded. */
1201 embedded_rule = (str[1] == 0xFF); 1199 embedded_rule = (str[1] == 0xFF);
1202 1200
1203 /* At first, get the actual length of the composite character. */ 1201 /* At first, get the actual length of the composite character. */
1204 { 1202 {
1205 unsigned char *p, *endp = str + 1, *lastp = str + len; 1203 unsigned char *p, *endp = str + 1, *lastp = str + len;
1206 int bytes; 1204 int bytes;
1207 1205
1208 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++; 1206 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
1207 if (endp - str < 5)
1208 /* Any composite char has at least 5-byte length. */
1209 return -1;
1210
1209 chars = 0; 1211 chars = 0;
1210 p = str + 1 + embedded_rule; 1212 p = str + 1;
1211 while (p < endp) 1213 while (p < endp)
1212 { 1214 {
1215 if (embedded_rule) p++;
1213 /* No need of checking if *P is 0xA0 because 1216 /* No need of checking if *P is 0xA0 because
1214 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */ 1217 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1215 p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule); 1218 p += BYTES_BY_CHAR_HEAD (*p - 0x20);
1216 chars++; 1219 chars++;
1217 } 1220 }
1218 len = (p -= embedded_rule) - str; 1221 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1219 if (p > endp) 1222 /* Invalid components. */
1220 len -= - bytes, chars--;
1221
1222 if (chars < 2 || chars > MAX_COMPONENT_COUNT)
1223 /* Invalid number of components. */
1224 return -1; 1223 return -1;
1224 len = p - str;
1225 } 1225 }
1226 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE; 1226 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1227 hashp = cmpchar_hash_table[hash_idx]; 1227 hashp = cmpchar_hash_table[hash_idx];
1228 1228
1229 /* Then, look into the hash table. */ 1229 /* Then, look into the hash table. */