Mercurial > emacs
comparison src/search.c @ 66078:2fe97fc2ee41
(search_buffer): Give up BM search on case-fold-search
if one of the target characters has a case-equivalent in a different
charset, even if that target character is ASCII.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 14 Oct 2005 07:54:50 +0000 |
parents | a0d1312ede66 |
children | 654295366299 |
comparison
equal
deleted
inserted
replaced
66077:68d711d80f2f | 66078:2fe97fc2ee41 |
---|---|
1173 int raw_pattern_size; | 1173 int raw_pattern_size; |
1174 int raw_pattern_size_byte; | 1174 int raw_pattern_size_byte; |
1175 unsigned char *patbuf; | 1175 unsigned char *patbuf; |
1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters); | 1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters); |
1177 unsigned char *base_pat = SDATA (string); | 1177 unsigned char *base_pat = SDATA (string); |
1178 /* Set to nozero if we find a non-ASCII char that need | 1178 /* Set to positive if we find a non-ASCII char that need |
1179 translation. */ | 1179 translation. Otherwise set to zero later. */ |
1180 int charset_base = 0; | 1180 int charset_base = -1; |
1181 int boyer_moore_ok = 1; | 1181 int boyer_moore_ok = 1; |
1182 | 1182 |
1183 /* MULTIBYTE says whether the text to be searched is multibyte. | 1183 /* MULTIBYTE says whether the text to be searched is multibyte. |
1184 We must convert PATTERN to match that, or we will not really | 1184 We must convert PATTERN to match that, or we will not really |
1185 find things right. */ | 1185 find things right. */ |
1273 also that we don't have to check ASCII | 1273 also that we don't have to check ASCII |
1274 characters because boyer-moore search can | 1274 characters because boyer-moore search can |
1275 always handle their translation. */ | 1275 always handle their translation. */ |
1276 while (1) | 1276 while (1) |
1277 { | 1277 { |
1278 if (! ASCII_BYTE_P (inverse)) | 1278 if (ASCII_BYTE_P (inverse)) |
1279 { | 1279 { |
1280 if (SINGLE_BYTE_CHAR_P (inverse)) | 1280 if (charset_base > 0) |
1281 { | |
1282 /* Boyer-moore search can't handle a | |
1283 translation of an eight-bit | |
1284 character. */ | |
1285 boyer_moore_ok = 0; | |
1286 break; | |
1287 } | |
1288 else if (charset_base == 0) | |
1289 charset_base = inverse & ~CHAR_FIELD3_MASK; | |
1290 else if ((inverse & ~CHAR_FIELD3_MASK) | |
1291 != charset_base) | |
1292 { | 1281 { |
1293 boyer_moore_ok = 0; | 1282 boyer_moore_ok = 0; |
1294 break; | 1283 break; |
1295 } | 1284 } |
1285 charset_base = 0; | |
1286 } | |
1287 else if (SINGLE_BYTE_CHAR_P (inverse)) | |
1288 { | |
1289 /* Boyer-moore search can't handle a | |
1290 translation of an eight-bit | |
1291 character. */ | |
1292 boyer_moore_ok = 0; | |
1293 break; | |
1294 } | |
1295 else if (charset_base < 0) | |
1296 charset_base = inverse & ~CHAR_FIELD3_MASK; | |
1297 else if ((inverse & ~CHAR_FIELD3_MASK) | |
1298 != charset_base) | |
1299 { | |
1300 boyer_moore_ok = 0; | |
1301 break; | |
1296 } | 1302 } |
1297 if (c == inverse) | 1303 if (c == inverse) |
1298 break; | 1304 break; |
1299 TRANSLATE (inverse, inverse_trt, inverse); | 1305 TRANSLATE (inverse, inverse_trt, inverse); |
1300 } | 1306 } |
1301 } | 1307 } |
1302 } | 1308 } |
1309 if (charset_base < 0) | |
1310 charset_base = 0; | |
1303 | 1311 |
1304 /* Store this character into the translated pattern. */ | 1312 /* Store this character into the translated pattern. */ |
1305 bcopy (str, pat, charlen); | 1313 bcopy (str, pat, charlen); |
1306 pat += charlen; | 1314 pat += charlen; |
1307 base_pat += in_charlen; | 1315 base_pat += in_charlen; |