comparison src/search.c @ 66078:2fe97fc2ee41

(search_buffer): Give up BM (Boyer-Moore) search on case-fold-search if one of the target characters has a case-equivalent in a different charset, even if that target character is ASCII.
author Kenichi Handa <handa@m17n.org>
date Fri, 14 Oct 2005 07:54:50 +0000
parents a0d1312ede66
children 654295366299
comparison
equal deleted inserted replaced
66077:68d711d80f2f 66078:2fe97fc2ee41
1173 int raw_pattern_size; 1173 int raw_pattern_size;
1174 int raw_pattern_size_byte; 1174 int raw_pattern_size_byte;
1175 unsigned char *patbuf; 1175 unsigned char *patbuf;
1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters); 1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1177 unsigned char *base_pat = SDATA (string); 1177 unsigned char *base_pat = SDATA (string);
1178 /* Set to nozero if we find a non-ASCII char that need 1178 /* Set to positive if we find a non-ASCII char that need
1179 translation. */ 1179 translation. Otherwise set to zero later. */
1180 int charset_base = 0; 1180 int charset_base = -1;
1181 int boyer_moore_ok = 1; 1181 int boyer_moore_ok = 1;
1182 1182
1183 /* MULTIBYTE says whether the text to be searched is multibyte. 1183 /* MULTIBYTE says whether the text to be searched is multibyte.
1184 We must convert PATTERN to match that, or we will not really 1184 We must convert PATTERN to match that, or we will not really
1185 find things right. */ 1185 find things right. */
1273 also that we don't have to check ASCII 1273 also that we don't have to check ASCII
1274 characters because boyer-moore search can 1274 characters because boyer-moore search can
1275 always handle their translation. */ 1275 always handle their translation. */
1276 while (1) 1276 while (1)
1277 { 1277 {
1278 if (! ASCII_BYTE_P (inverse)) 1278 if (ASCII_BYTE_P (inverse))
1279 { 1279 {
1280 if (SINGLE_BYTE_CHAR_P (inverse)) 1280 if (charset_base > 0)
1281 {
1282 /* Boyer-moore search can't handle a
1283 translation of an eight-bit
1284 character. */
1285 boyer_moore_ok = 0;
1286 break;
1287 }
1288 else if (charset_base == 0)
1289 charset_base = inverse & ~CHAR_FIELD3_MASK;
1290 else if ((inverse & ~CHAR_FIELD3_MASK)
1291 != charset_base)
1292 { 1281 {
1293 boyer_moore_ok = 0; 1282 boyer_moore_ok = 0;
1294 break; 1283 break;
1295 } 1284 }
1285 charset_base = 0;
1286 }
1287 else if (SINGLE_BYTE_CHAR_P (inverse))
1288 {
1289 /* Boyer-moore search can't handle a
1290 translation of an eight-bit
1291 character. */
1292 boyer_moore_ok = 0;
1293 break;
1294 }
1295 else if (charset_base < 0)
1296 charset_base = inverse & ~CHAR_FIELD3_MASK;
1297 else if ((inverse & ~CHAR_FIELD3_MASK)
1298 != charset_base)
1299 {
1300 boyer_moore_ok = 0;
1301 break;
1296 } 1302 }
1297 if (c == inverse) 1303 if (c == inverse)
1298 break; 1304 break;
1299 TRANSLATE (inverse, inverse_trt, inverse); 1305 TRANSLATE (inverse, inverse_trt, inverse);
1300 } 1306 }
1301 } 1307 }
1302 } 1308 }
1309 if (charset_base < 0)
1310 charset_base = 0;
1303 1311
1304 /* Store this character into the translated pattern. */ 1312 /* Store this character into the translated pattern. */
1305 bcopy (str, pat, charlen); 1313 bcopy (str, pat, charlen);
1306 pat += charlen; 1314 pat += charlen;
1307 base_pat += in_charlen; 1315 base_pat += in_charlen;