Mercurial > emacs
comparison src/search.c @ 21117:a88d2c555a06
(simple_search): Don't count a character until it matches!
Call set_search_regs differently in a forward search.
(boyer_moore): Fix up the code that translates the pattern
and loops thru equivalent characters.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Mon, 09 Mar 1998 00:25:30 +0000 |
parents | 8c68721e5ec9 |
children | 60f6085df198 |
comparison
equal
deleted
inserted
replaced
21116:ccf251830c66 | 21117:a88d2c555a06 |
---|---|
1302 Lisp_Object trt; | 1302 Lisp_Object trt; |
1303 int pos, pos_byte; | 1303 int pos, pos_byte; |
1304 int lim, lim_byte; | 1304 int lim, lim_byte; |
1305 { | 1305 { |
1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters); | 1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters); |
| | 1307 int forward = n > 0; |
1307 | 1308 |
1308 if (lim > pos && multibyte) | 1309 if (lim > pos && multibyte) |
1309 while (n > 0) | 1310 while (n > 0) |
1310 { | 1311 { |
1311 while (1) | 1312 while (1) |
1320 goto stop; | 1321 goto stop; |
1321 | 1322 |
1322 while (this_len > 0) | 1323 while (this_len > 0) |
1323 { | 1324 { |
1324 int charlen, buf_charlen; | 1325 int charlen, buf_charlen; |
1325 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | 1326 int pat_ch, buf_ch; |
1326 int buf_ch; | 1327 |
| | 1328 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); |
| | 1329 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), |
| | 1330 ZV_BYTE - this_pos_byte, |
| | 1331 buf_charlen); |
| | 1332 TRANSLATE (buf_ch, trt, buf_ch); |
| | 1333 |
| | 1334 if (buf_ch != pat_ch) |
| | 1335 break; |
1327 | 1336 |
1328 this_len_byte -= charlen; | 1337 this_len_byte -= charlen; |
1329 this_len--; | 1338 this_len--; |
1330 p += charlen; | 1339 p += charlen; |
1331 | 1340 |
1332 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), | |
1333 ZV_BYTE - this_pos_byte, | |
1334 buf_charlen); | |
1335 this_pos_byte += buf_charlen; | 1341 this_pos_byte += buf_charlen; |
1336 this_pos++; | 1342 this_pos++; |
1337 TRANSLATE (buf_ch, trt, buf_ch); | |
1338 | |
1339 if (buf_ch != pat_ch) | |
1340 break; | |
1341 } | 1343 } |
1342 | 1344 |
1343 if (this_len == 0) | 1345 if (this_len == 0) |
1344 { | 1346 { |
1345 pos += len; | 1347 pos += len; |
1367 | 1369 |
1368 while (this_len > 0) | 1370 while (this_len > 0) |
1369 { | 1371 { |
1370 int pat_ch = *p++; | 1372 int pat_ch = *p++; |
1371 int buf_ch = FETCH_BYTE (this_pos); | 1373 int buf_ch = FETCH_BYTE (this_pos); |
| | 1374 TRANSLATE (buf_ch, trt, buf_ch); |
| | 1375 |
| | 1376 if (buf_ch != pat_ch) |
| | 1377 break; |
| | 1378 |
1372 this_len--; | 1379 this_len--; |
1373 this_pos++; | 1380 this_pos++; |
1374 TRANSLATE (buf_ch, trt, buf_ch); | |
1375 | |
1376 if (buf_ch != pat_ch) | |
1377 break; | |
1378 } | 1381 } |
1379 | 1382 |
1380 if (this_len == 0) | 1383 if (this_len == 0) |
1381 { | 1384 { |
1382 pos += len; | 1385 pos += len; |
1405 goto stop; | 1408 goto stop; |
1406 | 1409 |
1407 while (this_len > 0) | 1410 while (this_len > 0) |
1408 { | 1411 { |
1409 int charlen, buf_charlen; | 1412 int charlen, buf_charlen; |
1410 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | 1413 int pat_ch, buf_ch; |
1411 int buf_ch; | 1414 |
| | 1415 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); |
| | 1416 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), |
| | 1417 ZV_BYTE - this_pos_byte, |
| | 1418 buf_charlen); |
| | 1419 TRANSLATE (buf_ch, trt, buf_ch); |
| | 1420 |
| | 1421 if (buf_ch != pat_ch) |
| | 1422 break; |
1412 | 1423 |
1413 this_len_byte -= charlen; | 1424 this_len_byte -= charlen; |
1414 this_len--; | 1425 this_len--; |
1415 p += charlen; | 1426 p += charlen; |
1416 | |
1417 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), | |
1418 ZV_BYTE - this_pos_byte, | |
1419 buf_charlen); | |
1420 this_pos_byte += buf_charlen; | 1427 this_pos_byte += buf_charlen; |
1421 this_pos++; | 1428 this_pos++; |
1422 TRANSLATE (buf_ch, trt, buf_ch); | |
1423 | |
1424 if (buf_ch != pat_ch) | |
1425 break; | |
1426 } | 1429 } |
1427 | 1430 |
1428 if (this_len == 0) | 1431 if (this_len == 0) |
1429 { | 1432 { |
1430 pos -= len; | 1433 pos -= len; |
1452 | 1455 |
1453 while (this_len > 0) | 1456 while (this_len > 0) |
1454 { | 1457 { |
1455 int pat_ch = *p++; | 1458 int pat_ch = *p++; |
1456 int buf_ch = FETCH_BYTE (this_pos); | 1459 int buf_ch = FETCH_BYTE (this_pos); |
| | 1460 TRANSLATE (buf_ch, trt, buf_ch); |
| | 1461 |
| | 1462 if (buf_ch != pat_ch) |
| | 1463 break; |
1457 this_len--; | 1464 this_len--; |
1458 this_pos++; | 1465 this_pos++; |
1459 TRANSLATE (buf_ch, trt, buf_ch); | |
1460 | |
1461 if (buf_ch != pat_ch) | |
1462 break; | |
1463 } | 1466 } |
1464 | 1467 |
1465 if (this_len == 0) | 1468 if (this_len == 0) |
1466 { | 1469 { |
1467 pos -= len; | 1470 pos -= len; |
1475 } | 1478 } |
1476 | 1479 |
1477 stop: | 1480 stop: |
1478 if (n == 0) | 1481 if (n == 0) |
1479 { | 1482 { |
1480 set_search_regs (multibyte ? pos_byte : pos, len_byte); | 1483 if (forward) |
| | 1484 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte); |
| | 1485 else |
| | 1486 set_search_regs (multibyte ? pos_byte : pos, len_byte); |
1481 | 1487 |
1482 return pos; | 1488 return pos; |
1483 } | 1489 } |
1484 else if (n > 0) | 1490 else if (n > 0) |
1485 return -n; | 1491 return -n; |
1603 { | 1609 { |
1604 unsigned char *charstart = ptr; | 1610 unsigned char *charstart = ptr; |
1605 while (! CHAR_HEAD_P (*charstart)) | 1611 while (! CHAR_HEAD_P (*charstart)) |
1606 charstart--; | 1612 charstart--; |
1607 untranslated = STRING_CHAR (charstart, ptr - charstart + 1); | 1613 untranslated = STRING_CHAR (charstart, ptr - charstart + 1); |
1608 TRANSLATE (ch, trt, untranslated); | 1614 if (charset_base == (untranslated & ~0xff)) |
1609 if (charset_base == (ch & ~0xff)) | |
1610 { | 1615 { |
| | 1616 TRANSLATE (ch, trt, untranslated); |
1611 if (! CHAR_HEAD_P (*ptr)) | 1617 if (! CHAR_HEAD_P (*ptr)) |
1612 { | 1618 { |
1613 translate_prev_byte = ptr[-1]; | 1619 translate_prev_byte = ptr[-1]; |
1614 if (! CHAR_HEAD_P (translate_prev_byte)) | 1620 if (! CHAR_HEAD_P (translate_prev_byte)) |
1615 translate_anteprev_byte = ptr[-2]; | 1621 translate_anteprev_byte = ptr[-2]; |
1616 } | 1622 } |
1617 } | 1623 } |
1618 else | 1624 else |
1619 this_translated = 0; | 1625 { |
| | 1626 this_translated = 0; |
| | 1627 ch = *ptr; |
| | 1628 } |
1620 } | 1629 } |
1621 else if (!multibyte) | 1630 else if (!multibyte) |
1622 TRANSLATE (ch, trt, *ptr); | 1631 TRANSLATE (ch, trt, *ptr); |
1623 else | 1632 else |
1624 { | 1633 { |
1625 ch = *ptr; | 1634 ch = *ptr; |
1626 this_translated = 0; | 1635 this_translated = 0; |
1627 } | 1636 } |
1628 | 1637 |
1629 k = j = (unsigned char) ch; | 1638 if (ch > 0400) |
| | 1639 j = ((unsigned char) ch) | 0200; |
| | 1640 else |
| | 1641 j = (unsigned char) ch; |
| | 1642 |
1630 if (i == infinity) | 1643 if (i == infinity) |
1631 stride_for_teases = BM_tab[j]; | 1644 stride_for_teases = BM_tab[j]; |
| | 1645 |
1632 BM_tab[j] = dirlen - i; | 1646 BM_tab[j] = dirlen - i; |
1633 /* A translation table is accompanied by its inverse -- see */ | 1647 /* A translation table is accompanied by its inverse -- see */ |
1634 /* comment following downcase_table for details */ | 1648 /* comment following downcase_table for details */ |
1635 if (this_translated) | 1649 if (this_translated) |
1636 while (1) | 1650 { |
1637 { | 1651 int starting_ch = ch; |
1638 TRANSLATE (ch, inverse_trt, ch); | 1652 int starting_j = j; |
1639 /* For all the characters that map into K, | 1653 while (1) |
1640 set up simple_translate to map them into K. */ | 1654 { |
1641 simple_translate[(unsigned char) ch] = k; | 1655 TRANSLATE (ch, inverse_trt, ch); |
1642 if ((unsigned char) ch == k) | 1656 if (ch > 0400) |
1643 break; | 1657 j = ((unsigned char) ch) | 0200; |
1644 BM_tab[(unsigned char) ch] = dirlen - i; | 1658 else |
1645 } | 1659 j = (unsigned char) ch; |
| | 1660 |
| | 1661 /* For all the characters that map into CH, |
| | 1662 set up simple_translate to map the last byte |
| | 1663 into STARTING_J. */ |
| | 1664 simple_translate[j] = starting_j; |
| | 1665 if (ch == starting_ch) |
| | 1666 break; |
| | 1667 BM_tab[j] = dirlen - i; |
| | 1668 } |
| | 1669 } |
1646 } | 1670 } |
1647 else | 1671 else |
1648 { | 1672 { |
1649 j = *ptr; | 1673 j = *ptr; |
1650 | 1674 |