comparison src/search.c @ 21117:a88d2c555a06

(simple_search): Don't count a character until it matches! Call set_search_regs differently in a forward search. (boyer_moore): Fix up the code that translates the pattern and loops through equivalent characters.
author Richard M. Stallman <rms@gnu.org>
date Mon, 09 Mar 1998 00:25:30 +0000
parents 8c68721e5ec9
children 60f6085df198
comparison
equal deleted inserted replaced
21116:ccf251830c66 21117:a88d2c555a06
1302 Lisp_Object trt; 1302 Lisp_Object trt;
1303 int pos, pos_byte; 1303 int pos, pos_byte;
1304 int lim, lim_byte; 1304 int lim, lim_byte;
1305 { 1305 {
1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters); 1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1307 int forward = n > 0;
1307 1308
1308 if (lim > pos && multibyte) 1309 if (lim > pos && multibyte)
1309 while (n > 0) 1310 while (n > 0)
1310 { 1311 {
1311 while (1) 1312 while (1)
1320 goto stop; 1321 goto stop;
1321 1322
1322 while (this_len > 0) 1323 while (this_len > 0)
1323 { 1324 {
1324 int charlen, buf_charlen; 1325 int charlen, buf_charlen;
1325 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); 1326 int pat_ch, buf_ch;
1326 int buf_ch; 1327
1328 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1329 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1330 ZV_BYTE - this_pos_byte,
1331 buf_charlen);
1332 TRANSLATE (buf_ch, trt, buf_ch);
1333
1334 if (buf_ch != pat_ch)
1335 break;
1327 1336
1328 this_len_byte -= charlen; 1337 this_len_byte -= charlen;
1329 this_len--; 1338 this_len--;
1330 p += charlen; 1339 p += charlen;
1331 1340
1332 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1333 ZV_BYTE - this_pos_byte,
1334 buf_charlen);
1335 this_pos_byte += buf_charlen; 1341 this_pos_byte += buf_charlen;
1336 this_pos++; 1342 this_pos++;
1337 TRANSLATE (buf_ch, trt, buf_ch);
1338
1339 if (buf_ch != pat_ch)
1340 break;
1341 } 1343 }
1342 1344
1343 if (this_len == 0) 1345 if (this_len == 0)
1344 { 1346 {
1345 pos += len; 1347 pos += len;
1367 1369
1368 while (this_len > 0) 1370 while (this_len > 0)
1369 { 1371 {
1370 int pat_ch = *p++; 1372 int pat_ch = *p++;
1371 int buf_ch = FETCH_BYTE (this_pos); 1373 int buf_ch = FETCH_BYTE (this_pos);
1374 TRANSLATE (buf_ch, trt, buf_ch);
1375
1376 if (buf_ch != pat_ch)
1377 break;
1378
1372 this_len--; 1379 this_len--;
1373 this_pos++; 1380 this_pos++;
1374 TRANSLATE (buf_ch, trt, buf_ch);
1375
1376 if (buf_ch != pat_ch)
1377 break;
1378 } 1381 }
1379 1382
1380 if (this_len == 0) 1383 if (this_len == 0)
1381 { 1384 {
1382 pos += len; 1385 pos += len;
1405 goto stop; 1408 goto stop;
1406 1409
1407 while (this_len > 0) 1410 while (this_len > 0)
1408 { 1411 {
1409 int charlen, buf_charlen; 1412 int charlen, buf_charlen;
1410 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); 1413 int pat_ch, buf_ch;
1411 int buf_ch; 1414
1415 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1416 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1417 ZV_BYTE - this_pos_byte,
1418 buf_charlen);
1419 TRANSLATE (buf_ch, trt, buf_ch);
1420
1421 if (buf_ch != pat_ch)
1422 break;
1412 1423
1413 this_len_byte -= charlen; 1424 this_len_byte -= charlen;
1414 this_len--; 1425 this_len--;
1415 p += charlen; 1426 p += charlen;
1416
1417 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1418 ZV_BYTE - this_pos_byte,
1419 buf_charlen);
1420 this_pos_byte += buf_charlen; 1427 this_pos_byte += buf_charlen;
1421 this_pos++; 1428 this_pos++;
1422 TRANSLATE (buf_ch, trt, buf_ch);
1423
1424 if (buf_ch != pat_ch)
1425 break;
1426 } 1429 }
1427 1430
1428 if (this_len == 0) 1431 if (this_len == 0)
1429 { 1432 {
1430 pos -= len; 1433 pos -= len;
1452 1455
1453 while (this_len > 0) 1456 while (this_len > 0)
1454 { 1457 {
1455 int pat_ch = *p++; 1458 int pat_ch = *p++;
1456 int buf_ch = FETCH_BYTE (this_pos); 1459 int buf_ch = FETCH_BYTE (this_pos);
1460 TRANSLATE (buf_ch, trt, buf_ch);
1461
1462 if (buf_ch != pat_ch)
1463 break;
1457 this_len--; 1464 this_len--;
1458 this_pos++; 1465 this_pos++;
1459 TRANSLATE (buf_ch, trt, buf_ch);
1460
1461 if (buf_ch != pat_ch)
1462 break;
1463 } 1466 }
1464 1467
1465 if (this_len == 0) 1468 if (this_len == 0)
1466 { 1469 {
1467 pos -= len; 1470 pos -= len;
1475 } 1478 }
1476 1479
1477 stop: 1480 stop:
1478 if (n == 0) 1481 if (n == 0)
1479 { 1482 {
1480 set_search_regs (multibyte ? pos_byte : pos, len_byte); 1483 if (forward)
1484 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1485 else
1486 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1481 1487
1482 return pos; 1488 return pos;
1483 } 1489 }
1484 else if (n > 0) 1490 else if (n > 0)
1485 return -n; 1491 return -n;
1603 { 1609 {
1604 unsigned char *charstart = ptr; 1610 unsigned char *charstart = ptr;
1605 while (! CHAR_HEAD_P (*charstart)) 1611 while (! CHAR_HEAD_P (*charstart))
1606 charstart--; 1612 charstart--;
1607 untranslated = STRING_CHAR (charstart, ptr - charstart + 1); 1613 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1608 TRANSLATE (ch, trt, untranslated); 1614 if (charset_base == (untranslated & ~0xff))
1609 if (charset_base == (ch & ~0xff))
1610 { 1615 {
1616 TRANSLATE (ch, trt, untranslated);
1611 if (! CHAR_HEAD_P (*ptr)) 1617 if (! CHAR_HEAD_P (*ptr))
1612 { 1618 {
1613 translate_prev_byte = ptr[-1]; 1619 translate_prev_byte = ptr[-1];
1614 if (! CHAR_HEAD_P (translate_prev_byte)) 1620 if (! CHAR_HEAD_P (translate_prev_byte))
1615 translate_anteprev_byte = ptr[-2]; 1621 translate_anteprev_byte = ptr[-2];
1616 } 1622 }
1617 } 1623 }
1618 else 1624 else
1619 this_translated = 0; 1625 {
1626 this_translated = 0;
1627 ch = *ptr;
1628 }
1620 } 1629 }
1621 else if (!multibyte) 1630 else if (!multibyte)
1622 TRANSLATE (ch, trt, *ptr); 1631 TRANSLATE (ch, trt, *ptr);
1623 else 1632 else
1624 { 1633 {
1625 ch = *ptr; 1634 ch = *ptr;
1626 this_translated = 0; 1635 this_translated = 0;
1627 } 1636 }
1628 1637
1629 k = j = (unsigned char) ch; 1638 if (ch > 0400)
1639 j = ((unsigned char) ch) | 0200;
1640 else
1641 j = (unsigned char) ch;
1642
1630 if (i == infinity) 1643 if (i == infinity)
1631 stride_for_teases = BM_tab[j]; 1644 stride_for_teases = BM_tab[j];
1645
1632 BM_tab[j] = dirlen - i; 1646 BM_tab[j] = dirlen - i;
1633 /* A translation table is accompanied by its inverse -- see */ 1647 /* A translation table is accompanied by its inverse -- see */
1634 /* comment following downcase_table for details */ 1648 /* comment following downcase_table for details */
1635 if (this_translated) 1649 if (this_translated)
1636 while (1) 1650 {
1637 { 1651 int starting_ch = ch;
1638 TRANSLATE (ch, inverse_trt, ch); 1652 int starting_j = j;
1639 /* For all the characters that map into K, 1653 while (1)
1640 set up simple_translate to map them into K. */ 1654 {
1641 simple_translate[(unsigned char) ch] = k; 1655 TRANSLATE (ch, inverse_trt, ch);
1642 if ((unsigned char) ch == k) 1656 if (ch > 0400)
1643 break; 1657 j = ((unsigned char) ch) | 0200;
1644 BM_tab[(unsigned char) ch] = dirlen - i; 1658 else
1645 } 1659 j = (unsigned char) ch;
1660
1661 /* For all the characters that map into CH,
1662 set up simple_translate to map the last byte
1663 into STARTING_J. */
1664 simple_translate[j] = starting_j;
1665 if (ch == starting_ch)
1666 break;
1667 BM_tab[j] = dirlen - i;
1668 }
1669 }
1646 } 1670 }
1647 else 1671 else
1648 { 1672 {
1649 j = *ptr; 1673 j = *ptr;
1650 1674