Mercurial > emacs
changeset 21117:a88d2c555a06
(simple_search): Don't count a character until it matches!
In a forward search, call set_search_regs with the position minus
len_byte instead of the position itself.
(boyer_moore): Fix up the code that translates the pattern
and loops through equivalent characters.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Mon, 09 Mar 1998 00:25:30 +0000 |
parents | ccf251830c66 |
children | 2aa0dc203036 |
files | src/search.c |
diffstat | 1 files changed, 62 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/src/search.c Sun Mar 08 23:40:28 1998 +0000 +++ b/src/search.c Mon Mar 09 00:25:30 1998 +0000 @@ -1304,6 +1304,7 @@ int lim, lim_byte; { int multibyte = ! NILP (current_buffer->enable_multibyte_characters); + int forward = n > 0; if (lim > pos && multibyte) while (n > 0) @@ -1322,22 +1323,23 @@ while (this_len > 0) { int charlen, buf_charlen; - int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); - int buf_ch; + int pat_ch, buf_ch; + + pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); + buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), + ZV_BYTE - this_pos_byte, + buf_charlen); + TRANSLATE (buf_ch, trt, buf_ch); + + if (buf_ch != pat_ch) + break; this_len_byte -= charlen; this_len--; p += charlen; - buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), - ZV_BYTE - this_pos_byte, - buf_charlen); this_pos_byte += buf_charlen; this_pos++; - TRANSLATE (buf_ch, trt, buf_ch); - - if (buf_ch != pat_ch) - break; } if (this_len == 0) @@ -1369,12 +1371,13 @@ { int pat_ch = *p++; int buf_ch = FETCH_BYTE (this_pos); - this_len--; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + + this_len--; + this_pos++; } if (this_len == 0) @@ -1407,22 +1410,22 @@ while (this_len > 0) { int charlen, buf_charlen; - int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); - int buf_ch; + int pat_ch, buf_ch; + + pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); + buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), + ZV_BYTE - this_pos_byte, + buf_charlen); + TRANSLATE (buf_ch, trt, buf_ch); + + if (buf_ch != pat_ch) + break; this_len_byte -= charlen; this_len--; p += charlen; - - buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), - ZV_BYTE - this_pos_byte, - buf_charlen); this_pos_byte += buf_charlen; this_pos++; - TRANSLATE (buf_ch, trt, buf_ch); - - if (buf_ch != pat_ch) - break; } if (this_len == 0) @@ -1454,12 +1457,12 @@ { int pat_ch = *p++; int buf_ch = FETCH_BYTE 
(this_pos); - this_len--; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + this_len--; + this_pos++; } if (this_len == 0) @@ -1477,7 +1480,10 @@ stop: if (n == 0) { - set_search_regs (multibyte ? pos_byte : pos, len_byte); + if (forward) + set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte); + else + set_search_regs (multibyte ? pos_byte : pos, len_byte); return pos; } @@ -1605,9 +1611,9 @@ while (! CHAR_HEAD_P (*charstart)) charstart--; untranslated = STRING_CHAR (charstart, ptr - charstart + 1); - TRANSLATE (ch, trt, untranslated); - if (charset_base == (ch & ~0xff)) + if (charset_base == (untranslated & ~0xff)) { + TRANSLATE (ch, trt, untranslated); if (! CHAR_HEAD_P (*ptr)) { translate_prev_byte = ptr[-1]; @@ -1616,7 +1622,10 @@ } } else - this_translated = 0; + { + this_translated = 0; + ch = *ptr; + } } else if (!multibyte) TRANSLATE (ch, trt, *ptr); @@ -1626,23 +1635,38 @@ this_translated = 0; } - k = j = (unsigned char) ch; + if (ch > 0400) + j = ((unsigned char) ch) | 0200; + else + j = (unsigned char) ch; + if (i == infinity) stride_for_teases = BM_tab[j]; + BM_tab[j] = dirlen - i; /* A translation table is accompanied by its inverse -- see */ /* comment following downcase_table for details */ if (this_translated) - while (1) - { - TRANSLATE (ch, inverse_trt, ch); - /* For all the characters that map into K, - set up simple_translate to map them into K. */ - simple_translate[(unsigned char) ch] = k; - if ((unsigned char) ch == k) - break; - BM_tab[(unsigned char) ch] = dirlen - i; - } + { + int starting_ch = ch; + int starting_j = j; + while (1) + { + TRANSLATE (ch, inverse_trt, ch); + if (ch > 0400) + j = ((unsigned char) ch) | 0200; + else + j = (unsigned char) ch; + + /* For all the characters that map into CH, + set up simple_translate to map the last byte + into STARTING_J. */ + simple_translate[j] = starting_j; + if (ch == starting_ch) + break; + BM_tab[j] = dirlen - i; + } + } } else {