# HG changeset patch # User Richard M. Stallman # Date 889403130 0 # Node ID a88d2c555a0608890b4292c2157ca3c93362bebe # Parent ccf251830c665838afd804d67f5afa558bf4668f (simple_search): Don't count a character until it matches! Call set_search_regs differently in a forward search. (boyer_moore): Fix up the code that translates the pattern and loops thru equivalent characters. diff -r ccf251830c66 -r a88d2c555a06 src/search.c --- a/src/search.c Sun Mar 08 23:40:28 1998 +0000 +++ b/src/search.c Mon Mar 09 00:25:30 1998 +0000 @@ -1304,6 +1304,7 @@ int lim, lim_byte; { int multibyte = ! NILP (current_buffer->enable_multibyte_characters); + int forward = n > 0; if (lim > pos && multibyte) while (n > 0) @@ -1322,22 +1323,23 @@ while (this_len > 0) { int charlen, buf_charlen; - int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); - int buf_ch; + int pat_ch, buf_ch; + + pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); + buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), + ZV_BYTE - this_pos_byte, + buf_charlen); + TRANSLATE (buf_ch, trt, buf_ch); + + if (buf_ch != pat_ch) + break; this_len_byte -= charlen; this_len--; p += charlen; - buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), - ZV_BYTE - this_pos_byte, - buf_charlen); this_pos_byte += buf_charlen; this_pos++; - TRANSLATE (buf_ch, trt, buf_ch); - - if (buf_ch != pat_ch) - break; } if (this_len == 0) @@ -1369,12 +1371,13 @@ { int pat_ch = *p++; int buf_ch = FETCH_BYTE (this_pos); - this_len--; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + + this_len--; + this_pos++; } if (this_len == 0) @@ -1407,22 +1410,22 @@ while (this_len > 0) { int charlen, buf_charlen; - int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); - int buf_ch; + int pat_ch, buf_ch; + + pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); + buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), + ZV_BYTE - this_pos_byte, + buf_charlen); + TRANSLATE (buf_ch, trt, buf_ch); + + if (buf_ch != pat_ch) + break; this_len_byte -= charlen; this_len--; p += charlen; - - buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), - ZV_BYTE - this_pos_byte, - buf_charlen); this_pos_byte += buf_charlen; this_pos++; - TRANSLATE (buf_ch, trt, buf_ch); - - if (buf_ch != pat_ch) - break; } if (this_len == 0) @@ -1454,12 +1457,12 @@ { int pat_ch = *p++; int buf_ch = FETCH_BYTE (this_pos); - this_len--; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + this_len--; + this_pos++; } if (this_len == 0) @@ -1477,7 +1480,10 @@ stop: if (n == 0) { - set_search_regs (multibyte ? pos_byte : pos, len_byte); + if (forward) + set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte); + else + set_search_regs (multibyte ? pos_byte : pos, len_byte); return pos; } @@ -1605,9 +1611,9 @@ while (! CHAR_HEAD_P (*charstart)) charstart--; untranslated = STRING_CHAR (charstart, ptr - charstart + 1); - TRANSLATE (ch, trt, untranslated); - if (charset_base == (ch & ~0xff)) + if (charset_base == (untranslated & ~0xff)) { + TRANSLATE (ch, trt, untranslated); if (! CHAR_HEAD_P (*ptr)) { translate_prev_byte = ptr[-1]; @@ -1616,7 +1622,10 @@ } } else - this_translated = 0; + { + this_translated = 0; + ch = *ptr; + } } else if (!multibyte) TRANSLATE (ch, trt, *ptr); @@ -1626,23 +1635,38 @@ this_translated = 0; } - k = j = (unsigned char) ch; + if (ch > 0400) + j = ((unsigned char) ch) | 0200; + else + j = (unsigned char) ch; + if (i == infinity) stride_for_teases = BM_tab[j]; + BM_tab[j] = dirlen - i; /* A translation table is accompanied by its inverse -- see */ /* comment following downcase_table for details */ if (this_translated) - while (1) - { - TRANSLATE (ch, inverse_trt, ch); - /* For all the characters that map into K, - set up simple_translate to map them into K. */ - simple_translate[(unsigned char) ch] = k; - if ((unsigned char) ch == k) - break; - BM_tab[(unsigned char) ch] = dirlen - i; - } + { + int starting_ch = ch; + int starting_j = j; + while (1) + { + TRANSLATE (ch, inverse_trt, ch); + if (ch > 0400) + j = ((unsigned char) ch) | 0200; + else + j = (unsigned char) ch; + + /* For all the characters that map into CH, + set up simple_translate to map the last byte + into STARTING_J. */ + simple_translate[j] = starting_j; + if (ch == starting_ch) + break; + BM_tab[j] = dirlen - i; + } + } } else {