Mercurial > emacs
changeset 20898:f69969e35e78
(simple_search): Call set_search_regs.
(boyer_moore): New arg CHARSET_BASE says which chars
to use the translate tables for.
(search_buffer): Properly test which chars participate in translation.
(TRANSLATE): New arg OUT. Handle non-integer in TRT.
All calls changed.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Sat, 14 Feb 1998 08:43:17 +0000 |
parents | 77deec174f57 |
children | d1f6ac340403 |
files | src/search.c |
diffstat | 1 files changed, 54 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/src/search.c Sat Feb 14 08:41:32 1998 +0000
+++ b/src/search.c Sat Feb 14 08:43:17 1998 +0000
@@ -972,8 +972,22 @@
 POSIX is nonzero if we want full backtracking (POSIX style)
 for this pattern. 0 means backtrack only enough to get a valid match. */
-#define TRANSLATE(trt, d) \
- (! NILP (trt) ? XINT (Faref (trt, make_number (d))) : (d))
+#define TRANSLATE(out, trt, d) \
+do \
+ { \
+ if (! NILP (trt)) \
+ { \
+ Lisp_Object temp; \
+ temp = Faref (trt, make_number (d)); \
+ if (INTEGERP (temp)) \
+ out = XINT (temp); \
+ else \
+ out = d; \
+ } \
+ else \
+ out = d; \
+ } \
+while (0)
 static int
 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
@@ -1165,7 +1179,7 @@
 while (--len >= 0)
 {
 unsigned char workbuf[4], *str;
- int c, translated;
+ int c, translated, inverse;
 int in_charlen, charlen;
 /* If we got here and the RE flag is set, it's because we're
@@ -1180,7 +1194,7 @@
 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
 /* Translate the character, if requested. */
- translated = TRANSLATE (trt, c);
+ TRANSLATE (translated, trt, c);
 /* If translation changed the byte-length, go back
 to the original character. */
 charlen = CHAR_STRING (translated, workbuf, str);
@@ -1190,10 +1204,11 @@
 charlen = CHAR_STRING (c, workbuf, str);
 }
+ TRANSLATE (inverse, inverse_trt, c);
+
 /* Did this char actually get translated?
 Would any other char get translated into it? */
- if (translated != c
- || TRANSLATE (inverse_trt, c) != c)
+ if (translated != c || inverse != c)
 {
 /* Keep track of which character set row
 contains the characters that need translation. */
@@ -1206,7 +1221,7 @@
 simple = 0;
 /* ??? Handa: this must do simple = 0
 if c is a composite character. */
- }
+ }
 /* Store this character into the translated pattern. */
 bcopy (str, pat, charlen);
@@ -1219,7 +1234,7 @@
 {
 while (--len >= 0)
 {
- int c, translated;
+ int c, translated, inverse;
 /* If we got here and the RE flag is set, it's because we're
 dealing with a regexp known to be trivial, so the backslash
@@ -1230,12 +1245,12 @@
 base_pat++;
 }
 c = *base_pat++;
- translated = TRANSLATE (trt, c);
+ TRANSLATE (translated, trt, c);
+ TRANSLATE (inverse, inverse_trt, c);
 /* Did this char actually get translated?
 Would any other char get translated into it? */
- if (translated != c
- || TRANSLATE (inverse_trt, c) != c)
+ if (translated != c || inverse != c)
 {
 /* Keep track of which character set row
 contains the characters that need translation. */
@@ -1246,7 +1261,7 @@
 /* If two different rows appear, needing translation,
 then we cannot use boyer_moore search. */
 simple = 0;
- }
+ }
 *pat++ = translated;
 }
 }
@@ -1257,7 +1272,8 @@
 if (simple)
 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
- pos, pos_byte, lim, lim_byte);
+ pos, pos_byte, lim, lim_byte,
+ charset_base);
 else
 return simple_search (n, pat, len, len_byte, trt,
 pos, pos_byte, lim, lim_byte);
@@ -1316,7 +1332,7 @@
 buf_charlen);
 this_pos_byte += buf_charlen;
 this_pos++;
- buf_ch = TRANSLATE (trt, buf_ch);
+ TRANSLATE (buf_ch, trt, buf_ch);
 if (buf_ch != pat_ch)
 break;
@@ -1353,7 +1369,7 @@
 int buf_ch = FETCH_BYTE (this_pos);
 this_len--;
 this_pos++;
- buf_ch = TRANSLATE (trt, buf_ch);
+ TRANSLATE (buf_ch, trt, buf_ch);
 if (buf_ch != pat_ch)
 break;
@@ -1401,7 +1417,7 @@
 buf_charlen);
 this_pos_byte += buf_charlen;
 this_pos++;
- buf_ch = TRANSLATE (trt, buf_ch);
+ TRANSLATE (buf_ch, trt, buf_ch);
 if (buf_ch != pat_ch)
 break;
@@ -1438,7 +1454,7 @@
 int buf_ch = FETCH_BYTE (this_pos);
 this_len--;
 this_pos++;
- buf_ch = TRANSLATE (trt, buf_ch);
+ TRANSLATE (buf_ch, trt, buf_ch);
 if (buf_ch != pat_ch)
 break;
@@ -1458,7 +1474,11 @@
 stop:
 if (n == 0)
- return pos;
+ {
+ set_search_regs (multibyte ? pos_byte : pos, len_byte);
+
+ return pos;
+ }
 else if (n > 0)
 return -n;
 else
@@ -1480,7 +1500,7 @@
 static int
 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
- pos, pos_byte, lim, lim_byte)
+ pos, pos_byte, lim, lim_byte, charset_base)
 int n;
 unsigned char *base_pat;
 int len, len_byte;
@@ -1488,6 +1508,7 @@
 Lisp_Object inverse_trt;
 int pos, pos_byte;
 int lim, lim_byte;
+ int charset_base;
 {
 int direction = ((n > 0) ? 1 : -1);
 register int dirlen;
@@ -1572,6 +1593,7 @@
 if (! NILP (trt))
 {
 int ch;
+ int untranslated;
 int this_translated = 1;
 if (multibyte
@@ -1580,17 +1602,22 @@
 unsigned char *charstart = ptr;
 while (! CHAR_HEAD_P (*charstart))
 charstart--;
- if (! CHAR_HEAD_P (*ptr))
+ untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
+ TRANSLATE (ch, trt, untranslated);
+ if (charset_base == (ch & ~0xff))
 {
- translate_prev_byte = ptr[-1];
- if (! CHAR_HEAD_P (translate_prev_byte))
- translate_anteprev_byte = ptr[-2];
+ if (! CHAR_HEAD_P (*ptr))
+ {
+ translate_prev_byte = ptr[-1];
+ if (! CHAR_HEAD_P (translate_prev_byte))
+ translate_anteprev_byte = ptr[-2];
+ }
 }
- ch = STRING_CHAR (charstart, ptr - charstart + 1);
- ch = TRANSLATE (trt, ch);
+ else
+ this_translated = 0;
 }
 else if (!multibyte)
- ch = TRANSLATE (trt, *ptr);
+ TRANSLATE (ch, trt, *ptr);
 else
 {
 ch = *ptr;
@@ -1606,7 +1633,7 @@
 if (this_translated)
 while (1)
 {
- ch = TRANSLATE (inverse_trt, ch);
+ TRANSLATE (ch, inverse_trt, ch);
 /* For all the characters that map into K,
 set up simple_translate to map them into K. */
 simple_translate[(unsigned char) ch] = k;