Mercurial > emacs
changeset 73056:2e2651f3d494
(analyse_first): For eight-bit-control chars, mark both the
char's value and its leading byte in the fastmap.
(re_search_2): When fast-scanning without translation, be careful to
check that we only match the leading byte of a multibyte char.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Fri, 22 Sep 2006 17:30:13 +0000 |
parents | e946daf4b223 |
children | 76991373b070 |
files | src/ChangeLog src/regex.c |
diffstat | 2 files changed, 54 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/ChangeLog Fri Sep 22 17:30:02 2006 +0000
+++ b/src/ChangeLog Fri Sep 22 17:30:13 2006 +0000
@@ -1,3 +1,14 @@
+2006-09-22 Stefan Monnier <monnier@iro.umontreal.ca>
+
+        * regex.c (analyse_first): For eight-bit-control chars, mark both the
+        char's value and its leading byte in the fastmap.
+        (re_search_2): When fast-scanning without translation, be careful to
+        check that we only match the leading byte of a multibyte char.
+
+        * charset.h (PREV_CHAR_BOUNDARY): Make it work from within a char's
+        byte sequence.
+        (AT_CHAR_BOUNDARY): New macro.
+
 2006-09-22 Kenichi Handa <handa@m17n.org>
 
         * fns.c (optimize_sub_char_table): Don't optimize a sub-char-table
@@ -271,7 +282,7 @@
 
 2006-08-27 Martin Rudalics <rudalics@gmx.at>
 
-        * xdisp.c (mouse_autoselect_window): Removed.
+        * xdisp.c (mouse_autoselect_window): Remove.
         (Vmouse_autoselect_window): New variable. DEFVAR_LISP it.
 
         * dispextern.h (mouse_autoselect_window): Remove extern.
```
```diff
--- a/src/regex.c Fri Sep 22 17:30:02 2006 +0000
+++ b/src/regex.c Fri Sep 22 17:30:13 2006 +0000
@@ -3877,11 +3877,13 @@
           if (fastmap)
             {
               int c = RE_STRING_CHAR (p + 1, pend - p);
-
+              /* When fast-scanning, the fastmap can be indexed either with
+                 a char (smaller than 256) or with the first byte of
+                 a char's byte sequence. So we have to conservatively add
+                 both to the table. */
               if (SINGLE_BYTE_CHAR_P (c))
                 fastmap[c] = 1;
-              else
-                fastmap[p[1]] = 1;
+              fastmap[p[1]] = 1;
             }
           break;
 
@@ -3899,6 +3901,10 @@
              So any that are not listed in the charset
              are possible matches, even in multibyte buffers. */
           if (!fastmap) break;
+          /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
+             because it will automatically be set when needed by virtue of
+             being larger than the highest char of its charset (0xbf) but
+             smaller than (1<<BYTEWIDTH). */
           for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
                j < (1 << BYTEWIDTH); j++)
             fastmap[j] = 1;
@@ -3909,7 +3915,13 @@
           for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
                j >= 0; j--)
             if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
-              fastmap[j] = 1;
+              {
+                fastmap[j] = 1;
+#ifdef emacs
+                if (j >= 0x80 && j < 0xa0)
+                  fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
+#endif
+              }
 
           if ((not && multibyte)
               /* Any character set can possibly contain a character
@@ -4352,11 +4364,33 @@
                     }
                 }
               else
-                while (range > lim && !fastmap[*d])
+                do
                   {
-                    d++;
-                    range--;
-                  }
+                    re_char *d_start = d;
+                    while (range > lim && !fastmap[*d])
+                      {
+                        d++;
+                        range--;
+                      }
+#ifdef emacs
+                    if (multibyte && range > lim)
+                      {
+                        /* Check that we are at the beginning of a char. */
+                        int at_boundary;
+                        AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
+                        if (at_boundary)
+                          break;
+                        else
+                          { /* We have matched an internal byte of a char
+                               rather than the leading byte, so it's a false
+                               positive: we should keep scanning. */
+                            d++; range--;
+                          }
+                      }
+                    else
+#endif
+                      break;
+                  } while (1);
 
               startpos += irange - range;
             }
```