Mercurial > emacs
changeset 26906:5eb1e428de28
1999-12-15 Kenichi Handa <handa@etl.go.jp>
* regex.c (regex_compile): Adjusted for the change of CHAR_STRING.
1999-12-04 Stefan Monnier <monnier@cs.yale.edu>
* regex.c (regex_compile): Recognize *?, +? and ?? as non-greedy
operators and handle them properly.
* regex.h (RE_ALL_GREEDY): New option.
(RE_UNMATCHED_RIGHT_PAREN_ORD): Moved to the end where alphabetic
sorting would put it.
(RE_SYNTAX_AWK, RE_SYNTAX_GREP, RE_SYNTAX_EGREP)
(_RE_SYNTAX_POSIX_COMMON): Use the new option to keep old behavior.
author | Dave Love <fx@gnu.org> |
---|---|
date | Wed, 15 Dec 1999 15:15:29 +0000 |
parents | 9f4d574a38b5 |
children | 7498ac232d1c |
files | src/regex.c |
diffstat | 1 files changed, 46 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/regex.c Wed Dec 15 15:14:20 1999 +0000
+++ b/src/regex.c Wed Dec 15 15:15:29 1999 +0000
@@ -2168,6 +2168,7 @@
 /* 1 means zero (many) matches is allowed. */
 char zero_times_ok = 0, many_times_ok = 0;
+ char greedy = 1;
 /* If there is a sequence of repetition chars, collapse it
 down to just one (the right one). We can't combine
@@ -2176,8 +2177,14 @@
 for (;;)
 {
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
+ if (!(syntax & RE_ALL_GREEDY)
+ && c == '?' && (zero_times_ok || many_times_ok))
+ greedy = 0;
+ else
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+ }
 if (p == pend)
 break;
@@ -2218,6 +2225,8 @@
 /* Now we know whether or not zero matches is allowed
 and also whether or not two or more matches is allowed. */
+ if (greedy)
+ {
 if (many_times_ok)
 { /* More than one repetition is allowed, so put in at the
 end a backward relative jump from `b' to before the next
@@ -2276,7 +2285,39 @@
 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
 b += 3;
 }
- }
+
+ }
+ else /* not greedy */
+ { /* I wish the greedy and non-greedy cases could be merged. */
+
+ if (many_times_ok)
+ {
+ /* The greedy multiple match looks like a repeat..until:
+ we only need a conditional jump at the end of the loop */
+ GET_BUFFER_SPACE (3);
+ STORE_JUMP (on_failure_jump, b, laststart);
+ b += 3;
+ if (zero_times_ok)
+ {
+ /* The repeat...until naturally matches one or more.
+ To also match zero times, we need to first jump to
+ the end of the loop (its conditional jump). */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b);
+ b += 3;
+ }
+ }
+ else
+ {
+ /* non-greedy a?? */
+ GET_BUFFER_SPACE (6);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ }
 break;
@@ -3110,8 +3151,8 @@
 #ifdef emacs
 if (! SINGLE_BYTE_CHAR_P (c))
 {
- unsigned char work[4], *str;
- int i = CHAR_STRING (c, work, str);
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
+ int i = CHAR_STRING (c, str);
 int j;
 for (j = 0; j < i; j++)
 {
```