# HG changeset patch
# User Richard M. Stallman
# Date 739440370 0
# Node ID cb4aa2f13edd65ce82ea4b50fd7bc9acee26e635
# Parent c14c0f2954fd5c69f378194fb980d30b804aa050
*** empty log message ***

diff -r c14c0f2954fd -r cb4aa2f13edd src/regex.c
--- a/src/regex.c Mon Jun 07 06:56:03 1993 +0000
+++ b/src/regex.c Mon Jun 07 08:06:10 1993 +0000
@@ -4130,11 +4130,27 @@
              detect that here, the alternative has put on a dummy
              failure point which is what we will end up popping. */
 
-          /* Skip over open/close-group commands. */
-          while (p2 + 2 < pend
-                 && ((re_opcode_t) *p2 == stop_memory
-                     || (re_opcode_t) *p2 == start_memory))
-            p2 += 3; /* Skip over args, too. */
+          /* Skip over open/close-group commands.
+             If what follows this loop is a ...+ construct,
+             look at what begins its body, since we will have to
+             match at least one of that. */
+          while (1)
+            {
+              if (p2 + 2 < pend
+                  && ((re_opcode_t) *p2 == stop_memory
+                      || (re_opcode_t) *p2 == start_memory))
+                p2 += 3;
+              else if (p2 + 6 < pend
+                       && (re_opcode_t) *p2 == dummy_failure_jump)
+                p2 += 6;
+              else
+                break;
+            }
+
+          p1 = p + mcnt;
+          /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+             to the `maybe_finalize_jump' of this case. Examine what
+             follows. */
 
           /* If we're at the end of the pattern, we can change. */
           if (p2 == pend)
@@ -4152,11 +4168,7 @@
               {
                 register unsigned char c
                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
-                p1 = p + mcnt;
-
-                /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
-                   to the `maybe_finalize_jump' of this case. Examine what
-                   follows. */
+
                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
                   {
                     p[-3] = (unsigned char) pop_failure_jump;
@@ -4182,6 +4194,57 @@
                   }
                 }
               }
+            else if ((re_opcode_t) *p2 == charset)
+              {
+                register unsigned char c
+                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+                /* p1[4] is the exactn count and p1[5] its first character;
+                   the charset bitmap begins at p2[2]. We win if that
+                   character is not in the charset after the loop. */
+                if ((re_opcode_t) p1[3] == exactn
+                    && ! (p2[1] * BYTEWIDTH > p1[5]
+                          && (p2[2 + p1[5] / BYTEWIDTH]
+                              & (1 << (p1[5] % BYTEWIDTH)))))
+                  {
+                    p[-3] = (unsigned char) pop_failure_jump;
+                    DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+                                  c, p1[5]);
+                  }
+
+                else if ((re_opcode_t) p1[3] == charset_not)
+                  {
+                    int idx;
+                    /* We win if the charset_not inside the loop
+                       lists every character listed in the charset after. */
+                    for (idx = 0; idx < p2[1]; idx++)
+                      if (! (p2[2 + idx] == 0
+                             || (idx < p1[4]
+                                 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+                        break;
+
+                    if (idx == p2[1])
+                      {
+                        p[-3] = (unsigned char) pop_failure_jump;
+                        DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+                      }
+                  }
+                else if ((re_opcode_t) p1[3] == charset)
+                  {
+                    int idx;
+                    /* We win if the charset inside the loop
+                       has no overlap with the one after the loop. */
+                    for (idx = 0; idx < p2[1] && idx < p1[4]; idx++)
+                      if ((p2[2 + idx] & p1[5 + idx]) != 0)
+                        break;
+
+                    if (idx == p2[1] || idx == p1[4])
+                      {
+                        p[-3] = (unsigned char) pop_failure_jump;
+                        DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+                      }
+                  }
+              }
           }
           p -= 2; /* Point at relative address again. */
           if ((re_opcode_t) p[-1] != pop_failure_jump)