Mercurial > emacs
comparison src/regex.c @ 28342:9761cf2351fa
(enum re_opcode_t): New opcode on_failure_jump_nastyloop.
(print_partial_compiled_pattern, re_compile_fastmap): Handle new opcode.
(regex_compile): Use on_failure_jump_nastyloop for non-greedy loops.
(re_match_2_internal): Add code for on_failure_jump_nastyloop when
executing it as well as when popping it off the stack to find infinite
loops in non-greedy repetition operators.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Sun, 26 Mar 2000 23:05:51 +0000 |
parents | 24a23e27dac6 |
children | bc86be15099e |
comparison
equal
deleted
inserted
replaced
28341:f89c8f185c0d | 28342:9761cf2351fa |
---|---|
18 along with this program; if not, write to the Free Software | 18 along with this program; if not, write to the Free Software |
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
20 USA. */ | 20 USA. */ |
21 | 21 |
22 /* TODO: | 22 /* TODO: |
23 - detect nasty infinite loops like "\\(\\)+?ab" when matching "ac". | |
24 - use analyze_first to optimize non-empty loops | 23 - use analyze_first to optimize non-empty loops |
25 - optimize succeed_n and jump_n away when possible | 24 - optimize succeed_n and jump_n away when possible |
26 - clean up multibyte issues | 25 - clean up multibyte issues |
27 - structure the opcode space into opcode+flag. | 26 - structure the opcode space into opcode+flag. |
28 - merge with glic's regex.[ch] | 27 - merge with glic's regex.[ch] |
529 | 528 |
530 /* Just like `on_failure_jump', except that it checks that we | 529 /* Just like `on_failure_jump', except that it checks that we |
531 don't get stuck in an infinite loop (matching an empty string | 530 don't get stuck in an infinite loop (matching an empty string |
532 indefinitely). */ | 531 indefinitely). */ |
533 on_failure_jump_loop, | 532 on_failure_jump_loop, |
533 | |
534 /* Just like `on_failure_jump_loop', except that it checks for | |
535 a different kind of loop (the kind that shows up with non-greedy | |
536 operators). This operation has to be immediately preceded | |
537 by a `no_op'. */ | |
538 on_failure_jump_nastyloop, | |
534 | 539 |
535 /* A smart `on_failure_jump' used for greedy * and + operators. | 540 /* A smart `on_failure_jump' used for greedy * and + operators. |
536 It analyses the loop before which it is put and if the | 541 It analyses the loop before which it is put and if the |
537 loop does not require backtracking, it changes itself to | 542 loop does not require backtracking, it changes itself to |
538 `on_failure_keep_string_jump' and short-circuits the loop, | 543 `on_failure_keep_string_jump' and short-circuits the loop, |
938 break; | 943 break; |
939 | 944 |
940 case on_failure_keep_string_jump: | 945 case on_failure_keep_string_jump: |
941 extract_number_and_incr (&mcnt, &p); | 946 extract_number_and_incr (&mcnt, &p); |
942 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); | 947 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); |
948 break; | |
949 | |
950 case on_failure_jump_nastyloop: | |
951 extract_number_and_incr (&mcnt, &p); | |
952 printf ("/on_failure_jump_nastyloop to %d", p + mcnt - start); | |
943 break; | 953 break; |
944 | 954 |
945 case on_failure_jump_loop: | 955 case on_failure_jump_loop: |
946 extract_number_and_incr (&mcnt, &p); | 956 extract_number_and_incr (&mcnt, &p); |
947 printf ("/on_failure_jump_loop to %d", p + mcnt - start); | 957 printf ("/on_failure_jump_loop to %d", p + mcnt - start); |
2177 } | 2187 } |
2178 } | 2188 } |
2179 else /* not greedy */ | 2189 else /* not greedy */ |
2180 { /* I wish the greedy and non-greedy cases could be merged. */ | 2190 { /* I wish the greedy and non-greedy cases could be merged. */ |
2181 | 2191 |
2192 GET_BUFFER_SPACE (7); /* We might use less. */ | |
2182 if (many_times_ok) | 2193 if (many_times_ok) |
2183 { | 2194 { |
2184 /* The non-greedy multiple match looks like a repeat..until: | 2195 /* The non-greedy multiple match looks like a repeat..until: |
2185 we only need a conditional jump at the end of the loop */ | 2196 we only need a conditional jump at the end of the loop */ |
2186 GET_BUFFER_SPACE (3); | 2197 BUF_PUSH (no_op); |
2187 STORE_JUMP (on_failure_jump, b, laststart); | 2198 STORE_JUMP (on_failure_jump_nastyloop, b, laststart); |
2188 b += 3; | 2199 b += 3; |
2189 if (zero_times_ok) | 2200 if (zero_times_ok) |
2190 { | 2201 { |
2191 /* The repeat...until naturally matches one or more. | 2202 /* The repeat...until naturally matches one or more. |
2192 To also match zero times, we need to first jump to | 2203 To also match zero times, we need to first jump to |
2193 the end of the loop (its conditional jump). */ | 2204 the end of the loop (its conditional jump). */ |
2194 GET_BUFFER_SPACE (3); | |
2195 INSERT_JUMP (jump, laststart, b); | 2205 INSERT_JUMP (jump, laststart, b); |
2196 b += 3; | 2206 b += 3; |
2197 } | 2207 } |
2198 } | 2208 } |
2199 else | 2209 else |
2200 { | 2210 { |
2201 /* non-greedy a?? */ | 2211 /* non-greedy a?? */ |
2202 GET_BUFFER_SPACE (6); | |
2203 INSERT_JUMP (jump, laststart, b + 3); | 2212 INSERT_JUMP (jump, laststart, b + 3); |
2204 b += 3; | 2213 b += 3; |
2205 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); | 2214 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); |
2206 b += 3; | 2215 b += 3; |
2207 } | 2216 } |
3512 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) | 3521 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) |
3513 { | 3522 { |
3514 case on_failure_jump: | 3523 case on_failure_jump: |
3515 case on_failure_keep_string_jump: | 3524 case on_failure_keep_string_jump: |
3516 case on_failure_jump_loop: | 3525 case on_failure_jump_loop: |
3526 case on_failure_jump_nastyloop: | |
3517 case on_failure_jump_smart: | 3527 case on_failure_jump_smart: |
3518 p++; | 3528 p++; |
3519 break; | 3529 break; |
3520 default: | 3530 default: |
3521 continue; | 3531 continue; |
3524 to jump back to "just after here". */ | 3534 to jump back to "just after here". */ |
3525 /* Fallthrough */ | 3535 /* Fallthrough */ |
3526 | 3536 |
3527 case on_failure_jump: | 3537 case on_failure_jump: |
3528 case on_failure_keep_string_jump: | 3538 case on_failure_keep_string_jump: |
3539 case on_failure_jump_nastyloop: | |
3529 case on_failure_jump_loop: | 3540 case on_failure_jump_loop: |
3530 case on_failure_jump_smart: | 3541 case on_failure_jump_smart: |
3531 handle_on_failure_jump: | 3542 handle_on_failure_jump: |
3532 EXTRACT_NUMBER_AND_INCR (j, p); | 3543 EXTRACT_NUMBER_AND_INCR (j, p); |
3533 | 3544 |
3706 int total_size = size1 + size2; | 3717 int total_size = size1 + size2; |
3707 int endpos = startpos + range; | 3718 int endpos = startpos + range; |
3708 int anchored_start = 0; | 3719 int anchored_start = 0; |
3709 | 3720 |
3710 /* Nonzero if we have to concern multibyte character. */ | 3721 /* Nonzero if we have to concern multibyte character. */ |
3711 int multibyte = bufp->multibyte; | 3722 const boolean multibyte = bufp->multibyte; |
3712 | 3723 |
3713 /* Check for out-of-range STARTPOS. */ | 3724 /* Check for out-of-range STARTPOS. */ |
3714 if (startpos < 0 || startpos > total_size) | 3725 if (startpos < 0 || startpos > total_size) |
3715 return -1; | 3726 return -1; |
3716 | 3727 |
5065 mcnt, p + mcnt); | 5076 mcnt, p + mcnt); |
5066 | 5077 |
5067 PUSH_FAILURE_POINT (p - 3, NULL); | 5078 PUSH_FAILURE_POINT (p - 3, NULL); |
5068 break; | 5079 break; |
5069 | 5080 |
5081 /* A nasty loop is introduced by the non-greedy *? and +?. | |
5082 With such loops, the stack only ever contains one failure point | |
5083 at a time, so that a plain on_failure_jump_loop kind of | |
5084 cycle detection cannot work. Worse yet, such a detection | |
5085 can not only fail to detect a cycle, but it can also wrongly | |
5086 detect a cycle (between different instantiations of the same | |
5087 loop. | |
5088 So the method used for those nasty loops is a little different: | |
5089 We use a special cycle-detection-stack-frame which is pushed | |
5090 when the on_failure_jump_nastyloop failure-point is *popped*. | |
5091 This special frame thus marks the beginning of one iteration | |
5092 through the loop and we can hence easily check right here | |
5093 whether something matched between the beginning and the end of | |
5094 the loop. */ | |
5095 case on_failure_jump_nastyloop: | |
5096 EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
5097 DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n", | |
5098 mcnt, p + mcnt); | |
5099 | |
5100 assert ((re_opcode_t)p[-4] == no_op); | |
5101 CHECK_INFINITE_LOOP (p - 4, d); | |
5102 PUSH_FAILURE_POINT (p - 3, d); | |
5103 break; | |
5104 | |
5070 | 5105 |
5071 /* Simple loop detecting on_failure_jump: just check on the | 5106 /* Simple loop detecting on_failure_jump: just check on the |
5072 failure stack if the same spot was already hit earlier. */ | 5107 failure stack if the same spot was already hit earlier. */ |
5073 case on_failure_jump_loop: | 5108 case on_failure_jump_loop: |
5074 on_failure: | 5109 on_failure: |
5426 { | 5461 { |
5427 case on_failure_keep_string_jump: | 5462 case on_failure_keep_string_jump: |
5428 assert (str == NULL); | 5463 assert (str == NULL); |
5429 goto continue_failure_jump; | 5464 goto continue_failure_jump; |
5430 | 5465 |
5466 case on_failure_jump_nastyloop: | |
5467 assert ((re_opcode_t)pat[-2] == no_op); | |
5468 PUSH_FAILURE_POINT (pat - 2, str); | |
5469 /* Fallthrough */ | |
5470 | |
5431 case on_failure_jump_loop: | 5471 case on_failure_jump_loop: |
5432 case on_failure_jump: | 5472 case on_failure_jump: |
5433 case succeed_n: | 5473 case succeed_n: |
5434 d = str; | 5474 d = str; |
5435 continue_failure_jump: | 5475 continue_failure_jump: |
5436 EXTRACT_NUMBER_AND_INCR (mcnt, pat); | 5476 EXTRACT_NUMBER_AND_INCR (mcnt, pat); |
5437 p = pat + mcnt; | 5477 p = pat + mcnt; |
5438 break; | 5478 break; |
5479 | |
5480 case no_op: | |
5481 /* A special frame used for nastyloops. */ | |
5482 goto fail; | |
5439 | 5483 |
5440 default: | 5484 default: |
5441 abort(); | 5485 abort(); |
5442 } | 5486 } |
5443 | 5487 |