comparison src/regex.c @ 28342:9761cf2351fa

(enum re_opcode_t): New opcode on_failure_jump_nastyloop. (print_partial_compiled_pattern, re_compile_fastmap): Handle new opcode. (regex_compile): Use on_failure_jump_nastyloop for non-greedy loops. (re_match_2_internal): Add code for on_failure_jump_nastyloop when executing it as well as when popping it off the stack to find infinite loops in non-greedy repetition operators.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Sun, 26 Mar 2000 23:05:51 +0000
parents 24a23e27dac6
children bc86be15099e
comparison
equal deleted inserted replaced
28341:f89c8f185c0d 28342:9761cf2351fa
18 along with this program; if not, write to the Free Software 18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 USA. */ 20 USA. */
21 21
22 /* TODO: 22 /* TODO:
23 - detect nasty infinite loops like "\\(\\)+?ab" when matching "ac".
24 - use analyze_first to optimize non-empty loops 23 - use analyze_first to optimize non-empty loops
25 - optimize succeed_n and jump_n away when possible 24 - optimize succeed_n and jump_n away when possible
26 - clean up multibyte issues 25 - clean up multibyte issues
27 - structure the opcode space into opcode+flag. 26 - structure the opcode space into opcode+flag.
28 - merge with glic's regex.[ch] 27 - merge with glic's regex.[ch]
529 528
530 /* Just like `on_failure_jump', except that it checks that we 529 /* Just like `on_failure_jump', except that it checks that we
531 don't get stuck in an infinite loop (matching an empty string 530 don't get stuck in an infinite loop (matching an empty string
532 indefinitely). */ 531 indefinitely). */
533 on_failure_jump_loop, 532 on_failure_jump_loop,
533
534 /* Just like `on_failure_jump_loop', except that it checks for
535 a different kind of loop (the kind that shows up with non-greedy
536 operators). This operation has to be immediately preceded
537 by a `no_op'. */
538 on_failure_jump_nastyloop,
534 539
535 /* A smart `on_failure_jump' used for greedy * and + operators. 540 /* A smart `on_failure_jump' used for greedy * and + operators.
536 It analyses the loop before which it is put and if the 541 It analyses the loop before which it is put and if the
537 loop does not require backtracking, it changes itself to 542 loop does not require backtracking, it changes itself to
538 `on_failure_keep_string_jump' and short-circuits the loop, 543 `on_failure_keep_string_jump' and short-circuits the loop,
938 break; 943 break;
939 944
940 case on_failure_keep_string_jump: 945 case on_failure_keep_string_jump:
941 extract_number_and_incr (&mcnt, &p); 946 extract_number_and_incr (&mcnt, &p);
942 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); 947 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
948 break;
949
950 case on_failure_jump_nastyloop:
951 extract_number_and_incr (&mcnt, &p);
952 printf ("/on_failure_jump_nastyloop to %d", p + mcnt - start);
943 break; 953 break;
944 954
945 case on_failure_jump_loop: 955 case on_failure_jump_loop:
946 extract_number_and_incr (&mcnt, &p); 956 extract_number_and_incr (&mcnt, &p);
947 printf ("/on_failure_jump_loop to %d", p + mcnt - start); 957 printf ("/on_failure_jump_loop to %d", p + mcnt - start);
2177 } 2187 }
2178 } 2188 }
2179 else /* not greedy */ 2189 else /* not greedy */
2180 { /* I wish the greedy and non-greedy cases could be merged. */ 2190 { /* I wish the greedy and non-greedy cases could be merged. */
2181 2191
2192 GET_BUFFER_SPACE (7); /* We might use less. */
2182 if (many_times_ok) 2193 if (many_times_ok)
2183 { 2194 {
2184 /* The non-greedy multiple match looks like a repeat..until: 2195 /* The non-greedy multiple match looks like a repeat..until:
2185 we only need a conditional jump at the end of the loop */ 2196 we only need a conditional jump at the end of the loop */
2186 GET_BUFFER_SPACE (3); 2197 BUF_PUSH (no_op);
2187 STORE_JUMP (on_failure_jump, b, laststart); 2198 STORE_JUMP (on_failure_jump_nastyloop, b, laststart);
2188 b += 3; 2199 b += 3;
2189 if (zero_times_ok) 2200 if (zero_times_ok)
2190 { 2201 {
2191 /* The repeat...until naturally matches one or more. 2202 /* The repeat...until naturally matches one or more.
2192 To also match zero times, we need to first jump to 2203 To also match zero times, we need to first jump to
2193 the end of the loop (its conditional jump). */ 2204 the end of the loop (its conditional jump). */
2194 GET_BUFFER_SPACE (3);
2195 INSERT_JUMP (jump, laststart, b); 2205 INSERT_JUMP (jump, laststart, b);
2196 b += 3; 2206 b += 3;
2197 } 2207 }
2198 } 2208 }
2199 else 2209 else
2200 { 2210 {
2201 /* non-greedy a?? */ 2211 /* non-greedy a?? */
2202 GET_BUFFER_SPACE (6);
2203 INSERT_JUMP (jump, laststart, b + 3); 2212 INSERT_JUMP (jump, laststart, b + 3);
2204 b += 3; 2213 b += 3;
2205 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); 2214 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2206 b += 3; 2215 b += 3;
2207 } 2216 }
3512 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) 3521 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
3513 { 3522 {
3514 case on_failure_jump: 3523 case on_failure_jump:
3515 case on_failure_keep_string_jump: 3524 case on_failure_keep_string_jump:
3516 case on_failure_jump_loop: 3525 case on_failure_jump_loop:
3526 case on_failure_jump_nastyloop:
3517 case on_failure_jump_smart: 3527 case on_failure_jump_smart:
3518 p++; 3528 p++;
3519 break; 3529 break;
3520 default: 3530 default:
3521 continue; 3531 continue;
3524 to jump back to "just after here". */ 3534 to jump back to "just after here". */
3525 /* Fallthrough */ 3535 /* Fallthrough */
3526 3536
3527 case on_failure_jump: 3537 case on_failure_jump:
3528 case on_failure_keep_string_jump: 3538 case on_failure_keep_string_jump:
3539 case on_failure_jump_nastyloop:
3529 case on_failure_jump_loop: 3540 case on_failure_jump_loop:
3530 case on_failure_jump_smart: 3541 case on_failure_jump_smart:
3531 handle_on_failure_jump: 3542 handle_on_failure_jump:
3532 EXTRACT_NUMBER_AND_INCR (j, p); 3543 EXTRACT_NUMBER_AND_INCR (j, p);
3533 3544
3706 int total_size = size1 + size2; 3717 int total_size = size1 + size2;
3707 int endpos = startpos + range; 3718 int endpos = startpos + range;
3708 int anchored_start = 0; 3719 int anchored_start = 0;
3709 3720
3710 /* Nonzero if we have to concern multibyte character. */ 3721 /* Nonzero if we have to concern multibyte character. */
3711 int multibyte = bufp->multibyte; 3722 const boolean multibyte = bufp->multibyte;
3712 3723
3713 /* Check for out-of-range STARTPOS. */ 3724 /* Check for out-of-range STARTPOS. */
3714 if (startpos < 0 || startpos > total_size) 3725 if (startpos < 0 || startpos > total_size)
3715 return -1; 3726 return -1;
3716 3727
5065 mcnt, p + mcnt); 5076 mcnt, p + mcnt);
5066 5077
5067 PUSH_FAILURE_POINT (p - 3, NULL); 5078 PUSH_FAILURE_POINT (p - 3, NULL);
5068 break; 5079 break;
5069 5080
5081 /* A nasty loop is introduced by the non-greedy *? and +?.
5082 With such loops, the stack only ever contains one failure point
5083 at a time, so that a plain on_failure_jump_loop kind of
5084 cycle detection cannot work. Worse yet, such a detection
5085 can not only fail to detect a cycle, but it can also wrongly
5086 detect a cycle (between different instantiations of the same
5087 loop.
5088 So the method used for those nasty loops is a little different:
5089 We use a special cycle-detection-stack-frame which is pushed
5090 when the on_failure_jump_nastyloop failure-point is *popped*.
5091 This special frame thus marks the beginning of one iteration
5092 through the loop and we can hence easily check right here
5093 whether something matched between the beginning and the end of
5094 the loop. */
5095 case on_failure_jump_nastyloop:
5096 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5097 DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
5098 mcnt, p + mcnt);
5099
5100 assert ((re_opcode_t)p[-4] == no_op);
5101 CHECK_INFINITE_LOOP (p - 4, d);
5102 PUSH_FAILURE_POINT (p - 3, d);
5103 break;
5104
5070 5105
5071 /* Simple loop detecting on_failure_jump: just check on the 5106 /* Simple loop detecting on_failure_jump: just check on the
5072 failure stack if the same spot was already hit earlier. */ 5107 failure stack if the same spot was already hit earlier. */
5073 case on_failure_jump_loop: 5108 case on_failure_jump_loop:
5074 on_failure: 5109 on_failure:
5426 { 5461 {
5427 case on_failure_keep_string_jump: 5462 case on_failure_keep_string_jump:
5428 assert (str == NULL); 5463 assert (str == NULL);
5429 goto continue_failure_jump; 5464 goto continue_failure_jump;
5430 5465
5466 case on_failure_jump_nastyloop:
5467 assert ((re_opcode_t)pat[-2] == no_op);
5468 PUSH_FAILURE_POINT (pat - 2, str);
5469 /* Fallthrough */
5470
5431 case on_failure_jump_loop: 5471 case on_failure_jump_loop:
5432 case on_failure_jump: 5472 case on_failure_jump:
5433 case succeed_n: 5473 case succeed_n:
5434 d = str; 5474 d = str;
5435 continue_failure_jump: 5475 continue_failure_jump:
5436 EXTRACT_NUMBER_AND_INCR (mcnt, pat); 5476 EXTRACT_NUMBER_AND_INCR (mcnt, pat);
5437 p = pat + mcnt; 5477 p = pat + mcnt;
5438 break; 5478 break;
5479
5480 case no_op:
5481 /* A special frame used for nastyloops. */
5482 goto fail;
5439 5483
5440 default: 5484 default:
5441 abort(); 5485 abort();
5442 } 5486 }
5443 5487