comparison src/regex.c @ 28203:c10ee0e6982b

(RE_STRING_CHAR): New macro. (GET_CHAR_AFER_2): Remove. (RE_TRANSLATE, RE_TRANSLATE_P): New macros moved from regex.h. (enum re_opcode_t): Remove on_failure_jump_exclusive. (print_partial_compiled_pattern, re_compile_fastmap) (re_match_2_internal): Remove on_failure_jump_exclusive. (regex_compile): Turn optimizable P+ loops into PP*, so that the optimization only need to work for * (ie. can use of_keep_string_jump). Remove the special case for .*\n since it is now covered by the general optimization. (re_search_2): Don't bother with `room'. (skip_one_char): New function. (skip_noops): Simplify since `memory' is not needed any more. (mutually_exclusive_p): Restructure slightly to use `switch' and add handling for "all" remaining cases. (re_match_2_internal): Change on_failure_jump_smart to use on_failure_keep_string_jump (and redirect the end-of-loop jump) rather than on_failure_jump_exclusive.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Sun, 19 Mar 2000 23:22:06 +0000
parents c314d747a819
children f955117a1fcd
comparison
equal deleted inserted replaced
28202:118ca0109252 28203:c10ee0e6982b
78 #include "category.h" 78 #include "category.h"
79 79
80 #define malloc xmalloc 80 #define malloc xmalloc
81 #define realloc xrealloc 81 #define realloc xrealloc
82 #define free xfree 82 #define free xfree
83
84 #define RE_STRING_CHAR(p, s) \
85 (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
83 86
84 #else /* not emacs */ 87 #else /* not emacs */
85 88
86 /* If we are not linking with Emacs proper, 89 /* If we are not linking with Emacs proper,
87 we can't use the relocating allocator 90 we can't use the relocating allocator
185 #define CHAR_HEAD_P(p) (1) 188 #define CHAR_HEAD_P(p) (1)
186 #define SINGLE_BYTE_CHAR_P(c) (1) 189 #define SINGLE_BYTE_CHAR_P(c) (1)
187 #define SAME_CHARSET_P(c1, c2) (1) 190 #define SAME_CHARSET_P(c1, c2) (1)
188 #define MULTIBYTE_FORM_LENGTH(p, s) (1) 191 #define MULTIBYTE_FORM_LENGTH(p, s) (1)
189 #define STRING_CHAR(p, s) (*(p)) 192 #define STRING_CHAR(p, s) (*(p))
193 #define RE_STRING_CHAR STRING_CHAR
190 #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p)) 194 #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
191 #define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
192 (c = ((p) == (end1) ? *(str2) : *(p)))
193 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ 195 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
194 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) 196 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
195 #endif /* not emacs */ 197 #endif /* not emacs */
198
199 #ifndef RE_TRANSLATE
200 #define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
201 #define RE_TRANSLATE_P(TBL) (TBL)
202 #endif
196 203
197 /* Get the interface, including the syntax bits. */ 204 /* Get the interface, including the syntax bits. */
198 #include "regex.h" 205 #include "regex.h"
199 206
200 /* isalpha etc. are used for the character classes. */ 207 /* isalpha etc. are used for the character classes. */
517 524
518 /* Like on_failure_jump, but pushes a placeholder instead of the 525 /* Like on_failure_jump, but pushes a placeholder instead of the
519 current string position when executed. */ 526 current string position when executed. */
520 on_failure_keep_string_jump, 527 on_failure_keep_string_jump,
521 528
522 /* Like `on_failure_jump', except that it assumes that the
523 pattern following it is mutually exclusive with the pattern
524 at the destination of the jump: if one matches something,
525 the other won't match at all.
526 Always used via `on_failure_jump_smart'. */
527 on_failure_jump_exclusive,
528
529 /* Just like `on_failure_jump', except that it checks that we 529 /* Just like `on_failure_jump', except that it checks that we
530 don't get stuck in an infinite loop (matching an empty string 530 don't get stuck in an infinite loop (matching an empty string
531 indefinitely). */ 531 indefinitely). */
532 on_failure_jump_loop, 532 on_failure_jump_loop,
533 533
534 /* A smart `on_failure_jump' used for greedy * and + operators. 534 /* A smart `on_failure_jump' used for greedy * and + operators.
535 It analyses the loop before which it is put and if the 535 It analyses the loop before which it is put and if the
536 loop does not require backtracking, it changes itself to 536 loop does not require backtracking, it changes itself to
537 `on_failure_jump_exclusive', else it just defaults to 537 `on_failure_keep_string_jump' and short-circuits the loop,
538 changing itself into `on_failure_jump_loop'. */ 538 else it just defaults to changing itself into `on_failure_jump'.
539 It assumes that it is pointing to just past a `jump'. */
539 on_failure_jump_smart, 540 on_failure_jump_smart,
540 541
541 /* Followed by two-byte relative address and two-byte number n. 542 /* Followed by two-byte relative address and two-byte number n.
542 After matching N times, jump to the address upon failure. */ 543 After matching N times, jump to the address upon failure. */
543 succeed_n, 544 succeed_n,
939 break; 940 break;
940 941
941 case on_failure_keep_string_jump: 942 case on_failure_keep_string_jump:
942 extract_number_and_incr (&mcnt, &p); 943 extract_number_and_incr (&mcnt, &p);
943 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); 944 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
944 break;
945
946 case on_failure_jump_exclusive:
947 extract_number_and_incr (&mcnt, &p);
948 printf ("/on_failure_jump_exclusive to %d", p + mcnt - start);
949 break; 945 break;
950 946
951 case on_failure_jump_loop: 947 case on_failure_jump_loop:
952 extract_number_and_incr (&mcnt, &p); 948 extract_number_and_incr (&mcnt, &p);
953 printf ("/on_failure_jump_loop to %d", p + mcnt - start); 949 printf ("/on_failure_jump_loop to %d", p + mcnt - start);
1530 const unsigned char *p, 1526 const unsigned char *p,
1531 reg_syntax_t syntax)); 1527 reg_syntax_t syntax));
1532 static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p, 1528 static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p,
1533 const unsigned char *pend, 1529 const unsigned char *pend,
1534 reg_syntax_t syntax)); 1530 reg_syntax_t syntax));
1531 static unsigned char *skip_one_char _RE_ARGS((unsigned char *p));
1535 1532
1536 /* Fetch the next character in the uncompiled pattern---translating it 1533 /* Fetch the next character in the uncompiled pattern---translating it
1537 if necessary. Also cast from a signed character in the constant 1534 if necessary. Also cast from a signed character in the constant
1538 string passed to us by the user to an unsigned char that we can use 1535 string passed to us by the user to an unsigned char that we can use
1539 as an array index (in, e.g., `translate'). */ 1536 as an array index (in, e.g., `translate'). */
2070 else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 2067 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2071 goto normal_char; 2068 goto normal_char;
2072 } 2069 }
2073 2070
2074 { 2071 {
2075 /* Are we optimizing this jump? */
2076 boolean keep_string_p = false;
2077
2078 /* 1 means zero (many) matches is allowed. */ 2072 /* 1 means zero (many) matches is allowed. */
2079 boolean zero_times_ok = 0, many_times_ok = 0; 2073 boolean zero_times_ok = 0, many_times_ok = 0;
2080 boolean greedy = 1; 2074 boolean greedy = 1;
2081 2075
2082 /* If there is a sequence of repetition chars, collapse it 2076 /* If there is a sequence of repetition chars, collapse it
2127 /* If we get here, we found another repeat character. */ 2121 /* If we get here, we found another repeat character. */
2128 } 2122 }
2129 2123
2130 /* Star, etc. applied to an empty pattern is equivalent 2124 /* Star, etc. applied to an empty pattern is equivalent
2131 to an empty pattern. */ 2125 to an empty pattern. */
2132 if (!laststart) 2126 if (!laststart || laststart == b)
2133 break; 2127 break;
2134 2128
2135 /* Now we know whether or not zero matches is allowed 2129 /* Now we know whether or not zero matches is allowed
2136 and also whether or not two or more matches is allowed. */ 2130 and also whether or not two or more matches is allowed. */
2137 if (greedy) 2131 if (greedy)
2138 { 2132 {
2139 if (many_times_ok) 2133 if (many_times_ok)
2140 { /* More than one repetition is allowed, so put in at the 2134 {
2141 end a backward relative jump from `b' to before the next 2135 boolean simple = skip_one_char (laststart) == b;
2142 jump we're going to put in below (which jumps from 2136 unsigned int startoffset = 0;
2143 laststart to after this jump). 2137 assert (skip_one_char (laststart) <= b);
2144 2138
2145 But if we are at the `*' in the exact sequence `.*\n', 2139 if (!zero_times_ok && simple)
2146 insert an unconditional jump backwards to the ., 2140 { /* Since simple * loops can be made faster by using
2147 instead of the beginning of the loop. This way we only 2141 on_failure_keep_string_jump, we turn simple P+
2148 push a failure point once, instead of every time 2142 into PP* if P is simple. */
2149 through the loop. */ 2143 unsigned char *p1, *p2;
2150 assert (p - 1 > pattern); 2144 startoffset = b - laststart;
2151 2145 GET_BUFFER_SPACE (startoffset);
2152 /* Allocate the space for the jump. */ 2146 p1 = b; p2 = laststart;
2153 GET_BUFFER_SPACE (3); 2147 while (p2 < p1)
2154 2148 *b++ = *p2++;
2155 /* We know we are not at the first character of the pattern, 2149 zero_times_ok = 1;
2156 because laststart was nonzero. And we've already
2157 incremented `p', by the way, to be the character after
2158 the `*'. Do we have to do something analogous here
2159 for null bytes, because of RE_DOT_NOT_NULL? */
2160 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2161 && zero_times_ok
2162 && p < pend
2163 && TRANSLATE (*p) == TRANSLATE ('\n')
2164 && !(syntax & RE_DOT_NEWLINE))
2165 { /* We have .*\n. */
2166 STORE_JUMP (jump, b, laststart);
2167 keep_string_p = true;
2168 } 2150 }
2151
2152 GET_BUFFER_SPACE (6);
2153 if (!zero_times_ok)
2154 /* A + loop. */
2155 STORE_JUMP (on_failure_jump_loop, b, b + 6);
2169 else 2156 else
2170 STORE_JUMP (jump, b, laststart - 3); 2157 /* Simple * loops can use on_failure_keep_string_jump
2171 2158 depending on what follows. But since we don't know
2172 /* We've added more stuff to the buffer. */ 2159 that yet, we leave the decision up to
2160 on_failure_jump_smart. */
2161 INSERT_JUMP (simple ? on_failure_jump_smart
2162 : on_failure_jump_loop,
2163 laststart + startoffset, b + 6);
2173 b += 3; 2164 b += 3;
2174 } 2165 STORE_JUMP (jump, b, laststart + startoffset);
2175
2176 /* On failure, jump from laststart to b + 3, which will be the
2177 end of the buffer after this jump is inserted. */
2178 GET_BUFFER_SPACE (3);
2179 if (!zero_times_ok)
2180 {
2181 assert (many_times_ok);
2182 INSERT_JUMP (on_failure_jump_smart, b - 3, b + 3);
2183 pending_exact = 0;
2184 b += 3; 2166 b += 3;
2185 } 2167 }
2186 else 2168 else
2187 { 2169 {
2188 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 2170 /* A simple ? pattern. */
2189 : !many_times_ok ? 2171 assert (zero_times_ok);
2190 on_failure_jump : on_failure_jump_smart, 2172 GET_BUFFER_SPACE (3);
2191 laststart, b + 3); 2173 INSERT_JUMP (on_failure_jump, laststart, b + 3);
2192 pending_exact = 0;
2193 b += 3; 2174 b += 3;
2194 } 2175 }
2195 } 2176 }
2196 else /* not greedy */ 2177 else /* not greedy */
2197 { /* I wish the greedy and non-greedy cases could be merged. */ 2178 { /* I wish the greedy and non-greedy cases could be merged. */
2222 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); 2203 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2223 b += 3; 2204 b += 3;
2224 } 2205 }
2225 } 2206 }
2226 } 2207 }
2208 pending_exact = 0;
2227 break; 2209 break;
2228 2210
2229 2211
2230 case '.': 2212 case '.':
2231 laststart = b; 2213 laststart = b;
3215 static boolean 3197 static boolean
3216 at_begline_loc_p (pattern, p, syntax) 3198 at_begline_loc_p (pattern, p, syntax)
3217 const unsigned char *pattern, *p; 3199 const unsigned char *pattern, *p;
3218 reg_syntax_t syntax; 3200 reg_syntax_t syntax;
3219 { 3201 {
3220 re_char *prev = p - 2; 3202 const unsigned char *prev = p - 2;
3221 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 3203 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3222 3204
3223 return 3205 return
3224 /* After a subexpression? */ 3206 /* After a subexpression? */
3225 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 3207 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3234 static boolean 3216 static boolean
3235 at_endline_loc_p (p, pend, syntax) 3217 at_endline_loc_p (p, pend, syntax)
3236 const unsigned char *p, *pend; 3218 const unsigned char *p, *pend;
3237 reg_syntax_t syntax; 3219 reg_syntax_t syntax;
3238 { 3220 {
3239 re_char *next = p; 3221 const unsigned char *next = p;
3240 boolean next_backslash = *next == '\\'; 3222 boolean next_backslash = *next == '\\';
3241 re_char *next_next = p + 1 < pend ? p + 1 : 0; 3223 const unsigned char *next_next = p + 1 < pend ? p + 1 : 0;
3242 3224
3243 return 3225 return
3244 /* Before a subexpression? */ 3226 /* Before a subexpression? */
3245 (syntax & RE_NO_BK_PARENS ? *next == ')' 3227 (syntax & RE_NO_BK_PARENS ? *next == ')'
3246 : next_backslash && next_next && *next_next == ')') 3228 : next_backslash && next_next && *next_next == ')')
3661 p += j; 3643 p += j;
3662 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) 3644 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
3663 { 3645 {
3664 case on_failure_jump: 3646 case on_failure_jump:
3665 case on_failure_keep_string_jump: 3647 case on_failure_keep_string_jump:
3666 case on_failure_jump_exclusive:
3667 case on_failure_jump_loop: 3648 case on_failure_jump_loop:
3668 case on_failure_jump_smart: 3649 case on_failure_jump_smart:
3669 p++; 3650 p++;
3670 break; 3651 break;
3671 default: 3652 default:
3675 to jump back to "just after here". */ 3656 to jump back to "just after here". */
3676 /* Fallthrough */ 3657 /* Fallthrough */
3677 3658
3678 case on_failure_jump: 3659 case on_failure_jump:
3679 case on_failure_keep_string_jump: 3660 case on_failure_keep_string_jump:
3680 case on_failure_jump_exclusive:
3681 case on_failure_jump_loop: 3661 case on_failure_jump_loop:
3682 case on_failure_jump_smart: 3662 case on_failure_jump_smart:
3683 handle_on_failure_jump: 3663 handle_on_failure_jump:
3684 EXTRACT_NUMBER_AND_INCR (j, p); 3664 EXTRACT_NUMBER_AND_INCR (j, p);
3685 3665
3985 3965
3986 startpos += irange - range; 3966 startpos += irange - range;
3987 } 3967 }
3988 else /* Searching backwards. */ 3968 else /* Searching backwards. */
3989 { 3969 {
3990 int room = (startpos >= size1 3970 buf_ch = STRING_CHAR (d, (startpos >= size1
3991 ? size2 + size1 - startpos 3971 ? size2 + size1 - startpos
3992 : size1 - startpos); 3972 : size1 - startpos));
3993
3994 buf_ch = STRING_CHAR (d, room);
3995 if (RE_TRANSLATE_P (translate)) 3973 if (RE_TRANSLATE_P (translate))
3996 buf_ch = RE_TRANSLATE (translate, buf_ch); 3974 buf_ch = RE_TRANSLATE (translate, buf_ch);
3997 3975
3998 if (! (buf_ch >= 0400 3976 if (! (buf_ch >= 0400
3999 || fastmap[buf_ch])) 3977 || fastmap[buf_ch]))
4151 #endif /* not MATCH_MAY_ALLOCATE */ 4129 #endif /* not MATCH_MAY_ALLOCATE */
4152 4130
4153 4131
4154 /* Optimization routines. */ 4132 /* Optimization routines. */
4155 4133
4134 /* If the operation is a match against one or more chars,
4135 return a pointer to the next operation, else return NULL. */
4136 static unsigned char *
4137 skip_one_char (p)
4138 unsigned char *p;
4139 {
4140 switch (SWITCH_ENUM_CAST (*p++))
4141 {
4142 case anychar:
4143 break;
4144
4145 case exactn:
4146 p += *p + 1;
4147 break;
4148
4149 case charset_not:
4150 case charset:
4151 if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4152 {
4153 int mcnt;
4154 p = CHARSET_RANGE_TABLE (p - 1);
4155 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4156 p = CHARSET_RANGE_TABLE_END (p, mcnt);
4157 }
4158 else
4159 p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4160 break;
4161
4162 #ifdef emacs
4163 case syntaxspec:
4164 case notsyntaxspec:
4165 case categoryspec:
4166 case notcategoryspec:
4167 #endif /* emacs */
4168 p++;
4169 break;
4170
4171 default:
4172 p = NULL;
4173 }
4174 return p;
4175 }
4176
4177
4156 /* Jump over non-matching operations. */ 4178 /* Jump over non-matching operations. */
4157 static unsigned char * 4179 static unsigned char *
4158 skip_noops (p, pend, memory) 4180 skip_noops (p, pend)
4159 unsigned char *p, *pend; 4181 unsigned char *p, *pend;
4160 int memory;
4161 { 4182 {
4162 int mcnt; 4183 int mcnt;
4163 while (p < pend) 4184 while (p < pend)
4164 { 4185 {
4165 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p)) 4186 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4166 { 4187 {
4167 case start_memory: 4188 case start_memory:
4168 if (!memory)
4169 return p;
4170 case stop_memory: 4189 case stop_memory:
4171 p += 2; break; 4190 p += 2; break;
4172 case no_op: 4191 case no_op:
4173 p += 1; break; 4192 p += 1; break;
4174 case jump: 4193 case jump:
4188 static int 4207 static int
4189 mutually_exclusive_p (bufp, p1, p2) 4208 mutually_exclusive_p (bufp, p1, p2)
4190 struct re_pattern_buffer *bufp; 4209 struct re_pattern_buffer *bufp;
4191 unsigned char *p1, *p2; 4210 unsigned char *p1, *p2;
4192 { 4211 {
4193 int multibyte = bufp->multibyte; 4212 re_opcode_t op2;
4213 const boolean multibyte = bufp->multibyte;
4194 unsigned char *pend = bufp->buffer + bufp->used; 4214 unsigned char *pend = bufp->buffer + bufp->used;
4195 4215
4196 assert (p1 >= bufp->buffer && p1 <= pend 4216 assert (p1 >= bufp->buffer && p1 < pend
4197 && p2 >= bufp->buffer && p2 <= pend); 4217 && p2 >= bufp->buffer && p2 <= pend);
4198 4218
4199 /* Skip over open/close-group commands. 4219 /* Skip over open/close-group commands.
4200 If what follows this loop is a ...+ construct, 4220 If what follows this loop is a ...+ construct,
4201 look at what begins its body, since we will have to 4221 look at what begins its body, since we will have to
4202 match at least one of that. */ 4222 match at least one of that. */
4203 p2 = skip_noops (p2, pend, 1); 4223 p2 = skip_noops (p2, pend);
4204 /* The same skip can be done for p1, except that skipping over 4224 /* The same skip can be done for p1, except that this function
4205 start_memory is not a good idea (if there's a group inside 4225 is only used in the case where p1 is a simple match operator. */
4206 the loop delimited by on_failure_jump_exclusive, then it 4226 /* p1 = skip_noops (p1, pend); */
4207 can't optimize the push away (it still works properly, but 4227
4208 slightly slower rather than faster)). */ 4228 assert (p1 >= bufp->buffer && p1 < pend
4209 p1 = skip_noops (p1, pend, 0); 4229 && p2 >= bufp->buffer && p2 <= pend);
4210 4230
4211 /* If we're at the end of the pattern, we can change. */ 4231 op2 = p2 == pend ? succeed : *p2;
4212 if (p2 == pend) 4232
4233 switch (SWITCH_ENUM_CAST (op2))
4213 { 4234 {
4214 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p1)) 4235 case succeed:
4236 case endbuf:
4237 /* If we're at the end of the pattern, we can change. */
4238 if (skip_one_char (p1))
4215 { 4239 {
4216 case anychar:
4217 case charset_not:
4218 case charset:
4219 case exactn:
4220 DEBUG_PRINT1 (" End of pattern: fast loop.\n"); 4240 DEBUG_PRINT1 (" End of pattern: fast loop.\n");
4221 return 1; 4241 return 1;
4222 default:
4223 return 0;
4224 } 4242 }
4225 } 4243 break;
4226 4244
4227 else if ((re_opcode_t) *p2 == exactn 4245 case endline:
4228 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 4246 if (!bufp->newline_anchor)
4229 { 4247 break;
4230 register unsigned int c 4248 /* Fallthrough */
4231 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 4249 case exactn:
4232 4250 {
4233 if ((re_opcode_t) *p1 == exactn) 4251 register unsigned int c
4234 { 4252 = (re_opcode_t) *p2 == endline ? '\n'
4235 if (!(multibyte /* && (c != '\n') */ 4253 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
4236 && BASE_LEADING_CODE_P (c)) 4254
4237 ? c != p1[2] 4255 if ((re_opcode_t) *p1 == exactn)
4238 : (STRING_CHAR (&p2[2], pend - &p2[2]) 4256 {
4239 != STRING_CHAR (&p1[2], pend - &p1[2]))) 4257 if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2))
4240 { 4258 {
4241 DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]); 4259 DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]);
4242 return 1; 4260 return 1;
4243 } 4261 }
4244 } 4262 }
4245 4263
4246 else if ((re_opcode_t) *p1 == charset 4264 else if ((re_opcode_t) *p1 == charset
4247 || (re_opcode_t) *p1 == charset_not) 4265 || (re_opcode_t) *p1 == charset_not)
4248 { 4266 {
4249 int not = (re_opcode_t) *p1 == charset_not; 4267 int not = (re_opcode_t) *p1 == charset_not;
4250 4268
4251 if (multibyte /* && (c != '\n') */ 4269 /* Test if C is listed in charset (or charset_not)
4252 && BASE_LEADING_CODE_P (c)) 4270 at `p1'. */
4253 c = STRING_CHAR (&p2[2], pend - &p2[2]); 4271 if (SINGLE_BYTE_CHAR_P (c))
4254 4272 {
4255 /* Test if C is listed in charset (or charset_not) 4273 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4256 at `p1'. */ 4274 && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4257 if (SINGLE_BYTE_CHAR_P (c)) 4275 not = !not;
4258 { 4276 }
4259 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH 4277 else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4260 && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 4278 CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
4261 not = !not; 4279
4262 } 4280 /* `not' is equal to 1 if c would match, which means
4263 else if (CHARSET_RANGE_TABLE_EXISTS_P (p1)) 4281 that we can't change to pop_failure_jump. */
4264 CHARSET_LOOKUP_RANGE_TABLE (not, c, p1); 4282 if (!not)
4265 4283 {
4266 /* `not' is equal to 1 if c would match, which means 4284 DEBUG_PRINT1 (" No match => fast loop.\n");
4267 that we can't change to pop_failure_jump. */ 4285 return 1;
4268 if (!not) 4286 }
4269 { 4287 }
4270 DEBUG_PRINT1 (" No match => fast loop.\n"); 4288 else if ((re_opcode_t) *p1 == anychar
4271 return 1; 4289 && c == '\n')
4272 } 4290 {
4273 } 4291 DEBUG_PRINT1 (" . != \\n => fast loop.\n");
4274 else if ((re_opcode_t) *p1 == anychar 4292 return 1;
4275 && c == '\n') 4293 }
4276 { 4294 }
4277 DEBUG_PRINT1 (" . != \\n => fast loop.\n"); 4295 break;
4278 return 1; 4296
4279 } 4297 case charset:
4280 } 4298 case charset_not:
4281 else if ((re_opcode_t) *p2 == charset 4299 {
4282 || (re_opcode_t) *p2 == charset_not) 4300 if ((re_opcode_t) *p1 == exactn)
4283 { 4301 /* Reuse the code above. */
4284 if ((re_opcode_t) *p1 == exactn) 4302 return mutually_exclusive_p (bufp, p2, p1);
4285 /* Reuse the code above. */
4286 return mutually_exclusive_p (bufp, p2, p1);
4287 4303
4288 4304
4289 /* It is hard to list up all the character in charset 4305 /* It is hard to list up all the character in charset
4290 P2 if it includes multibyte character. Give up in 4306 P2 if it includes multibyte character. Give up in
4291 such case. */ 4307 such case. */
4330 if (! (p2[2 + idx] == 0 4346 if (! (p2[2 + idx] == 0
4331 || (idx < CHARSET_BITMAP_SIZE (p1) 4347 || (idx < CHARSET_BITMAP_SIZE (p1)
4332 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) 4348 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4333 break; 4349 break;
4334 4350
4335 if (idx == p2[1]) 4351 if (idx == p2[1])
4336 { 4352 {
4337 DEBUG_PRINT1 (" No match => fast loop.\n"); 4353 DEBUG_PRINT1 (" No match => fast loop.\n");
4338 return 1; 4354 return 1;
4339 } 4355 }
4340 } 4356 }
4341 } 4357 }
4358 }
4359
4360 #ifdef emacs
4361 case wordend:
4362 case notsyntaxspec:
4363 return ((re_opcode_t) *p1 == syntaxspec
4364 && p1[1] == (op2 == wordend ? Sword : p2[1]));
4365
4366 case wordbeg:
4367 case syntaxspec:
4368 return ((re_opcode_t) *p1 == notsyntaxspec
4369 && p1[1] == (op2 == wordend ? Sword : p2[1]));
4370
4371 case wordbound:
4372 return (((re_opcode_t) *p1 == notsyntaxspec
4373 || (re_opcode_t) *p1 == syntaxspec)
4374 && p1[1] == Sword);
4375
4376 case categoryspec:
4377 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4378 case notcategoryspec:
4379 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4380 #endif /* emacs */
4381
4382 default:
4383 ;
4342 } 4384 }
4343 4385
4344 /* Safe default. */ 4386 /* Safe default. */
4345 return 0; 4387 return 0;
4346 } 4388 }
5155 mcnt, p + mcnt); 5197 mcnt, p + mcnt);
5156 5198
5157 PUSH_FAILURE_POINT (p - 3, NULL); 5199 PUSH_FAILURE_POINT (p - 3, NULL);
5158 break; 5200 break;
5159 5201
5160 case on_failure_jump_exclusive: 5202
5161 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5203 /* Simple loop detecting on_failure_jump: just check on the
5162 DEBUG_PRINT3 ("EXECUTING on_failure_jump_exclusive %d (to %p):\n", 5204 failure stack if the same spot was already hit earlier. */
5163 mcnt, p + mcnt);
5164
5165 if (! FAIL_STACK_EMPTY ()
5166 && FAILURE_PAT (TOP_FAILURE_HANDLE ()) == (p - 3)
5167 && fail_stack.avail == fail_stack.frame)
5168 {
5169 /* We are trying to push failure F2 onto the stack but there
5170 is already a failure F1 pushed from the same instruction.
5171 Between F1 and now, something has matched (else this is an
5172 improper use of on_failure_jump_exclusive), so that we know
5173 that the fail-destination of F1 cannot match, hence we can
5174 pop F1 before pushing F2. Instead of doing this pop/push,
5175 we manually turn F1 into F2.
5176 `fail_stack.avail == fail_stack.frame' makes sure
5177 that popping F1 doesn't involve registers, else
5178 this optimization cannot be done so trivially. */
5179 assert (FAILURE_STR (TOP_FAILURE_HANDLE ()) != d);
5180 FAILURE_STR (TOP_FAILURE_HANDLE ()) = d;
5181 }
5182 else
5183 PUSH_FAILURE_POINT (p - 3, d);
5184 break;
5185
5186 case on_failure_jump_loop: 5205 case on_failure_jump_loop:
5187 on_failure: 5206 on_failure:
5188 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5207 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5189 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n", 5208 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5190 mcnt, p + mcnt); 5209 mcnt, p + mcnt);
5213 mcnt, p + mcnt); 5232 mcnt, p + mcnt);
5214 5233
5215 PUSH_FAILURE_POINT (p -3, d); 5234 PUSH_FAILURE_POINT (p -3, d);
5216 break; 5235 break;
5217 5236
5218 /* This operation is used for greedy * and +. 5237 /* This operation is used for greedy *.
5219 Compare the beginning of the repeat with what in the 5238 Compare the beginning of the repeat with what in the
5220 pattern follows its end. If we can establish that there 5239 pattern follows its end. If we can establish that there
5221 is nothing that they would both match, i.e., that we 5240 is nothing that they would both match, i.e., that we
5222 would have to backtrack because of (as in, e.g., `a*a') 5241 would have to backtrack because of (as in, e.g., `a*a')
5223 then we can use a non-backtracking loop based on 5242 then we can use a non-backtracking loop based on
5224 on_failure_jump_exclusive instead of on_failure_jump_loop. */ 5243 on_failure_keep_string_jump instead of on_failure_jump. */
5225 case on_failure_jump_smart: 5244 case on_failure_jump_smart:
5226 QUIT; 5245 QUIT;
5227 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5246 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5228 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n", 5247 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5229 mcnt, p + mcnt); 5248 mcnt, p + mcnt);
5232 unsigned char *p2 = p + mcnt; /* Destination of the jump. */ 5251 unsigned char *p2 = p + mcnt; /* Destination of the jump. */
5233 5252
5234 p -= 3; /* Reset so that we will re-execute the 5253 p -= 3; /* Reset so that we will re-execute the
5235 instruction once it's been changed. */ 5254 instruction once it's been changed. */
5236 5255
5256 EXTRACT_NUMBER (mcnt, p2 - 2);
5257
5258 /* Ensure this is a indeed the trivial kind of loop
5259 we are expecting. */
5260 assert (skip_one_char (p1) == p2 - 3);
5261 assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
5237 DEBUG_STATEMENT (debug += 2); 5262 DEBUG_STATEMENT (debug += 2);
5238 if (mutually_exclusive_p (bufp, p1, p2)) 5263 if (mutually_exclusive_p (bufp, p1, p2))
5239 { 5264 {
5240 /* Use a fast `on_failure_keep_string_jump' loop. */ 5265 /* Use a fast `on_failure_keep_string_jump' loop. */
5241 *p = (unsigned char) on_failure_jump_exclusive; 5266 DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
5242 /* STORE_NUMBER (p2 - 2, mcnt + 3); */ 5267 *p = (unsigned char) on_failure_keep_string_jump;
5268 STORE_NUMBER (p2 - 2, mcnt + 3);
5243 } 5269 }
5244 else 5270 else
5245 { 5271 {
5246 /* Default to a safe `on_failure_jump' loop. */ 5272 /* Default to a safe `on_failure_jump' loop. */
5247 DEBUG_PRINT1 (" smart default => slow loop.\n"); 5273 DEBUG_PRINT1 (" smart default => slow loop.\n");
5248 *p = (unsigned char) on_failure_jump_loop; 5274 *p = (unsigned char) on_failure_jump;
5249 } 5275 }
5250 DEBUG_STATEMENT (debug -= 2); 5276 DEBUG_STATEMENT (debug -= 2);
5251 } 5277 }
5252 break; 5278 break;
5253 5279
5598 { 5624 {
5599 case on_failure_keep_string_jump: 5625 case on_failure_keep_string_jump:
5600 assert (str == NULL); 5626 assert (str == NULL);
5601 goto continue_failure_jump; 5627 goto continue_failure_jump;
5602 5628
5603 case on_failure_jump_exclusive:
5604 /* If something has matched, the alternative will not match,
5605 so we might as well keep popping right away. */
5606 if (0 /* d != str && d != string2 */) /* Don't bother. -sm */
5607 /* (d == string2 && str == end1) => (d =~ str) */
5608 goto fail;
5609 /* Fallthrough */
5610
5611 case on_failure_jump_loop: 5629 case on_failure_jump_loop:
5612 case on_failure_jump: 5630 case on_failure_jump:
5613 case succeed_n: 5631 case succeed_n:
5614 d = str; 5632 d = str;
5615 continue_failure_jump: 5633 continue_failure_jump: