Mercurial > emacs
comparison src/regex.c @ 31312:e6b19a60e035
* regex.h (RE_NO_NEWLINE_ANCHOR): New syntax flag.
(struct re_pattern_buffer): Remove newline_anchor.
* regex.c: Keep namespace clean for GNU libc by renaming <fun>
to __<fun> and using `weak_alias (__<fun>, <fun>)'.
(re_max_failures, fail_stack): Use size_t rather than unsigned.
(regex_compile): For ^ and $, choose between buffer and line (beg|end)
depending on the new RE_NO_NEWLINE_ANCHOR syntax flag.
(print_compiled_pattern, re_search_2, mutually_exclusive_p)
(re_match_2_internal, re_compile_pattern, re_comp, regcomp):
Get rid of references to newline_anchor.
(regcomp): Allocate and precompute a fastmap.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Thu, 31 Aug 2000 17:19:15 +0000 |
parents | 34c25566aab3 |
children | b8513fe83893 |
comparison
equal
deleted
inserted
replaced
31311:a04fdd7a9207 | 31312:e6b19a60e035 |
---|---|
23 - structure the opcode space into opcode+flag. | 23 - structure the opcode space into opcode+flag. |
24 - merge with glibc's regex.[ch]. | 24 - merge with glibc's regex.[ch]. |
25 - replace succeed_n + jump_n with a combined operation so that the counter | 25 - replace succeed_n + jump_n with a combined operation so that the counter |
26 can simply be decremented when popping the failure_point without having | 26 can simply be decremented when popping the failure_point without having |
27 to stack up failure_count entries. | 27 to stack up failure_count entries. |
28 - get rid of `newline_anchor'. | |
29 */ | 28 */ |
30 | 29 |
31 /* AIX requires this to be the first thing in the file. */ | 30 /* AIX requires this to be the first thing in the file. */ |
32 #if defined _AIX && !defined REGEX_MALLOC | 31 #if defined _AIX && !defined REGEX_MALLOC |
33 #pragma alloca | 32 #pragma alloca |
43 #if defined STDC_HEADERS && !defined emacs | 42 #if defined STDC_HEADERS && !defined emacs |
44 # include <stddef.h> | 43 # include <stddef.h> |
45 #else | 44 #else |
46 /* We need this for `regex.h', and perhaps for the Emacs include files. */ | 45 /* We need this for `regex.h', and perhaps for the Emacs include files. */ |
47 # include <sys/types.h> | 46 # include <sys/types.h> |
47 #endif | |
48 | |
49 #ifdef _LIBC | |
50 /* We have to keep the namespace clean. */ | |
51 # define regfree(preg) __regfree (preg) | |
52 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) | |
53 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) | |
54 # define regerror(errcode, preg, errbuf, errbuf_size) \ | |
55 __regerror(errcode, preg, errbuf, errbuf_size) | |
56 # define re_set_registers(bu, re, nu, st, en) \ | |
57 __re_set_registers (bu, re, nu, st, en) | |
58 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ | |
59 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) | |
60 # define re_match(bufp, string, size, pos, regs) \ | |
61 __re_match (bufp, string, size, pos, regs) | |
62 # define re_search(bufp, string, size, startpos, range, regs) \ | |
63 __re_search (bufp, string, size, startpos, range, regs) | |
64 # define re_compile_pattern(pattern, length, bufp) \ | |
65 __re_compile_pattern (pattern, length, bufp) | |
66 # define re_set_syntax(syntax) __re_set_syntax (syntax) | |
67 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ | |
68 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) | |
69 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) | |
70 | |
71 # define WEAK_ALIAS(a,b) weak_alias (a, b) | |
72 | |
73 /* We are also using some library internals. */ | |
74 # include <locale/localeinfo.h> | |
75 # include <locale/elem-hash.h> | |
76 # include <langinfo.h> | |
77 #else | |
78 # define WEAK_ALIAS(a,b) | |
48 #endif | 79 #endif |
49 | 80 |
50 /* This is for other GNU distributions with internationalized messages. */ | 81 /* This is for other GNU distributions with internationalized messages. */ |
51 #if HAVE_LIBINTL_H || defined _LIBC | 82 #if HAVE_LIBINTL_H || defined _LIBC |
52 # include <libintl.h> | 83 # include <libintl.h> |
1106 } | 1137 } |
1107 | 1138 |
1108 printf ("re_nsub: %d\t", bufp->re_nsub); | 1139 printf ("re_nsub: %d\t", bufp->re_nsub); |
1109 printf ("regs_alloc: %d\t", bufp->regs_allocated); | 1140 printf ("regs_alloc: %d\t", bufp->regs_allocated); |
1110 printf ("can_be_null: %d\t", bufp->can_be_null); | 1141 printf ("can_be_null: %d\t", bufp->can_be_null); |
1111 printf ("newline_anchor: %d\n", bufp->newline_anchor); | |
1112 printf ("no_sub: %d\t", bufp->no_sub); | 1142 printf ("no_sub: %d\t", bufp->no_sub); |
1113 printf ("not_bol: %d\t", bufp->not_bol); | 1143 printf ("not_bol: %d\t", bufp->not_bol); |
1114 printf ("not_eol: %d\t", bufp->not_eol); | 1144 printf ("not_eol: %d\t", bufp->not_eol); |
1115 printf ("syntax: %lx\n", bufp->syntax); | 1145 printf ("syntax: %lx\n", bufp->syntax); |
1116 fflush (stdout); | 1146 fflush (stdout); |
1182 reg_syntax_t ret = re_syntax_options; | 1212 reg_syntax_t ret = re_syntax_options; |
1183 | 1213 |
1184 re_syntax_options = syntax; | 1214 re_syntax_options = syntax; |
1185 return ret; | 1215 return ret; |
1186 } | 1216 } |
1217 WEAK_ALIAS (__re_set_syntax, re_set_syntax) | |
1187 | 1218 |
1188 /* This table gives an error message for each of the error codes listed | 1219 /* This table gives an error message for each of the error codes listed |
1189 in regex.h. Obviously the order here has to be same as there. | 1220 in regex.h. Obviously the order here has to be same as there. |
1190 POSIX doesn't require that we do anything for REG_NOERROR, | 1221 POSIX doesn't require that we do anything for REG_NOERROR, |
1191 but why not be nice? */ | 1222 but why not be nice? */ |
1262 #endif | 1293 #endif |
1263 | 1294 |
1264 /* Roughly the maximum number of failure points on the stack. Would be | 1295 /* Roughly the maximum number of failure points on the stack. Would be |
1265 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. | 1296 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. |
1266 This is a variable only so users of regex can assign to it; we never | 1297 This is a variable only so users of regex can assign to it; we never |
1267 change it ourselves. */ | 1298 change it ourselves. */ |
1268 #if defined MATCH_MAY_ALLOCATE | 1299 # if defined MATCH_MAY_ALLOCATE |
1269 /* Note that 4400 is enough to cause a crash on Alpha OSF/1, | 1300 /* Note that 4400 was enough to cause a crash on Alpha OSF/1, |
1270 whose default stack limit is 2mb. In order for a larger | 1301 whose default stack limit is 2mb. In order for a larger |
1271 value to work reliably, you have to try to make it accord | 1302 value to work reliably, you have to try to make it accord |
1272 with the process stack limit. */ | 1303 with the process stack limit. */ |
1273 int re_max_failures = 40000; | 1304 size_t re_max_failures = 40000; |
1274 #else | 1305 # else |
1275 int re_max_failures = 4000; | 1306 size_t re_max_failures = 4000; |
1276 #endif | 1307 # endif |
1277 | 1308 |
1278 union fail_stack_elt | 1309 union fail_stack_elt |
1279 { | 1310 { |
1280 const unsigned char *pointer; | 1311 const unsigned char *pointer; |
1281 unsigned int integer; | 1312 /* This should be the biggest `int' that's no bigger than a pointer. */ |
1313 long integer; | |
1282 }; | 1314 }; |
1283 | 1315 |
1284 typedef union fail_stack_elt fail_stack_elt_t; | 1316 typedef union fail_stack_elt fail_stack_elt_t; |
1285 | 1317 |
1286 typedef struct | 1318 typedef struct |
1287 { | 1319 { |
1288 fail_stack_elt_t *stack; | 1320 fail_stack_elt_t *stack; |
1289 unsigned size; | 1321 size_t size; |
1290 unsigned avail; /* Offset of next open position. */ | 1322 size_t avail; /* Offset of next open position. */ |
1291 unsigned frame; /* Offset of the cur constructed frame. */ | 1323 size_t frame; /* Offset of the cur constructed frame. */ |
1292 } fail_stack_type; | 1324 } fail_stack_type; |
1293 | 1325 |
1294 #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) | 1326 #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) |
1295 #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) | 1327 #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) |
1296 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) | 1328 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) |
1961 `used' is set to the length of the compiled pattern; | 1993 `used' is set to the length of the compiled pattern; |
1962 `fastmap_accurate' is zero; | 1994 `fastmap_accurate' is zero; |
1963 `re_nsub' is the number of subexpressions in PATTERN; | 1995 `re_nsub' is the number of subexpressions in PATTERN; |
1964 `not_bol' and `not_eol' are zero; | 1996 `not_bol' and `not_eol' are zero; |
1965 | 1997 |
1966 The `fastmap' and `newline_anchor' fields are neither | 1998 The `fastmap' field is neither examined nor set. */ |
1967 examined nor set. */ | |
1968 | 1999 |
1969 /* Insert the `jump' from the end of last alternative to "here". | 2000 /* Insert the `jump' from the end of last alternative to "here". |
1970 The space for the jump has already been allocated. */ | 2001 The space for the jump has already been allocated. */ |
1971 #define FIXUP_ALT_JUMP() \ | 2002 #define FIXUP_ALT_JUMP() \ |
1972 do { \ | 2003 do { \ |
2124 p == pattern + 1 | 2155 p == pattern + 1 |
2125 /* If context independent, it's an operator. */ | 2156 /* If context independent, it's an operator. */ |
2126 || syntax & RE_CONTEXT_INDEP_ANCHORS | 2157 || syntax & RE_CONTEXT_INDEP_ANCHORS |
2127 /* Otherwise, depends on what's come before. */ | 2158 /* Otherwise, depends on what's come before. */ |
2128 || at_begline_loc_p (pattern, p, syntax)) | 2159 || at_begline_loc_p (pattern, p, syntax)) |
2129 BUF_PUSH (begline); | 2160 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline); |
2130 else | 2161 else |
2131 goto normal_char; | 2162 goto normal_char; |
2132 } | 2163 } |
2133 break; | 2164 break; |
2134 | 2165 |
2139 p == pend | 2170 p == pend |
2140 /* If context independent, it's an operator. */ | 2171 /* If context independent, it's an operator. */ |
2141 || syntax & RE_CONTEXT_INDEP_ANCHORS | 2172 || syntax & RE_CONTEXT_INDEP_ANCHORS |
2142 /* Otherwise, depends on what's next. */ | 2173 /* Otherwise, depends on what's next. */ |
2143 || at_endline_loc_p (p, pend, syntax)) | 2174 || at_endline_loc_p (p, pend, syntax)) |
2144 BUF_PUSH (endline); | 2175 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline); |
2145 else | 2176 else |
2146 goto normal_char; | 2177 goto normal_char; |
2147 } | 2178 } |
2148 break; | 2179 break; |
2149 | 2180 |
3397 worklist. | 3428 worklist. |
3398 We guarantee termination by ignoring backward jumps (more or less), | 3429 We guarantee termination by ignoring backward jumps (more or less), |
3399 so that `p' is monotonically increasing. More to the point, we | 3430 so that `p' is monotonically increasing. More to the point, we |
3400 never set `p' (or push) anything `<= p1'. */ | 3431 never set `p' (or push) anything `<= p1'. */ |
3401 | 3432 |
3402 /* If can_be_null is set, then the fastmap will not be used anyway. */ | |
3403 while (1) | 3433 while (1) |
3404 { | 3434 { |
3405 /* `p1' is used as a marker of how far back a `on_failure_jump' | 3435 /* `p1' is used as a marker of how far back a `on_failure_jump' |
3406 can go without being ignored. It is normally equal to `p' | 3436 can go without being ignored. It is normally equal to `p' |
3407 (which prevents any backward `on_failure_jump') except right | 3437 (which prevents any backward `on_failure_jump') except right |
3687 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ | 3717 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ |
3688 bufp->fastmap_accurate = 1; /* It will be when we're done. */ | 3718 bufp->fastmap_accurate = 1; /* It will be when we're done. */ |
3689 | 3719 |
3690 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, | 3720 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, |
3691 fastmap, RE_MULTIBYTE_P (bufp)); | 3721 fastmap, RE_MULTIBYTE_P (bufp)); |
3722 bufp->can_be_null = (analysis != 0); | |
3692 if (analysis < -1) | 3723 if (analysis < -1) |
3693 return analysis; | 3724 return analysis; |
3694 bufp->can_be_null = (analysis != 0); | |
3695 return 0; | 3725 return 0; |
3696 } /* re_compile_fastmap */ | 3726 } /* re_compile_fastmap */ |
3697 | 3727 |
3698 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | 3728 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and |
3699 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use | 3729 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use |
3727 bufp->regs_allocated = REGS_UNALLOCATED; | 3757 bufp->regs_allocated = REGS_UNALLOCATED; |
3728 regs->num_regs = 0; | 3758 regs->num_regs = 0; |
3729 regs->start = regs->end = (regoff_t *) 0; | 3759 regs->start = regs->end = (regoff_t *) 0; |
3730 } | 3760 } |
3731 } | 3761 } |
3762 WEAK_ALIAS (__re_set_registers, re_set_registers) | |
3732 | 3763 |
3733 /* Searching routines. */ | 3764 /* Searching routines. */ |
3734 | 3765 |
3735 /* Like re_search_2, below, but only one string is specified, and | 3766 /* Like re_search_2, below, but only one string is specified, and |
3736 doesn't let you say where to stop matching. */ | 3767 doesn't let you say where to stop matching. */ |
3743 struct re_registers *regs; | 3774 struct re_registers *regs; |
3744 { | 3775 { |
3745 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | 3776 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, |
3746 regs, size); | 3777 regs, size); |
3747 } | 3778 } |
3779 WEAK_ALIAS (__re_search, re_search) | |
3748 | 3780 |
3749 /* End address of virtual concatenation of string. */ | 3781 /* End address of virtual concatenation of string. */ |
3750 #define STOP_ADDR_VSTRING(P) \ | 3782 #define STOP_ADDR_VSTRING(P) \ |
3751 (((P) >= size1 ? string2 + size2 : string1 + size1)) | 3783 (((P) >= size1 ? string2 + size2 : string1 + size1)) |
3752 | 3784 |
3790 re_char *string2 = (re_char*) str2; | 3822 re_char *string2 = (re_char*) str2; |
3791 register char *fastmap = bufp->fastmap; | 3823 register char *fastmap = bufp->fastmap; |
3792 register RE_TRANSLATE_TYPE translate = bufp->translate; | 3824 register RE_TRANSLATE_TYPE translate = bufp->translate; |
3793 int total_size = size1 + size2; | 3825 int total_size = size1 + size2; |
3794 int endpos = startpos + range; | 3826 int endpos = startpos + range; |
3795 int anchored_start = 0; | 3827 boolean anchored_start; |
3796 | 3828 |
3797 /* Nonzero if we have to concern multibyte character. */ | 3829 /* Nonzero if we have to concern multibyte character. */ |
3798 const boolean multibyte = RE_MULTIBYTE_P (bufp); | 3830 const boolean multibyte = RE_MULTIBYTE_P (bufp); |
3799 | 3831 |
3800 /* Check for out-of-range STARTPOS. */ | 3832 /* Check for out-of-range STARTPOS. */ |
3834 if (fastmap && !bufp->fastmap_accurate) | 3866 if (fastmap && !bufp->fastmap_accurate) |
3835 if (re_compile_fastmap (bufp) == -2) | 3867 if (re_compile_fastmap (bufp) == -2) |
3836 return -2; | 3868 return -2; |
3837 | 3869 |
3838 /* See whether the pattern is anchored. */ | 3870 /* See whether the pattern is anchored. */ |
3839 if (bufp->buffer[0] == begline) | 3871 anchored_start = (bufp->buffer[0] == begline); |
3840 anchored_start = 1; | |
3841 | 3872 |
3842 #ifdef emacs | 3873 #ifdef emacs |
3843 gl_state.object = re_match_object; | 3874 gl_state.object = re_match_object; |
3844 { | 3875 { |
3845 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); | 3876 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); |
3855 skip quickly past places we cannot match. | 3886 skip quickly past places we cannot match. |
3856 We don't bother to treat startpos == 0 specially | 3887 We don't bother to treat startpos == 0 specially |
3857 because that case doesn't repeat. */ | 3888 because that case doesn't repeat. */ |
3858 if (anchored_start && startpos > 0) | 3889 if (anchored_start && startpos > 0) |
3859 { | 3890 { |
3860 if (! (bufp->newline_anchor | 3891 if (! ((startpos <= size1 ? string1[startpos - 1] |
3861 && ((startpos <= size1 ? string1[startpos - 1] | 3892 : string2[startpos - size1 - 1]) |
3862 : string2[startpos - size1 - 1]) | 3893 == '\n')) |
3863 == '\n'))) | |
3864 goto advance; | 3894 goto advance; |
3865 } | 3895 } |
3866 | 3896 |
3867 /* If a fastmap is supplied, skip quickly over characters that | 3897 /* If a fastmap is supplied, skip quickly over characters that |
3868 cannot be the start of a match. If the pattern can match the | 3898 cannot be the start of a match. If the pattern can match the |
4007 } | 4037 } |
4008 } | 4038 } |
4009 } | 4039 } |
4010 return -1; | 4040 return -1; |
4011 } /* re_search_2 */ | 4041 } /* re_search_2 */ |
4042 WEAK_ALIAS (__re_search_2, re_search_2) | |
4012 | 4043 |
4013 /* Declarations and macros for re_match_2. */ | 4044 /* Declarations and macros for re_match_2. */ |
4014 | 4045 |
4015 static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, | 4046 static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, |
4016 register int len, | 4047 register int len, |
4211 return 1; | 4242 return 1; |
4212 } | 4243 } |
4213 break; | 4244 break; |
4214 | 4245 |
4215 case endline: | 4246 case endline: |
4216 if (!bufp->newline_anchor) | |
4217 break; | |
4218 /* Fallthrough */ | |
4219 case exactn: | 4247 case exactn: |
4220 { | 4248 { |
4221 register unsigned int c | 4249 register unsigned int c |
4222 = (re_opcode_t) *p2 == endline ? '\n' | 4250 = (re_opcode_t) *p2 == endline ? '\n' |
4223 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); | 4251 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); |
4375 # if defined C_ALLOCA && !defined REGEX_MALLOC | 4403 # if defined C_ALLOCA && !defined REGEX_MALLOC |
4376 alloca (0); | 4404 alloca (0); |
4377 # endif | 4405 # endif |
4378 return result; | 4406 return result; |
4379 } | 4407 } |
4408 WEAK_ALIAS (__re_match, re_match) | |
4380 #endif /* not emacs */ | 4409 #endif /* not emacs */ |
4381 | 4410 |
4382 #ifdef emacs | 4411 #ifdef emacs |
4383 /* In Emacs, this is the string or buffer in which we | 4412 /* In Emacs, this is the string or buffer in which we |
4384 are matching. It is used for looking up syntax properties. */ | 4413 are matching. It is used for looking up syntax properties. */ |
4422 #if defined C_ALLOCA && !defined REGEX_MALLOC | 4451 #if defined C_ALLOCA && !defined REGEX_MALLOC |
4423 alloca (0); | 4452 alloca (0); |
4424 #endif | 4453 #endif |
4425 return result; | 4454 return result; |
4426 } | 4455 } |
4456 WEAK_ALIAS (__re_match_2, re_match_2) | |
4427 | 4457 |
4428 /* This is a separate function so that we can force an alloca cleanup | 4458 /* This is a separate function so that we can force an alloca cleanup |
4429 afterwards. */ | 4459 afterwards. */ |
4430 static int | 4460 static int |
4431 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | 4461 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) |
5087 } | 5117 } |
5088 break; | 5118 break; |
5089 | 5119 |
5090 | 5120 |
5091 /* begline matches the empty string at the beginning of the string | 5121 /* begline matches the empty string at the beginning of the string |
5092 (unless `not_bol' is set in `bufp'), and, if | 5122 (unless `not_bol' is set in `bufp'), and after newlines. */ |
5093 `newline_anchor' is set, after newlines. */ | |
5094 case begline: | 5123 case begline: |
5095 DEBUG_PRINT1 ("EXECUTING begline.\n"); | 5124 DEBUG_PRINT1 ("EXECUTING begline.\n"); |
5096 | 5125 |
5097 if (AT_STRINGS_BEG (d)) | 5126 if (AT_STRINGS_BEG (d)) |
5098 { | 5127 { |
5100 } | 5129 } |
5101 else | 5130 else |
5102 { | 5131 { |
5103 unsigned char c; | 5132 unsigned char c; |
5104 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); | 5133 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); |
5105 if (c == '\n' && bufp->newline_anchor) | 5134 if (c == '\n') |
5106 break; | 5135 break; |
5107 } | 5136 } |
5108 /* In all other cases, we fail. */ | 5137 /* In all other cases, we fail. */ |
5109 goto fail; | 5138 goto fail; |
5110 | 5139 |
5118 if (!bufp->not_eol) break; | 5147 if (!bufp->not_eol) break; |
5119 } | 5148 } |
5120 else | 5149 else |
5121 { | 5150 { |
5122 PREFETCH_NOLIMIT (); | 5151 PREFETCH_NOLIMIT (); |
5123 if (*d == '\n' && bufp->newline_anchor) | 5152 if (*d == '\n') |
5124 break; | 5153 break; |
5125 } | 5154 } |
5126 goto fail; | 5155 goto fail; |
5127 | 5156 |
5128 | 5157 |
5643 /* And GNU code determines whether or not to get register information | 5672 /* And GNU code determines whether or not to get register information |
5644 by passing null for the REGS argument to re_match, etc., not by | 5673 by passing null for the REGS argument to re_match, etc., not by |
5645 setting no_sub. */ | 5674 setting no_sub. */ |
5646 bufp->no_sub = 0; | 5675 bufp->no_sub = 0; |
5647 | 5676 |
5648 /* Match anchors at newline. */ | |
5649 bufp->newline_anchor = 1; | |
5650 | |
5651 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp); | 5677 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp); |
5652 | 5678 |
5653 if (!ret) | 5679 if (!ret) |
5654 return NULL; | 5680 return NULL; |
5655 return gettext (re_error_msgid[(int) ret]); | 5681 return gettext (re_error_msgid[(int) ret]); |
5656 } | 5682 } |
5683 WEAK_ALIAS (__re_compile_pattern, re_compile_pattern) | |
5657 | 5684 |
5658 /* Entry points compatible with 4.2 BSD regex library. We don't define | 5685 /* Entry points compatible with 4.2 BSD regex library. We don't define |
5659 them unless specifically requested. */ | 5686 them unless specifically requested. */ |
5660 | 5687 |
5661 #if defined _REGEX_RE_COMP || defined _LIBC | 5688 #if defined _REGEX_RE_COMP || defined _LIBC |
5697 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); | 5724 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); |
5698 } | 5725 } |
5699 | 5726 |
5700 /* Since `re_exec' always passes NULL for the `regs' argument, we | 5727 /* Since `re_exec' always passes NULL for the `regs' argument, we |
5701 don't need to initialize the pattern buffer fields which affect it. */ | 5728 don't need to initialize the pattern buffer fields which affect it. */ |
5702 | |
5703 /* Match anchors at newlines. */ | |
5704 re_comp_buf.newline_anchor = 1; | |
5705 | 5729 |
5706 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); | 5730 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); |
5707 | 5731 |
5708 if (!ret) | 5732 if (!ret) |
5709 return NULL; | 5733 return NULL; |
5738 `buffer' to the compiled pattern; | 5762 `buffer' to the compiled pattern; |
5739 `used' to the length of the compiled pattern; | 5763 `used' to the length of the compiled pattern; |
5740 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the | 5764 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the |
5741 REG_EXTENDED bit in CFLAGS is set; otherwise, to | 5765 REG_EXTENDED bit in CFLAGS is set; otherwise, to |
5742 RE_SYNTAX_POSIX_BASIC; | 5766 RE_SYNTAX_POSIX_BASIC; |
5743 `newline_anchor' to REG_NEWLINE being set in CFLAGS; | 5767 `fastmap' to an allocated space for the fastmap; |
5744 `fastmap' and `fastmap_accurate' to zero; | 5768 `fastmap_accurate' to zero; |
5745 `re_nsub' to the number of subexpressions in PATTERN. | 5769 `re_nsub' to the number of subexpressions in PATTERN. |
5746 | 5770 |
5747 PATTERN is the address of the pattern string. | 5771 PATTERN is the address of the pattern string. |
5748 | 5772 |
5749 CFLAGS is a series of bits which affect compilation. | 5773 CFLAGS is a series of bits which affect compilation. |
5778 /* regex_compile will allocate the space for the compiled pattern. */ | 5802 /* regex_compile will allocate the space for the compiled pattern. */ |
5779 preg->buffer = 0; | 5803 preg->buffer = 0; |
5780 preg->allocated = 0; | 5804 preg->allocated = 0; |
5781 preg->used = 0; | 5805 preg->used = 0; |
5782 | 5806 |
5783 /* Don't bother to use a fastmap when searching. This simplifies the | 5807 /* Try to allocate space for the fastmap. */ |
5784 REG_NEWLINE case: if we used a fastmap, we'd have to put all the | 5808 preg->fastmap = (char *) malloc (1 << BYTEWIDTH); |
5785 characters after newlines into the fastmap. This way, we just try | |
5786 every character. */ | |
5787 preg->fastmap = 0; | |
5788 | 5809 |
5789 if (cflags & REG_ICASE) | 5810 if (cflags & REG_ICASE) |
5790 { | 5811 { |
5791 unsigned i; | 5812 unsigned i; |
5792 | 5813 |
5806 /* If REG_NEWLINE is set, newlines are treated differently. */ | 5827 /* If REG_NEWLINE is set, newlines are treated differently. */ |
5807 if (cflags & REG_NEWLINE) | 5828 if (cflags & REG_NEWLINE) |
5808 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ | 5829 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ |
5809 syntax &= ~RE_DOT_NEWLINE; | 5830 syntax &= ~RE_DOT_NEWLINE; |
5810 syntax |= RE_HAT_LISTS_NOT_NEWLINE; | 5831 syntax |= RE_HAT_LISTS_NOT_NEWLINE; |
5811 /* It also changes the matching behavior. */ | |
5812 preg->newline_anchor = 1; | |
5813 } | 5832 } |
5814 else | 5833 else |
5815 preg->newline_anchor = 0; | 5834 syntax |= RE_NO_NEWLINE_ANCHOR; |
5816 | 5835 |
5817 preg->no_sub = !!(cflags & REG_NOSUB); | 5836 preg->no_sub = !!(cflags & REG_NOSUB); |
5818 | 5837 |
5819 /* POSIX says a null character in the pattern terminates it, so we | 5838 /* POSIX says a null character in the pattern terminates it, so we |
5820 can use strlen here in compiling the pattern. */ | 5839 can use strlen here in compiling the pattern. */ |
5821 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg); | 5840 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg); |
5822 | 5841 |
5823 /* POSIX doesn't distinguish between an unmatched open-group and an | 5842 /* POSIX doesn't distinguish between an unmatched open-group and an |
5824 unmatched close-group: both are REG_EPAREN. */ | 5843 unmatched close-group: both are REG_EPAREN. */ |
5825 if (ret == REG_ERPAREN) ret = REG_EPAREN; | 5844 if (ret == REG_ERPAREN) |
5826 | 5845 ret = REG_EPAREN; |
5846 | |
5847 if (ret == REG_NOERROR && preg->fastmap) | |
5848 { /* Compute the fastmap now, since regexec cannot modify the pattern | |
5849 buffer. */ | |
5850 re_compile_fastmap (preg); | |
5851 if (preg->can_be_null) | |
5852 { /* The fastmap can't be used anyway. */ | |
5853 free (preg->fastmap); | |
5854 preg->fastmap = NULL; | |
5855 } | |
5856 } | |
5827 return (int) ret; | 5857 return (int) ret; |
5828 } | 5858 } |
5859 WEAK_ALIAS (__regcomp, regcomp) | |
5829 | 5860 |
5830 | 5861 |
5831 /* regexec searches for a given pattern, specified by PREG, in the | 5862 /* regexec searches for a given pattern, specified by PREG, in the |
5832 string STRING. | 5863 string STRING. |
5833 | 5864 |
5852 { | 5883 { |
5853 int ret; | 5884 int ret; |
5854 struct re_registers regs; | 5885 struct re_registers regs; |
5855 regex_t private_preg; | 5886 regex_t private_preg; |
5856 int len = strlen (string); | 5887 int len = strlen (string); |
5857 boolean want_reg_info = !preg->no_sub && nmatch > 0; | 5888 boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch; |
5858 | 5889 |
5859 private_preg = *preg; | 5890 private_preg = *preg; |
5860 | 5891 |
5861 private_preg.not_bol = !!(eflags & REG_NOTBOL); | 5892 private_preg.not_bol = !!(eflags & REG_NOTBOL); |
5862 private_preg.not_eol = !!(eflags & REG_NOTEOL); | 5893 private_preg.not_eol = !!(eflags & REG_NOTEOL); |
5873 if (regs.start == NULL) | 5904 if (regs.start == NULL) |
5874 return (int) REG_NOMATCH; | 5905 return (int) REG_NOMATCH; |
5875 regs.end = regs.start + nmatch; | 5906 regs.end = regs.start + nmatch; |
5876 } | 5907 } |
5877 | 5908 |
5909 /* Instead of using not_eol to implement REG_NOTEOL, we could simply | |
5910 pass (&private_preg, string, len + 1, 0, len, ...) pretending the string | |
5911 was a little bit longer but still only matching the real part. | |
5912 This works because the `endline' will check for a '\n' and will find a | |
5913 '\0', correctly deciding that this is not the end of a line. | |
5914 But it doesn't work out so nicely for REG_NOTBOL, since we don't have | |
5915 a convenient '\0' there. For all we know, the string could be preceded | |
5916 by '\n' which would throw things off. */ | |
5917 | |
5878 /* Perform the searching operation. */ | 5918 /* Perform the searching operation. */ |
5879 ret = re_search (&private_preg, string, len, | 5919 ret = re_search (&private_preg, string, len, |
5880 /* start: */ 0, /* range: */ len, | 5920 /* start: */ 0, /* range: */ len, |
5881 want_reg_info ? &regs : (struct re_registers *) 0); | 5921 want_reg_info ? &regs : (struct re_registers *) 0); |
5882 | 5922 |
5899 } | 5939 } |
5900 | 5940 |
5901 /* We want zero return to mean success, unlike `re_search'. */ | 5941 /* We want zero return to mean success, unlike `re_search'. */ |
5902 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; | 5942 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; |
5903 } | 5943 } |
5944 WEAK_ALIAS (__regexec, regexec) | |
5904 | 5945 |
5905 | 5946 |
5906 /* Returns a message corresponding to an error code, ERRCODE, returned | 5947 /* Returns a message corresponding to an error code, ERRCODE, returned |
5907 from either regcomp or regexec. We don't use PREG here. */ | 5948 from either regcomp or regexec. We don't use PREG here. */ |
5908 | 5949 |
5939 strcpy (errbuf, msg); | 5980 strcpy (errbuf, msg); |
5940 } | 5981 } |
5941 | 5982 |
5942 return msg_size; | 5983 return msg_size; |
5943 } | 5984 } |
5985 WEAK_ALIAS (__regerror, regerror) | |
5944 | 5986 |
5945 | 5987 |
5946 /* Free dynamically allocated space used by PREG. */ | 5988 /* Free dynamically allocated space used by PREG. */ |
5947 | 5989 |
5948 void | 5990 void |
5963 | 6005 |
5964 if (preg->translate != NULL) | 6006 if (preg->translate != NULL) |
5965 free (preg->translate); | 6007 free (preg->translate); |
5966 preg->translate = NULL; | 6008 preg->translate = NULL; |
5967 } | 6009 } |
6010 WEAK_ALIAS (__regfree, regfree) | |
5968 | 6011 |
5969 #endif /* not emacs */ | 6012 #endif /* not emacs */ |