comparison src/regex.c @ 31312:e6b19a60e035

* regex.h (RE_NO_NEWLINE_ANCHOR): New syntax flag.
(struct re_pattern_buffer): Remove newline_anchor.
* regex.c: Keep namespace clean for GNU libc by renaming <fun> to __<fun> and using `weak_alias (__<fun>, <fun>)'.
(re_max_failures, fail_stack): Use size_t rather than unsigned.
(regex_compile): For ^ and $, choose between buffer and line (beg|end) depending on the new RE_NO_NEWLINE_ANCHOR syntax flag.
(print_compiled_pattern, re_search_2, mutually_exclusive_p)
(re_match_2_internal, re_compile_pattern, re_comp, regcomp): Get rid of references to newline_anchor.
(regcomp): Allocate and precompute a fastmap.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Thu, 31 Aug 2000 17:19:15 +0000
parents 34c25566aab3
children b8513fe83893
comparison
equal deleted inserted replaced
31311:a04fdd7a9207 31312:e6b19a60e035
23 - structure the opcode space into opcode+flag. 23 - structure the opcode space into opcode+flag.
24 - merge with glibc's regex.[ch]. 24 - merge with glibc's regex.[ch].
25 - replace succeed_n + jump_n with a combined operation so that the counter 25 - replace succeed_n + jump_n with a combined operation so that the counter
26 can simply be decremented when popping the failure_point without having 26 can simply be decremented when popping the failure_point without having
27 to stack up failure_count entries. 27 to stack up failure_count entries.
28 - get rid of `newline_anchor'.
29 */ 28 */
30 29
31 /* AIX requires this to be the first thing in the file. */ 30 /* AIX requires this to be the first thing in the file. */
32 #if defined _AIX && !defined REGEX_MALLOC 31 #if defined _AIX && !defined REGEX_MALLOC
33 #pragma alloca 32 #pragma alloca
43 #if defined STDC_HEADERS && !defined emacs 42 #if defined STDC_HEADERS && !defined emacs
44 # include <stddef.h> 43 # include <stddef.h>
45 #else 44 #else
46 /* We need this for `regex.h', and perhaps for the Emacs include files. */ 45 /* We need this for `regex.h', and perhaps for the Emacs include files. */
47 # include <sys/types.h> 46 # include <sys/types.h>
47 #endif
48
49 #ifdef _LIBC
50 /* We have to keep the namespace clean. */
51 # define regfree(preg) __regfree (preg)
52 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
53 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
54 # define regerror(errcode, preg, errbuf, errbuf_size) \
55 __regerror(errcode, preg, errbuf, errbuf_size)
56 # define re_set_registers(bu, re, nu, st, en) \
57 __re_set_registers (bu, re, nu, st, en)
58 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
59 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
60 # define re_match(bufp, string, size, pos, regs) \
61 __re_match (bufp, string, size, pos, regs)
62 # define re_search(bufp, string, size, startpos, range, regs) \
63 __re_search (bufp, string, size, startpos, range, regs)
64 # define re_compile_pattern(pattern, length, bufp) \
65 __re_compile_pattern (pattern, length, bufp)
66 # define re_set_syntax(syntax) __re_set_syntax (syntax)
67 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
68 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
69 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
70
71 # define WEAK_ALIAS(a,b) weak_alias (a, b)
72
73 /* We are also using some library internals. */
74 # include <locale/localeinfo.h>
75 # include <locale/elem-hash.h>
76 # include <langinfo.h>
77 #else
78 # define WEAK_ALIAS(a,b)
48 #endif 79 #endif
49 80
50 /* This is for other GNU distributions with internationalized messages. */ 81 /* This is for other GNU distributions with internationalized messages. */
51 #if HAVE_LIBINTL_H || defined _LIBC 82 #if HAVE_LIBINTL_H || defined _LIBC
52 # include <libintl.h> 83 # include <libintl.h>
1106 } 1137 }
1107 1138
1108 printf ("re_nsub: %d\t", bufp->re_nsub); 1139 printf ("re_nsub: %d\t", bufp->re_nsub);
1109 printf ("regs_alloc: %d\t", bufp->regs_allocated); 1140 printf ("regs_alloc: %d\t", bufp->regs_allocated);
1110 printf ("can_be_null: %d\t", bufp->can_be_null); 1141 printf ("can_be_null: %d\t", bufp->can_be_null);
1111 printf ("newline_anchor: %d\n", bufp->newline_anchor);
1112 printf ("no_sub: %d\t", bufp->no_sub); 1142 printf ("no_sub: %d\t", bufp->no_sub);
1113 printf ("not_bol: %d\t", bufp->not_bol); 1143 printf ("not_bol: %d\t", bufp->not_bol);
1114 printf ("not_eol: %d\t", bufp->not_eol); 1144 printf ("not_eol: %d\t", bufp->not_eol);
1115 printf ("syntax: %lx\n", bufp->syntax); 1145 printf ("syntax: %lx\n", bufp->syntax);
1116 fflush (stdout); 1146 fflush (stdout);
1182 reg_syntax_t ret = re_syntax_options; 1212 reg_syntax_t ret = re_syntax_options;
1183 1213
1184 re_syntax_options = syntax; 1214 re_syntax_options = syntax;
1185 return ret; 1215 return ret;
1186 } 1216 }
1217 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1187 1218
1188 /* This table gives an error message for each of the error codes listed 1219 /* This table gives an error message for each of the error codes listed
1189 in regex.h. Obviously the order here has to be same as there. 1220 in regex.h. Obviously the order here has to be same as there.
1190 POSIX doesn't require that we do anything for REG_NOERROR, 1221 POSIX doesn't require that we do anything for REG_NOERROR,
1191 but why not be nice? */ 1222 but why not be nice? */
1262 #endif 1293 #endif
1263 1294
1264 /* Roughly the maximum number of failure points on the stack. Would be 1295 /* Roughly the maximum number of failure points on the stack. Would be
1265 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. 1296 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1266 This is a variable only so users of regex can assign to it; we never 1297 This is a variable only so users of regex can assign to it; we never
1267 change it ourselves. */ 1298 change it ourselves. */
1268 #if defined MATCH_MAY_ALLOCATE 1299 # if defined MATCH_MAY_ALLOCATE
1269 /* Note that 4400 is enough to cause a crash on Alpha OSF/1, 1300 /* Note that 4400 was enough to cause a crash on Alpha OSF/1,
1270 whose default stack limit is 2mb. In order for a larger 1301 whose default stack limit is 2mb. In order for a larger
1271 value to work reliably, you have to try to make it accord 1302 value to work reliably, you have to try to make it accord
1272 with the process stack limit. */ 1303 with the process stack limit. */
1273 int re_max_failures = 40000; 1304 size_t re_max_failures = 40000;
1274 #else 1305 # else
1275 int re_max_failures = 4000; 1306 size_t re_max_failures = 4000;
1276 #endif 1307 # endif
1277 1308
1278 union fail_stack_elt 1309 union fail_stack_elt
1279 { 1310 {
1280 const unsigned char *pointer; 1311 const unsigned char *pointer;
1281 unsigned int integer; 1312 /* This should be the biggest `int' that's no bigger than a pointer. */
1313 long integer;
1282 }; 1314 };
1283 1315
1284 typedef union fail_stack_elt fail_stack_elt_t; 1316 typedef union fail_stack_elt fail_stack_elt_t;
1285 1317
1286 typedef struct 1318 typedef struct
1287 { 1319 {
1288 fail_stack_elt_t *stack; 1320 fail_stack_elt_t *stack;
1289 unsigned size; 1321 size_t size;
1290 unsigned avail; /* Offset of next open position. */ 1322 size_t avail; /* Offset of next open position. */
1291 unsigned frame; /* Offset of the cur constructed frame. */ 1323 size_t frame; /* Offset of the cur constructed frame. */
1292 } fail_stack_type; 1324 } fail_stack_type;
1293 1325
1294 #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0) 1326 #define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
1295 #define FAIL_STACK_EMPTY() (fail_stack.frame == 0) 1327 #define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
1296 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 1328 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1961 `used' is set to the length of the compiled pattern; 1993 `used' is set to the length of the compiled pattern;
1962 `fastmap_accurate' is zero; 1994 `fastmap_accurate' is zero;
1963 `re_nsub' is the number of subexpressions in PATTERN; 1995 `re_nsub' is the number of subexpressions in PATTERN;
1964 `not_bol' and `not_eol' are zero; 1996 `not_bol' and `not_eol' are zero;
1965 1997
1966 The `fastmap' and `newline_anchor' fields are neither 1998 The `fastmap' field is neither examined nor set. */
1967 examined nor set. */
1968 1999
1969 /* Insert the `jump' from the end of last alternative to "here". 2000 /* Insert the `jump' from the end of last alternative to "here".
1970 The space for the jump has already been allocated. */ 2001 The space for the jump has already been allocated. */
1971 #define FIXUP_ALT_JUMP() \ 2002 #define FIXUP_ALT_JUMP() \
1972 do { \ 2003 do { \
2124 p == pattern + 1 2155 p == pattern + 1
2125 /* If context independent, it's an operator. */ 2156 /* If context independent, it's an operator. */
2126 || syntax & RE_CONTEXT_INDEP_ANCHORS 2157 || syntax & RE_CONTEXT_INDEP_ANCHORS
2127 /* Otherwise, depends on what's come before. */ 2158 /* Otherwise, depends on what's come before. */
2128 || at_begline_loc_p (pattern, p, syntax)) 2159 || at_begline_loc_p (pattern, p, syntax))
2129 BUF_PUSH (begline); 2160 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2130 else 2161 else
2131 goto normal_char; 2162 goto normal_char;
2132 } 2163 }
2133 break; 2164 break;
2134 2165
2139 p == pend 2170 p == pend
2140 /* If context independent, it's an operator. */ 2171 /* If context independent, it's an operator. */
2141 || syntax & RE_CONTEXT_INDEP_ANCHORS 2172 || syntax & RE_CONTEXT_INDEP_ANCHORS
2142 /* Otherwise, depends on what's next. */ 2173 /* Otherwise, depends on what's next. */
2143 || at_endline_loc_p (p, pend, syntax)) 2174 || at_endline_loc_p (p, pend, syntax))
2144 BUF_PUSH (endline); 2175 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2145 else 2176 else
2146 goto normal_char; 2177 goto normal_char;
2147 } 2178 }
2148 break; 2179 break;
2149 2180
3397 worklist. 3428 worklist.
3398 We guarantee termination by ignoring backward jumps (more or less), 3429 We guarantee termination by ignoring backward jumps (more or less),
3399 so that `p' is monotonically increasing. More to the point, we 3430 so that `p' is monotonically increasing. More to the point, we
3400 never set `p' (or push) anything `<= p1'. */ 3431 never set `p' (or push) anything `<= p1'. */
3401 3432
3402 /* If can_be_null is set, then the fastmap will not be used anyway. */
3403 while (1) 3433 while (1)
3404 { 3434 {
3405 /* `p1' is used as a marker of how far back a `on_failure_jump' 3435 /* `p1' is used as a marker of how far back a `on_failure_jump'
3406 can go without being ignored. It is normally equal to `p' 3436 can go without being ignored. It is normally equal to `p'
3407 (which prevents any backward `on_failure_jump') except right 3437 (which prevents any backward `on_failure_jump') except right
3687 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 3717 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
3688 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 3718 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3689 3719
3690 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, 3720 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
3691 fastmap, RE_MULTIBYTE_P (bufp)); 3721 fastmap, RE_MULTIBYTE_P (bufp));
3722 bufp->can_be_null = (analysis != 0);
3692 if (analysis < -1) 3723 if (analysis < -1)
3693 return analysis; 3724 return analysis;
3694 bufp->can_be_null = (analysis != 0);
3695 return 0; 3725 return 0;
3696 } /* re_compile_fastmap */ 3726 } /* re_compile_fastmap */
3697 3727
3698 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and 3728 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3699 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 3729 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
3727 bufp->regs_allocated = REGS_UNALLOCATED; 3757 bufp->regs_allocated = REGS_UNALLOCATED;
3728 regs->num_regs = 0; 3758 regs->num_regs = 0;
3729 regs->start = regs->end = (regoff_t *) 0; 3759 regs->start = regs->end = (regoff_t *) 0;
3730 } 3760 }
3731 } 3761 }
3762 WEAK_ALIAS (__re_set_registers, re_set_registers)
3732 3763
3733 /* Searching routines. */ 3764 /* Searching routines. */
3734 3765
3735 /* Like re_search_2, below, but only one string is specified, and 3766 /* Like re_search_2, below, but only one string is specified, and
3736 doesn't let you say where to stop matching. */ 3767 doesn't let you say where to stop matching. */
3743 struct re_registers *regs; 3774 struct re_registers *regs;
3744 { 3775 {
3745 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 3776 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3746 regs, size); 3777 regs, size);
3747 } 3778 }
3779 WEAK_ALIAS (__re_search, re_search)
3748 3780
3749 /* End address of virtual concatenation of string. */ 3781 /* End address of virtual concatenation of string. */
3750 #define STOP_ADDR_VSTRING(P) \ 3782 #define STOP_ADDR_VSTRING(P) \
3751 (((P) >= size1 ? string2 + size2 : string1 + size1)) 3783 (((P) >= size1 ? string2 + size2 : string1 + size1))
3752 3784
3790 re_char *string2 = (re_char*) str2; 3822 re_char *string2 = (re_char*) str2;
3791 register char *fastmap = bufp->fastmap; 3823 register char *fastmap = bufp->fastmap;
3792 register RE_TRANSLATE_TYPE translate = bufp->translate; 3824 register RE_TRANSLATE_TYPE translate = bufp->translate;
3793 int total_size = size1 + size2; 3825 int total_size = size1 + size2;
3794 int endpos = startpos + range; 3826 int endpos = startpos + range;
3795 int anchored_start = 0; 3827 boolean anchored_start;
3796 3828
3797 /* Nonzero if we have to concern multibyte character. */ 3829 /* Nonzero if we have to concern multibyte character. */
3798 const boolean multibyte = RE_MULTIBYTE_P (bufp); 3830 const boolean multibyte = RE_MULTIBYTE_P (bufp);
3799 3831
3800 /* Check for out-of-range STARTPOS. */ 3832 /* Check for out-of-range STARTPOS. */
3834 if (fastmap && !bufp->fastmap_accurate) 3866 if (fastmap && !bufp->fastmap_accurate)
3835 if (re_compile_fastmap (bufp) == -2) 3867 if (re_compile_fastmap (bufp) == -2)
3836 return -2; 3868 return -2;
3837 3869
3838 /* See whether the pattern is anchored. */ 3870 /* See whether the pattern is anchored. */
3839 if (bufp->buffer[0] == begline) 3871 anchored_start = (bufp->buffer[0] == begline);
3840 anchored_start = 1;
3841 3872
3842 #ifdef emacs 3873 #ifdef emacs
3843 gl_state.object = re_match_object; 3874 gl_state.object = re_match_object;
3844 { 3875 {
3845 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); 3876 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
3855 skip quickly past places we cannot match. 3886 skip quickly past places we cannot match.
3856 We don't bother to treat startpos == 0 specially 3887 We don't bother to treat startpos == 0 specially
3857 because that case doesn't repeat. */ 3888 because that case doesn't repeat. */
3858 if (anchored_start && startpos > 0) 3889 if (anchored_start && startpos > 0)
3859 { 3890 {
3860 if (! (bufp->newline_anchor 3891 if (! ((startpos <= size1 ? string1[startpos - 1]
3861 && ((startpos <= size1 ? string1[startpos - 1] 3892 : string2[startpos - size1 - 1])
3862 : string2[startpos - size1 - 1]) 3893 == '\n'))
3863 == '\n')))
3864 goto advance; 3894 goto advance;
3865 } 3895 }
3866 3896
3867 /* If a fastmap is supplied, skip quickly over characters that 3897 /* If a fastmap is supplied, skip quickly over characters that
3868 cannot be the start of a match. If the pattern can match the 3898 cannot be the start of a match. If the pattern can match the
4007 } 4037 }
4008 } 4038 }
4009 } 4039 }
4010 return -1; 4040 return -1;
4011 } /* re_search_2 */ 4041 } /* re_search_2 */
4042 WEAK_ALIAS (__re_search_2, re_search_2)
4012 4043
4013 /* Declarations and macros for re_match_2. */ 4044 /* Declarations and macros for re_match_2. */
4014 4045
4015 static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, 4046 static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4016 register int len, 4047 register int len,
4211 return 1; 4242 return 1;
4212 } 4243 }
4213 break; 4244 break;
4214 4245
4215 case endline: 4246 case endline:
4216 if (!bufp->newline_anchor)
4217 break;
4218 /* Fallthrough */
4219 case exactn: 4247 case exactn:
4220 { 4248 {
4221 register unsigned int c 4249 register unsigned int c
4222 = (re_opcode_t) *p2 == endline ? '\n' 4250 = (re_opcode_t) *p2 == endline ? '\n'
4223 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); 4251 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
4375 # if defined C_ALLOCA && !defined REGEX_MALLOC 4403 # if defined C_ALLOCA && !defined REGEX_MALLOC
4376 alloca (0); 4404 alloca (0);
4377 # endif 4405 # endif
4378 return result; 4406 return result;
4379 } 4407 }
4408 WEAK_ALIAS (__re_match, re_match)
4380 #endif /* not emacs */ 4409 #endif /* not emacs */
4381 4410
4382 #ifdef emacs 4411 #ifdef emacs
4383 /* In Emacs, this is the string or buffer in which we 4412 /* In Emacs, this is the string or buffer in which we
4384 are matching. It is used for looking up syntax properties. */ 4413 are matching. It is used for looking up syntax properties. */
4422 #if defined C_ALLOCA && !defined REGEX_MALLOC 4451 #if defined C_ALLOCA && !defined REGEX_MALLOC
4423 alloca (0); 4452 alloca (0);
4424 #endif 4453 #endif
4425 return result; 4454 return result;
4426 } 4455 }
4456 WEAK_ALIAS (__re_match_2, re_match_2)
4427 4457
4428 /* This is a separate function so that we can force an alloca cleanup 4458 /* This is a separate function so that we can force an alloca cleanup
4429 afterwards. */ 4459 afterwards. */
4430 static int 4460 static int
4431 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) 4461 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5087 } 5117 }
5088 break; 5118 break;
5089 5119
5090 5120
5091 /* begline matches the empty string at the beginning of the string 5121 /* begline matches the empty string at the beginning of the string
5092 (unless `not_bol' is set in `bufp'), and, if 5122 (unless `not_bol' is set in `bufp'), and after newlines. */
5093 `newline_anchor' is set, after newlines. */
5094 case begline: 5123 case begline:
5095 DEBUG_PRINT1 ("EXECUTING begline.\n"); 5124 DEBUG_PRINT1 ("EXECUTING begline.\n");
5096 5125
5097 if (AT_STRINGS_BEG (d)) 5126 if (AT_STRINGS_BEG (d))
5098 { 5127 {
5100 } 5129 }
5101 else 5130 else
5102 { 5131 {
5103 unsigned char c; 5132 unsigned char c;
5104 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); 5133 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
5105 if (c == '\n' && bufp->newline_anchor) 5134 if (c == '\n')
5106 break; 5135 break;
5107 } 5136 }
5108 /* In all other cases, we fail. */ 5137 /* In all other cases, we fail. */
5109 goto fail; 5138 goto fail;
5110 5139
5118 if (!bufp->not_eol) break; 5147 if (!bufp->not_eol) break;
5119 } 5148 }
5120 else 5149 else
5121 { 5150 {
5122 PREFETCH_NOLIMIT (); 5151 PREFETCH_NOLIMIT ();
5123 if (*d == '\n' && bufp->newline_anchor) 5152 if (*d == '\n')
5124 break; 5153 break;
5125 } 5154 }
5126 goto fail; 5155 goto fail;
5127 5156
5128 5157
5643 /* And GNU code determines whether or not to get register information 5672 /* And GNU code determines whether or not to get register information
5644 by passing null for the REGS argument to re_match, etc., not by 5673 by passing null for the REGS argument to re_match, etc., not by
5645 setting no_sub. */ 5674 setting no_sub. */
5646 bufp->no_sub = 0; 5675 bufp->no_sub = 0;
5647 5676
5648 /* Match anchors at newline. */
5649 bufp->newline_anchor = 1;
5650
5651 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp); 5677 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
5652 5678
5653 if (!ret) 5679 if (!ret)
5654 return NULL; 5680 return NULL;
5655 return gettext (re_error_msgid[(int) ret]); 5681 return gettext (re_error_msgid[(int) ret]);
5656 } 5682 }
5683 WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
5657 5684
5658 /* Entry points compatible with 4.2 BSD regex library. We don't define 5685 /* Entry points compatible with 4.2 BSD regex library. We don't define
5659 them unless specifically requested. */ 5686 them unless specifically requested. */
5660 5687
5661 #if defined _REGEX_RE_COMP || defined _LIBC 5688 #if defined _REGEX_RE_COMP || defined _LIBC
5697 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); 5724 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5698 } 5725 }
5699 5726
5700 /* Since `re_exec' always passes NULL for the `regs' argument, we 5727 /* Since `re_exec' always passes NULL for the `regs' argument, we
5701 don't need to initialize the pattern buffer fields which affect it. */ 5728 don't need to initialize the pattern buffer fields which affect it. */
5702
5703 /* Match anchors at newlines. */
5704 re_comp_buf.newline_anchor = 1;
5705 5729
5706 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 5730 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5707 5731
5708 if (!ret) 5732 if (!ret)
5709 return NULL; 5733 return NULL;
5738 `buffer' to the compiled pattern; 5762 `buffer' to the compiled pattern;
5739 `used' to the length of the compiled pattern; 5763 `used' to the length of the compiled pattern;
5740 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 5764 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5741 REG_EXTENDED bit in CFLAGS is set; otherwise, to 5765 REG_EXTENDED bit in CFLAGS is set; otherwise, to
5742 RE_SYNTAX_POSIX_BASIC; 5766 RE_SYNTAX_POSIX_BASIC;
5743 `newline_anchor' to REG_NEWLINE being set in CFLAGS; 5767 `fastmap' to an allocated space for the fastmap;
5744 `fastmap' and `fastmap_accurate' to zero; 5768 `fastmap_accurate' to zero;
5745 `re_nsub' to the number of subexpressions in PATTERN. 5769 `re_nsub' to the number of subexpressions in PATTERN.
5746 5770
5747 PATTERN is the address of the pattern string. 5771 PATTERN is the address of the pattern string.
5748 5772
5749 CFLAGS is a series of bits which affect compilation. 5773 CFLAGS is a series of bits which affect compilation.
5778 /* regex_compile will allocate the space for the compiled pattern. */ 5802 /* regex_compile will allocate the space for the compiled pattern. */
5779 preg->buffer = 0; 5803 preg->buffer = 0;
5780 preg->allocated = 0; 5804 preg->allocated = 0;
5781 preg->used = 0; 5805 preg->used = 0;
5782 5806
5783 /* Don't bother to use a fastmap when searching. This simplifies the 5807 /* Try to allocate space for the fastmap. */
5784 REG_NEWLINE case: if we used a fastmap, we'd have to put all the 5808 preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5785 characters after newlines into the fastmap. This way, we just try
5786 every character. */
5787 preg->fastmap = 0;
5788 5809
5789 if (cflags & REG_ICASE) 5810 if (cflags & REG_ICASE)
5790 { 5811 {
5791 unsigned i; 5812 unsigned i;
5792 5813
5806 /* If REG_NEWLINE is set, newlines are treated differently. */ 5827 /* If REG_NEWLINE is set, newlines are treated differently. */
5807 if (cflags & REG_NEWLINE) 5828 if (cflags & REG_NEWLINE)
5808 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 5829 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
5809 syntax &= ~RE_DOT_NEWLINE; 5830 syntax &= ~RE_DOT_NEWLINE;
5810 syntax |= RE_HAT_LISTS_NOT_NEWLINE; 5831 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5811 /* It also changes the matching behavior. */
5812 preg->newline_anchor = 1;
5813 } 5832 }
5814 else 5833 else
5815 preg->newline_anchor = 0; 5834 syntax |= RE_NO_NEWLINE_ANCHOR;
5816 5835
5817 preg->no_sub = !!(cflags & REG_NOSUB); 5836 preg->no_sub = !!(cflags & REG_NOSUB);
5818 5837
5819 /* POSIX says a null character in the pattern terminates it, so we 5838 /* POSIX says a null character in the pattern terminates it, so we
5820 can use strlen here in compiling the pattern. */ 5839 can use strlen here in compiling the pattern. */
5821 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg); 5840 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
5822 5841
5823 /* POSIX doesn't distinguish between an unmatched open-group and an 5842 /* POSIX doesn't distinguish between an unmatched open-group and an
5824 unmatched close-group: both are REG_EPAREN. */ 5843 unmatched close-group: both are REG_EPAREN. */
5825 if (ret == REG_ERPAREN) ret = REG_EPAREN; 5844 if (ret == REG_ERPAREN)
5826 5845 ret = REG_EPAREN;
5846
5847 if (ret == REG_NOERROR && preg->fastmap)
5848 { /* Compute the fastmap now, since regexec cannot modify the pattern
5849 buffer. */
5850 re_compile_fastmap (preg);
5851 if (preg->can_be_null)
5852 { /* The fastmap can't be used anyway. */
5853 free (preg->fastmap);
5854 preg->fastmap = NULL;
5855 }
5856 }
5827 return (int) ret; 5857 return (int) ret;
5828 } 5858 }
5859 WEAK_ALIAS (__regcomp, regcomp)
5829 5860
5830 5861
5831 /* regexec searches for a given pattern, specified by PREG, in the 5862 /* regexec searches for a given pattern, specified by PREG, in the
5832 string STRING. 5863 string STRING.
5833 5864
5852 { 5883 {
5853 int ret; 5884 int ret;
5854 struct re_registers regs; 5885 struct re_registers regs;
5855 regex_t private_preg; 5886 regex_t private_preg;
5856 int len = strlen (string); 5887 int len = strlen (string);
5857 boolean want_reg_info = !preg->no_sub && nmatch > 0; 5888 boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
5858 5889
5859 private_preg = *preg; 5890 private_preg = *preg;
5860 5891
5861 private_preg.not_bol = !!(eflags & REG_NOTBOL); 5892 private_preg.not_bol = !!(eflags & REG_NOTBOL);
5862 private_preg.not_eol = !!(eflags & REG_NOTEOL); 5893 private_preg.not_eol = !!(eflags & REG_NOTEOL);
5873 if (regs.start == NULL) 5904 if (regs.start == NULL)
5874 return (int) REG_NOMATCH; 5905 return (int) REG_NOMATCH;
5875 regs.end = regs.start + nmatch; 5906 regs.end = regs.start + nmatch;
5876 } 5907 }
5877 5908
5909 /* Instead of using not_eol to implement REG_NOTEOL, we could simply
5910 pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
5911 was a little bit longer but still only matching the real part.
5912 This works because the `endline' will check for a '\n' and will find a
5913 '\0', correctly deciding that this is not the end of a line.
5914 But it doesn't work out so nicely for REG_NOTBOL, since we don't have
5915 a convenient '\0' there. For all we know, the string could be preceded
5916 by '\n' which would throw things off. */
5917
5878 /* Perform the searching operation. */ 5918 /* Perform the searching operation. */
5879 ret = re_search (&private_preg, string, len, 5919 ret = re_search (&private_preg, string, len,
5880 /* start: */ 0, /* range: */ len, 5920 /* start: */ 0, /* range: */ len,
5881 want_reg_info ? &regs : (struct re_registers *) 0); 5921 want_reg_info ? &regs : (struct re_registers *) 0);
5882 5922
5899 } 5939 }
5900 5940
5901 /* We want zero return to mean success, unlike `re_search'. */ 5941 /* We want zero return to mean success, unlike `re_search'. */
5902 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 5942 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5903 } 5943 }
5944 WEAK_ALIAS (__regexec, regexec)
5904 5945
5905 5946
5906 /* Returns a message corresponding to an error code, ERRCODE, returned 5947 /* Returns a message corresponding to an error code, ERRCODE, returned
5907 from either regcomp or regexec. We don't use PREG here. */ 5948 from either regcomp or regexec. We don't use PREG here. */
5908 5949
5939 strcpy (errbuf, msg); 5980 strcpy (errbuf, msg);
5940 } 5981 }
5941 5982
5942 return msg_size; 5983 return msg_size;
5943 } 5984 }
5985 WEAK_ALIAS (__regerror, regerror)
5944 5986
5945 5987
5946 /* Free dynamically allocated space used by PREG. */ 5988 /* Free dynamically allocated space used by PREG. */
5947 5989
5948 void 5990 void
5963 6005
5964 if (preg->translate != NULL) 6006 if (preg->translate != NULL)
5965 free (preg->translate); 6007 free (preg->translate);
5966 preg->translate = NULL; 6008 preg->translate = NULL;
5967 } 6009 }
6010 WEAK_ALIAS (__regfree, regfree)
5968 6011
5969 #endif /* not emacs */ 6012 #endif /* not emacs */