comparison src/regex.c @ 31299:34c25566aab3

Merge some changes from GNU libc. Add prototypes. (bcopy, bcmp, REGEX_REALLOCATE, re_match_2_internal): Use memcmp and memcpy instead of bcopy and bcmp. (init_syntax_once): Use ISALNUM. (PUSH_FAILURE_POINT, re_match_2_internal): Remove failure_id. (REG_UNSET_VALUE): Remove. Use NULL instead. (REG_UNSET, re_match_2_internal): Use NULL. (SET_HIGH_BOUND, MOVE_BUFFER_POINTER, ELSE_EXTEND_BUFFER_HIGH_BOUND): New macros. (EXTEND_BUFFER): Use them (to work with BOUNDED_POINTERS). (GET_UNSIGNED_NUMBER): Don't use ISDIGIT. (regex_compile): In handle_interval, return an error rather than try to unfetch the interval if we can't find the closing brace. Obey the RE_NO_GNU_OPS syntax bit. (TOLOWER): New macro. (regcomp): Use it. (regexec): Allocate regs.start and regs.end as one block.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Wed, 30 Aug 2000 18:31:17 +0000
parents 9efb8adfefa4
children e6b19a60e035
comparison
equal deleted inserted replaced
31298:820483a506d0 31299:34c25566aab3
1 /* Extended regular expression matching and search library, version 1 /* Extended regular expression matching and search library, version
2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
3 internationalization features.) 3 internationalization features.)
4 4
5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc. 5 Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
6 6
7 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by 8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option) 9 the Free Software Foundation; either version 2, or (at your option)
10 any later version. 10 any later version.
23 - structure the opcode space into opcode+flag. 23 - structure the opcode space into opcode+flag.
24 - merge with glibc's regex.[ch]. 24 - merge with glibc's regex.[ch].
25 - replace succeed_n + jump_n with a combined operation so that the counter 25 - replace succeed_n + jump_n with a combined operation so that the counter
26 can simply be decremented when popping the failure_point without having 26 can simply be decremented when popping the failure_point without having
27 to stack up failure_count entries. 27 to stack up failure_count entries.
28 - get rid of `newline_anchor'.
28 */ 29 */
29 30
30 /* AIX requires this to be the first thing in the file. */ 31 /* AIX requires this to be the first thing in the file. */
31 #if defined _AIX && !defined REGEX_MALLOC 32 #if defined _AIX && !defined REGEX_MALLOC
32 #pragma alloca 33 #pragma alloca
37 38
38 #ifdef HAVE_CONFIG_H 39 #ifdef HAVE_CONFIG_H
39 # include <config.h> 40 # include <config.h>
40 #endif 41 #endif
41 42
42 /* We need this for `regex.h', and perhaps for the Emacs include files. */ 43 #if defined STDC_HEADERS && !defined emacs
43 #include <sys/types.h> 44 # include <stddef.h>
44 45 #else
45 /* This is for other GNU distributions with internationalized messages. */ 46 /* We need this for `regex.h', and perhaps for the Emacs include files. */
47 # include <sys/types.h>
48 #endif
49
50 /* This is for other GNU distributions with internationalized messages. */
46 #if HAVE_LIBINTL_H || defined _LIBC 51 #if HAVE_LIBINTL_H || defined _LIBC
47 # include <libintl.h> 52 # include <libintl.h>
48 #else 53 #else
49 # define gettext(msgid) (msgid) 54 # define gettext(msgid) (msgid)
50 #endif 55 #endif
113 char *malloc (); 118 char *malloc ();
114 char *realloc (); 119 char *realloc ();
115 # endif 120 # endif
116 121
117 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 122 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
118 If nothing else has been done, use the method below. */ 123 If nothing else has been done, use the method below. */
119 # ifdef INHIBIT_STRING_HEADER 124 # ifdef INHIBIT_STRING_HEADER
120 # if !(defined HAVE_BZERO && defined HAVE_BCOPY) 125 # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
121 # if !defined bzero && !defined bcopy 126 # if !defined bzero && !defined bcopy
122 # undef INHIBIT_STRING_HEADER 127 # undef INHIBIT_STRING_HEADER
123 # endif 128 # endif
124 # endif 129 # endif
125 # endif 130 # endif
126 131
127 /* This is the normal way of making sure we have a bcopy and a bzero. 132 /* This is the normal way of making sure we have memcpy, memcmp and bzero.
128 This is used in most programs--a few other programs avoid this 133 This is used in most programs--a few other programs avoid this
129 by defining INHIBIT_STRING_HEADER. */ 134 by defining INHIBIT_STRING_HEADER. */
130 # ifndef INHIBIT_STRING_HEADER 135 # ifndef INHIBIT_STRING_HEADER
131 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC 136 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
132 # include <string.h> 137 # include <string.h>
133 # ifndef bcmp
134 # define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
135 # endif
136 # ifndef bcopy
137 # define bcopy(s, d, n) memcpy ((d), (s), (n))
138 # endif
139 # ifndef bzero 138 # ifndef bzero
140 # define bzero(s, n) memset ((s), 0, (n)) 139 # ifndef _LIBC
140 # define bzero(s, n) (memset (s, '\0', n), (s))
141 # else
142 # define bzero(s, n) __bzero (s, n)
143 # endif
141 # endif 144 # endif
142 # else 145 # else
143 # include <strings.h> 146 # include <strings.h>
147 # ifndef memcmp
148 # define memcmp(s1, s2, n) bcmp (s1, s2, n)
149 # endif
150 # ifndef memcpy
151 # define memcpy(d, s, n) (bcopy (s, d, n), (d))
152 # endif
144 # endif 153 # endif
145 # endif 154 # endif
146 155
147 /* Define the syntax stuff for \<, \>, etc. */ 156 /* Define the syntax stuff for \<, \>, etc. */
148 157
152 # ifdef SWITCH_ENUM_BUG 161 # ifdef SWITCH_ENUM_BUG
153 # define SWITCH_ENUM_CAST(x) ((int)(x)) 162 # define SWITCH_ENUM_CAST(x) ((int)(x))
154 # else 163 # else
155 # define SWITCH_ENUM_CAST(x) (x) 164 # define SWITCH_ENUM_CAST(x) (x)
156 # endif 165 # endif
157
158 # define SYNTAX(c) re_syntax_table[c]
159 166
160 /* Dummy macros for non-Emacs environments. */ 167 /* Dummy macros for non-Emacs environments. */
161 # define BASE_LEADING_CODE_P(c) (0) 168 # define BASE_LEADING_CODE_P(c) (0)
162 # define CHAR_CHARSET(c) 0 169 # define CHAR_CHARSET(c) 0
163 # define CHARSET_LEADING_CODE_BASE(c) 0 170 # define CHARSET_LEADING_CODE_BASE(c) 0
233 # define ISLOWER(c) (LOWERCASEP (c)) 240 # define ISLOWER(c) (LOWERCASEP (c))
234 241
235 # define ISPUNCT(c) (IS_REAL_ASCII (c) \ 242 # define ISPUNCT(c) (IS_REAL_ASCII (c) \
236 ? ((c) > ' ' && (c) < 0177 \ 243 ? ((c) > ' ' && (c) < 0177 \
237 && !(((c) >= 'a' && (c) <= 'z') \ 244 && !(((c) >= 'a' && (c) <= 'z') \
238 || ((c) >= 'A' && (c) <= 'Z') \ 245 || ((c) >= 'A' && (c) <= 'Z') \
239 || ((c) >= '0' && (c) <= '9'))) \ 246 || ((c) >= '0' && (c) <= '9'))) \
240 : SYNTAX (c) != Sword) 247 : SYNTAX (c) != Sword)
241 248
242 # define ISSPACE(c) (SYNTAX (c) == Swhitespace) 249 # define ISSPACE(c) (SYNTAX (c) == Swhitespace)
243 250
244 # define ISUPPER(c) (UPPERCASEP (c)) 251 # define ISUPPER(c) (UPPERCASEP (c))
250 /* Jim Meyering writes: 257 /* Jim Meyering writes:
251 258
252 "... Some ctype macros are valid only for character codes that 259 "... Some ctype macros are valid only for character codes that
253 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 260 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
254 using /bin/cc or gcc but without giving an ansi option). So, all 261 using /bin/cc or gcc but without giving an ansi option). So, all
255 ctype uses should be through macros like ISPRINT... If 262 ctype uses should be through macros like ISPRINT... If
256 STDC_HEADERS is defined, then autoconf has verified that the ctype 263 STDC_HEADERS is defined, then autoconf has verified that the ctype
257 macros don't need to be guarded with references to isascii. ... 264 macros don't need to be guarded with references to isascii. ...
258 Defining isascii to 1 should let any compiler worth its salt 265 Defining isascii to 1 should let any compiler worth its salt
259 eliminate the && through constant folding." */ 266 eliminate the && through constant folding."
260 267 Solaris defines some of these symbols so we must undefine them first. */
268
269 # undef ISASCII
261 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) 270 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
262 # define ISASCII(c) 1 271 # define ISASCII(c) 1
263 # else 272 # else
264 # define ISASCII(c) isascii(c) 273 # define ISASCII(c) isascii(c)
265 # endif 274 # endif
279 # define ISGRAPH(c) (ISASCII (c) && isgraph (c)) 288 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
280 # else 289 # else
281 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) 290 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
282 # endif 291 # endif
283 292
293 # undef ISPRINT
284 # define ISPRINT(c) (ISASCII (c) && isprint (c)) 294 # define ISPRINT(c) (ISASCII (c) && isprint (c))
285 # define ISDIGIT(c) (ISASCII (c) && isdigit (c)) 295 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
286 # define ISALNUM(c) (ISASCII (c) && isalnum (c)) 296 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
287 # define ISALPHA(c) (ISASCII (c) && isalpha (c)) 297 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
288 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) 298 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
292 # define ISUPPER(c) (ISASCII (c) && isupper (c)) 302 # define ISUPPER(c) (ISASCII (c) && isupper (c))
293 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) 303 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
294 304
295 # define ISWORD(c) ISALPHA(c) 305 # define ISWORD(c) ISALPHA(c)
296 306
307 # ifdef _tolower
308 # define TOLOWER(c) _tolower(c)
309 # else
310 # define TOLOWER(c) tolower(c)
311 # endif
312
313 /* How many characters in the character set. */
314 # define CHAR_SET_SIZE 256
315
297 # ifdef SYNTAX_TABLE 316 # ifdef SYNTAX_TABLE
298 317
299 extern char *re_syntax_table; 318 extern char *re_syntax_table;
300 319
301 # else /* not SYNTAX_TABLE */ 320 # else /* not SYNTAX_TABLE */
302
303 /* How many characters in the character set. */
304 # define CHAR_SET_SIZE 256
305 321
306 static char re_syntax_table[CHAR_SET_SIZE]; 322 static char re_syntax_table[CHAR_SET_SIZE];
307 323
308 static void 324 static void
309 init_syntax_once () 325 init_syntax_once ()
314 if (done) 330 if (done)
315 return; 331 return;
316 332
317 bzero (re_syntax_table, sizeof re_syntax_table); 333 bzero (re_syntax_table, sizeof re_syntax_table);
318 334
319 for (c = 'a'; c <= 'z'; c++) 335 for (c = 0; c < CHAR_SET_SIZE; ++c)
320 re_syntax_table[c] = Sword; 336 if (ISALNUM (c))
321 337 re_syntax_table[c] = Sword;
322 for (c = 'A'; c <= 'Z'; c++)
323 re_syntax_table[c] = Sword;
324
325 for (c = '0'; c <= '9'; c++)
326 re_syntax_table[c] = Sword;
327 338
328 re_syntax_table['_'] = Sword; 339 re_syntax_table['_'] = Sword;
329 340
330 done = 1; 341 done = 1;
331 } 342 }
332 343
333 # endif /* not SYNTAX_TABLE */ 344 # endif /* not SYNTAX_TABLE */
345
346 # define SYNTAX(c) re_syntax_table[(c)]
334 347
335 #endif /* not emacs */ 348 #endif /* not emacs */
336 349
337 #ifndef NULL 350 #ifndef NULL
338 # define NULL (void *)0 351 # define NULL (void *)0
339 #endif 352 #endif
340 353
341 /* We remove any previous definition of `SIGN_EXTEND_CHAR', 354 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
342 since ours (we hope) works properly with all combinations of 355 since ours (we hope) works properly with all combinations of
343 machines, compilers, `char' and `unsigned char' argument types. 356 machines, compilers, `char' and `unsigned char' argument types.
344 (Per Bothner suggested the basic approach.) */ 357 (Per Bothner suggested the basic approach.) */
345 #undef SIGN_EXTEND_CHAR 358 #undef SIGN_EXTEND_CHAR
346 #if __STDC__ 359 #if __STDC__
347 # define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 360 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
348 #else /* not __STDC__ */ 361 #else /* not __STDC__ */
349 /* As in Harbison and Steele. */ 362 /* As in Harbison and Steele. */
385 # define REGEX_ALLOCATE alloca 398 # define REGEX_ALLOCATE alloca
386 399
387 /* Assumes a `char *destination' variable. */ 400 /* Assumes a `char *destination' variable. */
388 # define REGEX_REALLOCATE(source, osize, nsize) \ 401 # define REGEX_REALLOCATE(source, osize, nsize) \
389 (destination = (char *) alloca (nsize), \ 402 (destination = (char *) alloca (nsize), \
390 bcopy (source, destination, osize), \ 403 memcpy (destination, source, osize))
391 destination)
392 404
393 /* No need to do anything to free, after alloca. */ 405 /* No need to do anything to free, after alloca. */
394 # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 406 # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
395 407
396 #endif /* not REGEX_MALLOC */ 408 #endif /* not REGEX_MALLOC */
438 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 450 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
439 #define RETALLOC_IF(addr, n, t) \ 451 #define RETALLOC_IF(addr, n, t) \
440 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 452 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
441 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 453 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
442 454
443 #define BYTEWIDTH 8 /* In bits. */ 455 #define BYTEWIDTH 8 /* In bits. */
444 456
445 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 457 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
446 458
447 #undef MAX 459 #undef MAX
448 #undef MIN 460 #undef MIN
454 466
455 typedef char boolean; 467 typedef char boolean;
456 #define false 0 468 #define false 0
457 #define true 1 469 #define true 1
458 470
459 static int re_match_2_internal (); 471 static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
472 re_char *string1, int size1,
473 re_char *string2, int size2,
474 int pos,
475 struct re_registers *regs,
476 int stop));
460 477
461 /* These are the command codes that appear in compiled regular 478 /* These are the command codes that appear in compiled regular
462 expressions. Some opcodes are followed by argument bytes. A 479 expressions. Some opcodes are followed by argument bytes. A
463 command code can specify any interpretation whatsoever for its 480 command code can specify any interpretation whatsoever for its
464 arguments. Zero bytes may appear in the compiled regular expression. */ 481 arguments. Zero bytes may appear in the compiled regular expression. */
465 482
466 typedef enum 483 typedef enum
467 { 484 {
468 no_op = 0, 485 no_op = 0,
469 486
470 /* Succeed right away--no more backtracking. */ 487 /* Succeed right away--no more backtracking. */
471 succeed, 488 succeed,
472 489
473 /* Followed by one byte giving n, then by n literal bytes. */ 490 /* Followed by one byte giving n, then by n literal bytes. */
474 exactn, 491 exactn,
475 492
491 pairs, each 2 multibyte characters, 508 pairs, each 2 multibyte characters,
492 each multibyte character represented as 3 bytes. */ 509 each multibyte character represented as 3 bytes. */
493 charset, 510 charset,
494 511
495 /* Same parameters as charset, but match any character that is 512 /* Same parameters as charset, but match any character that is
496 not one of those specified. */ 513 not one of those specified. */
497 charset_not, 514 charset_not,
498 515
499 /* Start remembering the text that is matched, for storing in a 516 /* Start remembering the text that is matched, for storing in a
500 register. Followed by one byte with the register number, in 517 register. Followed by one byte with the register number, in
501 the range 0 to one less than the pattern buffer's re_nsub 518 the range 0 to one less than the pattern buffer's re_nsub
507 number, in the range 0 to one less than `re_nsub' in the 524 number, in the range 0 to one less than `re_nsub' in the
508 pattern buffer. */ 525 pattern buffer. */
509 stop_memory, 526 stop_memory,
510 527
511 /* Match a duplicate of something remembered. Followed by one 528 /* Match a duplicate of something remembered. Followed by one
512 byte containing the register number. */ 529 byte containing the register number. */
513 duplicate, 530 duplicate,
514 531
515 /* Fail unless at beginning of line. */ 532 /* Fail unless at beginning of line. */
516 begline, 533 begline,
517 534
518 /* Fail unless at end of line. */ 535 /* Fail unless at end of line. */
519 endline, 536 endline,
520 537
521 /* Succeeds if at beginning of buffer (if emacs) or at beginning 538 /* Succeeds if at beginning of buffer (if emacs) or at beginning
522 of string to be matched (if not). */ 539 of string to be matched (if not). */
523 begbuf, 540 begbuf,
628 (destination) = *(source) & 0377; \ 645 (destination) = *(source) & 0377; \
629 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ 646 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
630 } while (0) 647 } while (0)
631 648
632 #ifdef DEBUG 649 #ifdef DEBUG
650 static void extract_number _RE_ARGS ((int *dest, re_char *source));
633 static void 651 static void
634 extract_number (dest, source) 652 extract_number (dest, source)
635 int *dest; 653 int *dest;
636 unsigned char *source; 654 unsigned char *source;
637 { 655 {
638 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 656 int temp = SIGN_EXTEND_CHAR (*(source + 1));
639 *dest = *source & 0377; 657 *dest = *source & 0377;
640 *dest += temp << 8; 658 *dest += temp << 8;
641 } 659 }
642 660
643 # ifndef EXTRACT_MACROS /* To debug the macros. */ 661 # ifndef EXTRACT_MACROS /* To debug the macros. */
644 # undef EXTRACT_NUMBER 662 # undef EXTRACT_NUMBER
645 # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 663 # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
646 # endif /* not EXTRACT_MACROS */ 664 # endif /* not EXTRACT_MACROS */
647 665
648 #endif /* DEBUG */ 666 #endif /* DEBUG */
655 EXTRACT_NUMBER (destination, source); \ 673 EXTRACT_NUMBER (destination, source); \
656 (source) += 2; \ 674 (source) += 2; \
657 } while (0) 675 } while (0)
658 676
659 #ifdef DEBUG 677 #ifdef DEBUG
678 static void extract_number_and_incr _RE_ARGS ((int *destination,
679 re_char **source));
660 static void 680 static void
661 extract_number_and_incr (destination, source) 681 extract_number_and_incr (destination, source)
662 int *destination; 682 int *destination;
663 unsigned char **source; 683 unsigned char **source;
664 { 684 {
769 789
770 /* If DEBUG is defined, Regex prints many voluminous messages about what 790 /* If DEBUG is defined, Regex prints many voluminous messages about what
771 it is doing (if the variable `debug' is nonzero). If linked with the 791 it is doing (if the variable `debug' is nonzero). If linked with the
772 main program in `iregex.c', you can enter patterns and strings 792 main program in `iregex.c', you can enter patterns and strings
773 interactively. And if linked with the main program in `main.c' and 793 interactively. And if linked with the main program in `main.c' and
774 the other test files, you can run the already-written tests. */ 794 the other test files, you can run the already-written tests. */
775 795
776 #ifdef DEBUG 796 #ifdef DEBUG
777 797
778 /* We use standard I/O for debugging. */ 798 /* We use standard I/O for debugging. */
779 # include <stdio.h> 799 # include <stdio.h>
1074 struct re_pattern_buffer *bufp; 1094 struct re_pattern_buffer *bufp;
1075 { 1095 {
1076 unsigned char *buffer = bufp->buffer; 1096 unsigned char *buffer = bufp->buffer;
1077 1097
1078 print_partial_compiled_pattern (buffer, buffer + bufp->used); 1098 print_partial_compiled_pattern (buffer, buffer + bufp->used);
1079 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated); 1099 printf ("%ld bytes used/%ld bytes allocated.\n",
1100 bufp->used, bufp->allocated);
1080 1101
1081 if (bufp->fastmap_accurate && bufp->fastmap) 1102 if (bufp->fastmap_accurate && bufp->fastmap)
1082 { 1103 {
1083 printf ("fastmap: "); 1104 printf ("fastmap: ");
1084 print_fastmap (bufp->fastmap); 1105 print_fastmap (bufp->fastmap);
1089 printf ("can_be_null: %d\t", bufp->can_be_null); 1110 printf ("can_be_null: %d\t", bufp->can_be_null);
1090 printf ("newline_anchor: %d\n", bufp->newline_anchor); 1111 printf ("newline_anchor: %d\n", bufp->newline_anchor);
1091 printf ("no_sub: %d\t", bufp->no_sub); 1112 printf ("no_sub: %d\t", bufp->no_sub);
1092 printf ("not_bol: %d\t", bufp->not_bol); 1113 printf ("not_bol: %d\t", bufp->not_bol);
1093 printf ("not_eol: %d\t", bufp->not_eol); 1114 printf ("not_eol: %d\t", bufp->not_eol);
1094 printf ("syntax: %d\n", bufp->syntax); 1115 printf ("syntax: %lx\n", bufp->syntax);
1095 fflush (stdout); 1116 fflush (stdout);
1096 /* Perhaps we should print the translate table? */ 1117 /* Perhaps we should print the translate table? */
1097 } 1118 }
1098 1119
1099 1120
1103 re_char *string1; 1124 re_char *string1;
1104 re_char *string2; 1125 re_char *string2;
1105 int size1; 1126 int size1;
1106 int size2; 1127 int size2;
1107 { 1128 {
1108 unsigned this_char; 1129 int this_char;
1109 1130
1110 if (where == NULL) 1131 if (where == NULL)
1111 printf ("(null)"); 1132 printf ("(null)");
1112 else 1133 else
1113 { 1134 {
1150 /* Specify the precise syntax of regexps for compilation. This provides 1171 /* Specify the precise syntax of regexps for compilation. This provides
1151 for compatibility for various utilities which historically have 1172 for compatibility for various utilities which historically have
1152 different, incompatible syntaxes. 1173 different, incompatible syntaxes.
1153 1174
1154 The argument SYNTAX is a bit mask comprised of the various bits 1175 The argument SYNTAX is a bit mask comprised of the various bits
1155 defined in regex.h. We return the old syntax. */ 1176 defined in regex.h. We return the old syntax. */
1156 1177
1157 reg_syntax_t 1178 reg_syntax_t
1158 re_set_syntax (syntax) 1179 re_set_syntax (syntax)
1159 reg_syntax_t syntax; 1180 reg_syntax_t syntax;
1160 { 1181 {
1163 re_syntax_options = syntax; 1184 re_syntax_options = syntax;
1164 return ret; 1185 return ret;
1165 } 1186 }
1166 1187
1167 /* This table gives an error message for each of the error codes listed 1188 /* This table gives an error message for each of the error codes listed
1168 in regex.h. Obviously the order here has to be same as there. 1189 in regex.h. Obviously the order here has to be same as there.
1169 POSIX doesn't require that we do anything for REG_NOERROR, 1190 POSIX doesn't require that we do anything for REG_NOERROR,
1170 but why not be nice? */ 1191 but why not be nice? */
1171 1192
1172 static const char *re_error_msgid[] = 1193 static const char *re_error_msgid[] =
1173 { 1194 {
1174 gettext_noop ("Success"), /* REG_NOERROR */ 1195 gettext_noop ("Success"), /* REG_NOERROR */
1175 gettext_noop ("No match"), /* REG_NOMATCH */ 1196 gettext_noop ("No match"), /* REG_NOMATCH */
1188 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 1209 gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1189 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 1210 gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1190 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ 1211 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1191 }; 1212 };
1192 1213
1193 /* Avoiding alloca during matching, to placate r_alloc. */ 1214 /* Avoiding alloca during matching, to placate r_alloc. */
1194 1215
1195 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1216 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1196 searching and matching functions should not call alloca. On some 1217 searching and matching functions should not call alloca. On some
1197 systems, alloca is implemented in terms of malloc, and if we're 1218 systems, alloca is implemented in terms of malloc, and if we're
1198 using the relocating allocator routines, then malloc could cause a 1219 using the relocating allocator routines, then malloc could cause a
1220 1241
1221 /* The match routines may not allocate if (1) they would do it with malloc 1242 /* The match routines may not allocate if (1) they would do it with malloc
1222 and (2) it's not safe for them to use malloc. 1243 and (2) it's not safe for them to use malloc.
1223 Note that if REL_ALLOC is defined, matching would not use malloc for the 1244 Note that if REL_ALLOC is defined, matching would not use malloc for the
1224 failure stack, but we would still use it for the register vectors; 1245 failure stack, but we would still use it for the register vectors;
1225 so REL_ALLOC should not affect this. */ 1246 so REL_ALLOC should not affect this. */
1226 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs 1247 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1227 # undef MATCH_MAY_ALLOCATE 1248 # undef MATCH_MAY_ALLOCATE
1228 #endif 1249 #endif
1229 1250
1230 1251
1309 which allows approximately `re_max_failures' items. 1330 which allows approximately `re_max_failures' items.
1310 1331
1311 Return 1 if succeeds, and 0 if either ran out of memory 1332 Return 1 if succeeds, and 0 if either ran out of memory
1312 allocating space for it or it was already too large. 1333 allocating space for it or it was already too large.
1313 1334
1314 REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1335 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1315 1336
1316 /* Factor to increase the failure stack size by 1337 /* Factor to increase the failure stack size by
1317 when we increase it. 1338 when we increase it.
1318 This used to be 2, but 2 was too wasteful 1339 This used to be 2, but 2 was too wasteful
1319 because the old discarded stacks added up to as much space 1340 because the old discarded stacks added up to as much space
1353 1)) 1374 1))
1354 #define POP_PATTERN_OP() POP_FAILURE_POINTER () 1375 #define POP_PATTERN_OP() POP_FAILURE_POINTER ()
1355 1376
1356 /* Push a pointer value onto the failure stack. 1377 /* Push a pointer value onto the failure stack.
1357 Assumes the variable `fail_stack'. Probably should only 1378 Assumes the variable `fail_stack'. Probably should only
1358 be called from within `PUSH_FAILURE_POINT'. */ 1379 be called from within `PUSH_FAILURE_POINT'. */
1359 #define PUSH_FAILURE_POINTER(item) \ 1380 #define PUSH_FAILURE_POINTER(item) \
1360 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) 1381 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1361 1382
1362 /* This pushes an integer-valued item onto the failure stack. 1383 /* This pushes an integer-valued item onto the failure stack.
1363 Assumes the variable `fail_stack'. Probably should only 1384 Assumes the variable `fail_stack'. Probably should only
1364 be called from within `PUSH_FAILURE_POINT'. */ 1385 be called from within `PUSH_FAILURE_POINT'. */
1365 #define PUSH_FAILURE_INT(item) \ 1386 #define PUSH_FAILURE_INT(item) \
1366 fail_stack.stack[fail_stack.avail++].integer = (item) 1387 fail_stack.stack[fail_stack.avail++].integer = (item)
1367 1388
1368 /* Push a fail_stack_elt_t value onto the failure stack. 1389 /* Push a fail_stack_elt_t value onto the failure stack.
1369 Assumes the variable `fail_stack'. Probably should only 1390 Assumes the variable `fail_stack'. Probably should only
1370 be called from within `PUSH_FAILURE_POINT'. */ 1391 be called from within `PUSH_FAILURE_POINT'. */
1371 #define PUSH_FAILURE_ELT(item) \ 1392 #define PUSH_FAILURE_ELT(item) \
1372 fail_stack.stack[fail_stack.avail++] = (item) 1393 fail_stack.stack[fail_stack.avail++] = (item)
1373 1394
1374 /* These three POP... operations complement the three PUSH... operations. 1395 /* These three POP... operations complement the three PUSH... operations.
1375 All assume that `fail_stack' is nonempty. */ 1396 All assume that `fail_stack' is nonempty. */
1472 do { \ 1493 do { \
1473 char *destination; \ 1494 char *destination; \
1474 /* Must be int, so when we don't save any registers, the arithmetic \ 1495 /* Must be int, so when we don't save any registers, the arithmetic \
1475 of 0 + -1 isn't done as unsigned. */ \ 1496 of 0 + -1 isn't done as unsigned. */ \
1476 \ 1497 \
1477 DEBUG_STATEMENT (failure_id++); \
1478 DEBUG_STATEMENT (nfailure_points_pushed++); \ 1498 DEBUG_STATEMENT (nfailure_points_pushed++); \
1479 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 1499 DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n"); \
1480 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \ 1500 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
1481 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 1501 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1482 \ 1502 \
1483 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ 1503 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
1484 \ 1504 \
1555 } while (0) /* POP_FAILURE_POINT */ 1575 } while (0) /* POP_FAILURE_POINT */
1556 1576
1557 1577
1558 1578
1559 /* Registers are set to a sentinel when they haven't yet matched. */ 1579 /* Registers are set to a sentinel when they haven't yet matched. */
1560 #define REG_UNSET_VALUE NULL 1580 #define REG_UNSET(e) ((e) == NULL)
1561 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1562 1581
1563 /* Subroutine declarations and macros for regex_compile. */ 1582 /* Subroutine declarations and macros for regex_compile. */
1564 1583
1565 static void store_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg)); 1584 static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
1566 static void store_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, 1585 reg_syntax_t syntax,
1567 int arg1, int arg2)); 1586 struct re_pattern_buffer *bufp));
1568 static void insert_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, 1587 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1569 int arg, unsigned char *end)); 1588 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1570 static void insert_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, 1589 int arg1, int arg2));
1571 int arg1, int arg2, unsigned char *end)); 1590 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1572 static boolean at_begline_loc_p _RE_ARGS((const unsigned char *pattern, 1591 int arg, unsigned char *end));
1573 const unsigned char *p, 1592 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1574 reg_syntax_t syntax)); 1593 int arg1, int arg2, unsigned char *end));
1575 static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p, 1594 static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern,
1576 const unsigned char *pend, 1595 const unsigned char *p,
1577 reg_syntax_t syntax)); 1596 reg_syntax_t syntax));
1578 static unsigned char *skip_one_char _RE_ARGS((unsigned char *p)); 1597 static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p,
1579 static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend, 1598 const unsigned char *pend,
1580 char *fastmap, const int multibyte)); 1599 reg_syntax_t syntax));
1600 static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p));
1601 static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
1602 char *fastmap, const int multibyte));
1581 1603
1582 /* Fetch the next character in the uncompiled pattern---translating it 1604 /* Fetch the next character in the uncompiled pattern---translating it
1583 if necessary. Also cast from a signed character in the constant 1605 if necessary. Also cast from a signed character in the constant
1584 string passed to us by the user to an unsigned char that we can use 1606 string passed to us by the user to an unsigned char that we can use
1585 as an array index (in, e.g., `translate'). */ 1607 as an array index (in, e.g., `translate'). */
1588 PATFETCH_RAW (c); \ 1610 PATFETCH_RAW (c); \
1589 c = TRANSLATE (c); \ 1611 c = TRANSLATE (c); \
1590 } while (0) 1612 } while (0)
1591 1613
1592 /* Fetch the next character in the uncompiled pattern, with no 1614 /* Fetch the next character in the uncompiled pattern, with no
1593 translation. */ 1615 translation. */
1594 #define PATFETCH_RAW(c) \ 1616 #define PATFETCH_RAW(c) \
1595 do { \ 1617 do { \
1596 int len; \ 1618 int len; \
1597 if (p == pend) return REG_EEND; \ 1619 if (p == pend) return REG_EEND; \
1598 c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len); \ 1620 c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len); \
1613 /* Macros for outputting the compiled pattern into `buffer'. */ 1635 /* Macros for outputting the compiled pattern into `buffer'. */
1614 1636
1615 /* If the buffer isn't allocated when it comes in, use this. */ 1637 /* If the buffer isn't allocated when it comes in, use this. */
1616 #define INIT_BUF_SIZE 32 1638 #define INIT_BUF_SIZE 32
1617 1639
1618 /* Make sure we have at least N more bytes of space in buffer. */ 1640 /* Make sure we have at least N more bytes of space in buffer. */
1619 #define GET_BUFFER_SPACE(n) \ 1641 #define GET_BUFFER_SPACE(n) \
1620 while (b - bufp->buffer + (n) > bufp->allocated) \ 1642 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
1621 EXTEND_BUFFER () 1643 EXTEND_BUFFER ()
1622 1644
1623 /* Make sure we have one more byte of buffer space and then add C to it. */ 1645 /* Make sure we have one more byte of buffer space and then add C to it. */
1624 #define BUF_PUSH(c) \ 1646 #define BUF_PUSH(c) \
1625 do { \ 1647 do { \
1635 *b++ = (unsigned char) (c1); \ 1657 *b++ = (unsigned char) (c1); \
1636 *b++ = (unsigned char) (c2); \ 1658 *b++ = (unsigned char) (c2); \
1637 } while (0) 1659 } while (0)
1638 1660
1639 1661
1640 /* As with BUF_PUSH_2, except for three bytes. */ 1662 /* As with BUF_PUSH_2, except for three bytes. */
1641 #define BUF_PUSH_3(c1, c2, c3) \ 1663 #define BUF_PUSH_3(c1, c2, c3) \
1642 do { \ 1664 do { \
1643 GET_BUFFER_SPACE (3); \ 1665 GET_BUFFER_SPACE (3); \
1644 *b++ = (unsigned char) (c1); \ 1666 *b++ = (unsigned char) (c1); \
1645 *b++ = (unsigned char) (c2); \ 1667 *b++ = (unsigned char) (c2); \
1646 *b++ = (unsigned char) (c3); \ 1668 *b++ = (unsigned char) (c3); \
1647 } while (0) 1669 } while (0)
1648 1670
1649 1671
1650 /* Store a jump with opcode OP at LOC to location TO. We store a 1672 /* Store a jump with opcode OP at LOC to location TO. We store a
1651 relative address offset by the three bytes the jump itself occupies. */ 1673 relative address offset by the three bytes the jump itself occupies. */
1652 #define STORE_JUMP(op, loc, to) \ 1674 #define STORE_JUMP(op, loc, to) \
1653 store_op1 (op, loc, (to) - (loc) - 3) 1675 store_op1 (op, loc, (to) - (loc) - 3)
1654 1676
1655 /* Likewise, for a two-argument jump. */ 1677 /* Likewise, for a two-argument jump. */
1656 #define STORE_JUMP2(op, loc, to, arg) \ 1678 #define STORE_JUMP2(op, loc, to, arg) \
1657 store_op2 (op, loc, (to) - (loc) - 3, arg) 1679 store_op2 (op, loc, (to) - (loc) - 3, arg)
1658 1680
1659 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 1681 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
1660 #define INSERT_JUMP(op, loc, to) \ 1682 #define INSERT_JUMP(op, loc, to) \
1661 insert_op1 (op, loc, (to) - (loc) - 3, b) 1683 insert_op1 (op, loc, (to) - (loc) - 3, b)
1662 1684
1663 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 1685 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1664 #define INSERT_JUMP2(op, loc, to, arg) \ 1686 #define INSERT_JUMP2(op, loc, to, arg) \
1665 insert_op2 (op, loc, (to) - (loc) - 3, arg, b) 1687 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1666 1688
1667 1689
1668 /* This is not an arbitrary limit: the arguments which represent offsets 1690 /* This is not an arbitrary limit: the arguments which represent offsets
1669 into the pattern are two bytes long. So if 2^16 bytes turns out to 1691 into the pattern are two bytes long. So if 2^16 bytes turns out to
1670 be too small, many things would have to change. */ 1692 be too small, many things would have to change. */
1671 #define MAX_BUF_SIZE (1L << 16) 1693 /* Any other compiler which, like MSC, has allocation limit below 2^16
1672 1694 bytes will have to use approach similar to what was done below for
1695 MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
1696 reallocating to 0 bytes. Such thing is not going to work too well.
1697 You have been warned!! */
1698 #if defined _MSC_VER && !defined WIN32
1699 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. */
1700 # define MAX_BUF_SIZE 65500L
1701 #else
1702 # define MAX_BUF_SIZE (1L << 16)
1703 #endif
1673 1704
1674 /* Extend the buffer by twice its current size via realloc and 1705 /* Extend the buffer by twice its current size via realloc and
1675 reset the pointers that pointed into the old block to point to the 1706 reset the pointers that pointed into the old block to point to the
1676 correct places in the new one. If extending the buffer results in it 1707 correct places in the new one. If extending the buffer results in it
1677 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 1708 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1709 #if __BOUNDED_POINTERS__
1710 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1711 # define MOVE_BUFFER_POINTER(P) \
1712 (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
1713 # define ELSE_EXTEND_BUFFER_HIGH_BOUND \
1714 else \
1715 { \
1716 SET_HIGH_BOUND (b); \
1717 SET_HIGH_BOUND (begalt); \
1718 if (fixup_alt_jump) \
1719 SET_HIGH_BOUND (fixup_alt_jump); \
1720 if (laststart) \
1721 SET_HIGH_BOUND (laststart); \
1722 if (pending_exact) \
1723 SET_HIGH_BOUND (pending_exact); \
1724 }
1725 #else
1726 # define MOVE_BUFFER_POINTER(P) (P) += incr
1727 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
1728 #endif
1678 #define EXTEND_BUFFER() \ 1729 #define EXTEND_BUFFER() \
1679 do { \ 1730 do { \
1680 unsigned char *old_buffer = bufp->buffer; \ 1731 unsigned char *old_buffer = bufp->buffer; \
1681 if (bufp->allocated == MAX_BUF_SIZE) \ 1732 if (bufp->allocated == MAX_BUF_SIZE) \
1682 return REG_ESIZE; \ 1733 return REG_ESIZE; \
1687 if (bufp->buffer == NULL) \ 1738 if (bufp->buffer == NULL) \
1688 return REG_ESPACE; \ 1739 return REG_ESPACE; \
1689 /* If the buffer moved, move all the pointers into it. */ \ 1740 /* If the buffer moved, move all the pointers into it. */ \
1690 if (old_buffer != bufp->buffer) \ 1741 if (old_buffer != bufp->buffer) \
1691 { \ 1742 { \
1692 b = (b - old_buffer) + bufp->buffer; \ 1743 int incr = bufp->buffer - old_buffer; \
1693 begalt = (begalt - old_buffer) + bufp->buffer; \ 1744 MOVE_BUFFER_POINTER (b); \
1745 MOVE_BUFFER_POINTER (begalt); \
1694 if (fixup_alt_jump) \ 1746 if (fixup_alt_jump) \
1695 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ 1747 MOVE_BUFFER_POINTER (fixup_alt_jump); \
1696 if (laststart) \ 1748 if (laststart) \
1697 laststart = (laststart - old_buffer) + bufp->buffer; \ 1749 MOVE_BUFFER_POINTER (laststart); \
1698 if (pending_exact) \ 1750 if (pending_exact) \
1699 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ 1751 MOVE_BUFFER_POINTER (pending_exact); \
1700 } \ 1752 } \
1753 ELSE_EXTEND_BUFFER_HIGH_BOUND \
1701 } while (0) 1754 } while (0)
1702 1755
1703 1756
1704 /* Since we have one byte reserved for the register number argument to 1757 /* Since we have one byte reserved for the register number argument to
1705 {start,stop}_memory, the maximum number of groups we can report 1758 {start,stop}_memory, the maximum number of groups we can report
1712 1765
1713 1766
1714 /* Macros for the compile stack. */ 1767 /* Macros for the compile stack. */
1715 1768
1716 /* Since offsets can go either forwards or backwards, this type needs to 1769 /* Since offsets can go either forwards or backwards, this type needs to
1717 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 1770 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1718 typedef int pattern_offset_t; 1771 /* int may be not enough when sizeof(int) == 2. */
1772 typedef long pattern_offset_t;
1719 1773
1720 typedef struct 1774 typedef struct
1721 { 1775 {
1722 pattern_offset_t begalt_offset; 1776 pattern_offset_t begalt_offset;
1723 pattern_offset_t fixup_alt_jump; 1777 pattern_offset_t fixup_alt_jump;
1737 #define INIT_COMPILE_STACK_SIZE 32 1791 #define INIT_COMPILE_STACK_SIZE 32
1738 1792
1739 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 1793 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1740 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 1794 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1741 1795
1742 /* The next available element. */ 1796 /* The next available element. */
1743 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 1797 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1744 1798
1745 1799
1746 /* Structure to manage work area for range table. */ 1800 /* Structure to manage work area for range table. */
1747 struct range_table_work_area 1801 struct range_table_work_area
1808 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) 1862 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
1809 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) 1863 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1810 1864
1811 1865
1812 /* Set the bit for character C in a list. */ 1866 /* Set the bit for character C in a list. */
1813 #define SET_LIST_BIT(c) \ 1867 #define SET_LIST_BIT(c) \
1814 (b[((unsigned char) (c)) / BYTEWIDTH] \ 1868 (b[((unsigned char) (c)) / BYTEWIDTH] \
1815 |= 1 << (((unsigned char) c) % BYTEWIDTH)) 1869 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1816 1870
1817 1871
1818 /* Get the next unsigned number in the uncompiled pattern. */ 1872 /* Get the next unsigned number in the uncompiled pattern. */
1819 #define GET_UNSIGNED_NUMBER(num) \ 1873 #define GET_UNSIGNED_NUMBER(num) \
1820 do { if (p != pend) \ 1874 do { if (p != pend) \
1821 { \ 1875 { \
1822 PATFETCH (c); \ 1876 PATFETCH (c); \
1823 while (ISDIGIT (c)) \ 1877 while ('0' <= c && c <= '9') \
1824 { \ 1878 { \
1825 if (num < 0) \ 1879 if (num < 0) \
1826 num = 0; \ 1880 num = 0; \
1827 num = num * 10 + c - '0'; \ 1881 num = num * 10 + c - '0'; \
1828 if (p == pend) \ 1882 if (p == pend) \
1844 || STREQ (string, "word") \ 1898 || STREQ (string, "word") \
1845 || STREQ (string, "ascii") || STREQ (string, "nonascii") \ 1899 || STREQ (string, "ascii") || STREQ (string, "nonascii") \
1846 || STREQ (string, "unibyte") || STREQ (string, "multibyte")) 1900 || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
1847 1901
1848 /* QUIT is only used on NTemacs. */ 1902 /* QUIT is only used on NTemacs. */
1849 #if !defined WINDOWSNT || !defined emacs 1903 #if !defined WINDOWSNT || !defined emacs || !defined QUIT
1850 # undef QUIT 1904 # undef QUIT
1851 # define QUIT 1905 # define QUIT
1852 #endif 1906 #endif
1853 1907
1854 #ifndef MATCH_MAY_ALLOCATE 1908 #ifndef MATCH_MAY_ALLOCATE
1862 1916
1863 static fail_stack_type fail_stack; 1917 static fail_stack_type fail_stack;
1864 1918
1865 /* Size with which the following vectors are currently allocated. 1919 /* Size with which the following vectors are currently allocated.
1866 That is so we can make them bigger as needed, 1920 That is so we can make them bigger as needed,
1867 but never make them smaller. */ 1921 but never make them smaller. */
1868 static int regs_allocated_size; 1922 static int regs_allocated_size;
1869 1923
1870 static re_char ** regstart, ** regend; 1924 static re_char ** regstart, ** regend;
1871 static re_char **best_regstart, **best_regend; 1925 static re_char **best_regstart, **best_regend;
1872 1926
1873 /* Make the register vectors big enough for NUM_REGS registers, 1927 /* Make the register vectors big enough for NUM_REGS registers,
1874 but don't make them smaller. */ 1928 but don't make them smaller. */
1875 1929
1876 static 1930 static
1877 regex_grow_registers (num_regs) 1931 regex_grow_registers (num_regs)
1878 int num_regs; 1932 int num_regs;
1879 { 1933 {
1930 } while (0) 1984 } while (0)
1931 1985
1932 static reg_errcode_t 1986 static reg_errcode_t
1933 regex_compile (pattern, size, syntax, bufp) 1987 regex_compile (pattern, size, syntax, bufp)
1934 re_char *pattern; 1988 re_char *pattern;
1935 int size; 1989 size_t size;
1936 reg_syntax_t syntax; 1990 reg_syntax_t syntax;
1937 struct re_pattern_buffer *bufp; 1991 struct re_pattern_buffer *bufp;
1938 { 1992 {
1939 /* We fetch characters from PATTERN here. Even though PATTERN is 1993 /* We fetch characters from PATTERN here. Even though PATTERN is
1940 `char *' (i.e., signed), we declare these variables as unsigned, so 1994 `char *' (i.e., signed), we declare these variables as unsigned, so
2756 case '{': 2810 case '{':
2757 /* If \{ is a literal. */ 2811 /* If \{ is a literal. */
2758 if (!(syntax & RE_INTERVALS) 2812 if (!(syntax & RE_INTERVALS)
2759 /* If we're at `\{' and it's not the open-interval 2813 /* If we're at `\{' and it's not the open-interval
2760 operator. */ 2814 operator. */
2761 || (syntax & RE_NO_BK_BRACES) 2815 || (syntax & RE_NO_BK_BRACES))
2762 /* What is that? -sm */
2763 /* || (p - 2 == pattern && p == pend) */)
2764 goto normal_backslash; 2816 goto normal_backslash;
2765 2817
2766 handle_interval: 2818 handle_interval:
2767 { 2819 {
2768 /* If got here, then the syntax allows intervals. */ 2820 /* If got here, then the syntax allows intervals. */
2771 int lower_bound = 0, upper_bound = -1; 2823 int lower_bound = 0, upper_bound = -1;
2772 2824
2773 beg_interval = p; 2825 beg_interval = p;
2774 2826
2775 if (p == pend) 2827 if (p == pend)
2776 { 2828 FREE_STACK_RETURN (REG_EBRACE);
2777 if (syntax & RE_NO_BK_BRACES)
2778 goto unfetch_interval;
2779 else
2780 FREE_STACK_RETURN (REG_EBRACE);
2781 }
2782 2829
2783 GET_UNSIGNED_NUMBER (lower_bound); 2830 GET_UNSIGNED_NUMBER (lower_bound);
2784 2831
2785 if (c == ',') 2832 if (c == ',')
2786 GET_UNSIGNED_NUMBER (upper_bound); 2833 GET_UNSIGNED_NUMBER (upper_bound);
2788 /* Interval such as `{1}' => match exactly once. */ 2835 /* Interval such as `{1}' => match exactly once. */
2789 upper_bound = lower_bound; 2836 upper_bound = lower_bound;
2790 2837
2791 if (lower_bound < 0 || upper_bound > RE_DUP_MAX 2838 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2792 || (upper_bound >= 0 && lower_bound > upper_bound)) 2839 || (upper_bound >= 0 && lower_bound > upper_bound))
2793 { 2840 FREE_STACK_RETURN (REG_BADBR);
2794 if (syntax & RE_NO_BK_BRACES)
2795 goto unfetch_interval;
2796 else
2797 FREE_STACK_RETURN (REG_BADBR);
2798 }
2799 2841
2800 if (!(syntax & RE_NO_BK_BRACES)) 2842 if (!(syntax & RE_NO_BK_BRACES))
2801 { 2843 {
2802 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 2844 if (c != '\\')
2845 FREE_STACK_RETURN (REG_BADBR);
2803 2846
2804 PATFETCH (c); 2847 PATFETCH (c);
2805 } 2848 }
2806 2849
2807 if (c != '}') 2850 if (c != '}')
2808 { 2851 FREE_STACK_RETURN (REG_BADBR);
2809 if (syntax & RE_NO_BK_BRACES)
2810 goto unfetch_interval;
2811 else
2812 FREE_STACK_RETURN (REG_BADBR);
2813 }
2814 2852
2815 /* We just parsed a valid interval. */ 2853 /* We just parsed a valid interval. */
2816 2854
2817 /* If it's invalid to have no preceding re. */ 2855 /* If it's invalid to have no preceding re. */
2818 if (!laststart) 2856 if (!laststart)
2972 break; 3010 break;
2973 #endif /* emacs */ 3011 #endif /* emacs */
2974 3012
2975 3013
2976 case 'w': 3014 case 'w':
3015 if (syntax & RE_NO_GNU_OPS)
3016 goto normal_char;
2977 laststart = b; 3017 laststart = b;
2978 BUF_PUSH_2 (syntaxspec, Sword); 3018 BUF_PUSH_2 (syntaxspec, Sword);
2979 break; 3019 break;
2980 3020
2981 3021
2982 case 'W': 3022 case 'W':
3023 if (syntax & RE_NO_GNU_OPS)
3024 goto normal_char;
2983 laststart = b; 3025 laststart = b;
2984 BUF_PUSH_2 (notsyntaxspec, Sword); 3026 BUF_PUSH_2 (notsyntaxspec, Sword);
2985 break; 3027 break;
2986 3028
2987 3029
2988 case '<': 3030 case '<':
3031 if (syntax & RE_NO_GNU_OPS)
3032 goto normal_char;
2989 BUF_PUSH (wordbeg); 3033 BUF_PUSH (wordbeg);
2990 break; 3034 break;
2991 3035
2992 case '>': 3036 case '>':
3037 if (syntax & RE_NO_GNU_OPS)
3038 goto normal_char;
2993 BUF_PUSH (wordend); 3039 BUF_PUSH (wordend);
2994 break; 3040 break;
2995 3041
2996 case 'b': 3042 case 'b':
3043 if (syntax & RE_NO_GNU_OPS)
3044 goto normal_char;
2997 BUF_PUSH (wordbound); 3045 BUF_PUSH (wordbound);
2998 break; 3046 break;
2999 3047
3000 case 'B': 3048 case 'B':
3049 if (syntax & RE_NO_GNU_OPS)
3050 goto normal_char;
3001 BUF_PUSH (notwordbound); 3051 BUF_PUSH (notwordbound);
3002 break; 3052 break;
3003 3053
3004 case '`': 3054 case '`':
3055 if (syntax & RE_NO_GNU_OPS)
3056 goto normal_char;
3005 BUF_PUSH (begbuf); 3057 BUF_PUSH (begbuf);
3006 break; 3058 break;
3007 3059
3008 case '\'': 3060 case '\'':
3061 if (syntax & RE_NO_GNU_OPS)
3062 goto normal_char;
3009 BUF_PUSH (endbuf); 3063 BUF_PUSH (endbuf);
3010 break; 3064 break;
3011 3065
3012 case '1': case '2': case '3': case '4': case '5': 3066 case '1': case '2': case '3': case '4': case '5':
3013 case '6': case '7': case '8': case '9': 3067 case '6': case '7': case '8': case '9':
3018 3072
3019 if (c1 > regnum) 3073 if (c1 > regnum)
3020 FREE_STACK_RETURN (REG_ESUBREG); 3074 FREE_STACK_RETURN (REG_ESUBREG);
3021 3075
3022 /* Can't back reference to a subexpression if inside of it. */ 3076 /* Can't back reference to a subexpression if inside of it. */
3023 if (group_in_compile_stack (compile_stack, c1)) 3077 if (group_in_compile_stack (compile_stack, (regnum_t) c1))
3024 goto normal_char; 3078 goto normal_char;
3025 3079
3026 laststart = b; 3080 laststart = b;
3027 BUF_PUSH_2 (duplicate, c1); 3081 BUF_PUSH_2 (duplicate, c1);
3028 break; 3082 break;
3037 3091
3038 default: 3092 default:
3039 normal_backslash: 3093 normal_backslash:
3040 /* You might think it would be useful for \ to mean 3094 /* You might think it would be useful for \ to mean
3041 not to translate; but if we don't translate it 3095 not to translate; but if we don't translate it
3042 it will never match anything. */ 3096 it will never match anything. */
3043 c = TRANSLATE (c); 3097 c = TRANSLATE (c);
3044 goto normal_char; 3098 goto normal_char;
3045 } 3099 }
3046 break; 3100 break;
3047 3101
3315 it is allocated relocatably. */ 3369 it is allocated relocatably. */
3316 fail_stack_elt_t *failure_stack_ptr; 3370 fail_stack_elt_t *failure_stack_ptr;
3317 #endif 3371 #endif
3318 3372
3319 /* Assume that each path through the pattern can be null until 3373 /* Assume that each path through the pattern can be null until
3320 proven otherwise. We set this false at the bottom of switch 3374 proven otherwise. We set this false at the bottom of switch
3321 statement, to which we get only if a particular path doesn't 3375 statement, to which we get only if a particular path doesn't
3322 match the empty string. */ 3376 match the empty string. */
3323 boolean path_can_be_null = true; 3377 boolean path_can_be_null = true;
3324 3378
3325 /* If all elements for base leading-codes in fastmap is set, this 3379 /* If all elements for base leading-codes in fastmap is set, this
3978 while (d == dend) \ 4032 while (d == dend) \
3979 { \ 4033 { \
3980 /* End of string2 => fail. */ \ 4034 /* End of string2 => fail. */ \
3981 if (dend == end_match_2) \ 4035 if (dend == end_match_2) \
3982 goto fail; \ 4036 goto fail; \
3983 /* End of string1 => advance to string2. */ \ 4037 /* End of string1 => advance to string2. */ \
3984 d = string2; \ 4038 d = string2; \
3985 dend = end_match_2; \ 4039 dend = end_match_2; \
3986 } 4040 }
3987 4041
3988 /* Call before fetching a char with *d if you already checked other limits. 4042 /* Call before fetching a char with *d if you already checked other limits.
4314 struct re_pattern_buffer *bufp; 4368 struct re_pattern_buffer *bufp;
4315 const char *string; 4369 const char *string;
4316 int size, pos; 4370 int size, pos;
4317 struct re_registers *regs; 4371 struct re_registers *regs;
4318 { 4372 {
4319 int result = re_match_2_internal (bufp, NULL, 0, string, size, 4373 int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
4320 pos, regs, size); 4374 pos, regs, size);
4321 # if defined C_ALLOCA && !defined REGEX_MALLOC 4375 # if defined C_ALLOCA && !defined REGEX_MALLOC
4322 alloca (0); 4376 alloca (0);
4323 # endif 4377 # endif
4324 return result; 4378 return result;
4360 gl_state.object = re_match_object; 4414 gl_state.object = re_match_object;
4361 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); 4415 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
4362 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); 4416 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4363 #endif 4417 #endif
4364 4418
4365 result = re_match_2_internal (bufp, string1, size1, string2, size2, 4419 result = re_match_2_internal (bufp, (re_char*) string1, size1,
4420 (re_char*) string2, size2,
4366 pos, regs, stop); 4421 pos, regs, stop);
4367 #if defined C_ALLOCA && !defined REGEX_MALLOC 4422 #if defined C_ALLOCA && !defined REGEX_MALLOC
4368 alloca (0); 4423 alloca (0);
4369 #endif 4424 #endif
4370 return result; 4425 return result;
4421 scanning the strings. */ 4476 scanning the strings. */
4422 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 4477 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4423 fail_stack_type fail_stack; 4478 fail_stack_type fail_stack;
4424 #endif 4479 #endif
4425 #ifdef DEBUG 4480 #ifdef DEBUG
4426 static unsigned failure_id = 0;
4427 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 4481 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4428 #endif 4482 #endif
4429 4483
4430 #if defined REL_ALLOC && defined REGEX_MALLOC 4484 #if defined REL_ALLOC && defined REGEX_MALLOC
4431 /* This holds the pointer to the failure stack, when 4485 /* This holds the pointer to the failure stack, when
4434 #endif 4488 #endif
4435 4489
4436 /* We fill all the registers internally, independent of what we 4490 /* We fill all the registers internally, independent of what we
4437 return, for use in backreferences. The number here includes 4491 return, for use in backreferences. The number here includes
4438 an element for register zero. */ 4492 an element for register zero. */
4439 unsigned num_regs = bufp->re_nsub + 1; 4493 size_t num_regs = bufp->re_nsub + 1;
4440 4494
4441 /* Information on the contents of registers. These are pointers into 4495 /* Information on the contents of registers. These are pointers into
4442 the input strings; they record just what was matched (on this 4496 the input strings; they record just what was matched (on this
4443 attempt) by a subexpression part of the pattern, that is, the 4497 attempt) by a subexpression part of the pattern, that is, the
4444 regnum-th regstart pointer points to where in the pattern we began 4498 regnum-th regstart pointer points to where in the pattern we began
4513 4567
4514 /* Initialize subexpression text positions to -1 to mark ones that no 4568 /* Initialize subexpression text positions to -1 to mark ones that no
4515 start_memory/stop_memory has been seen for. Also initialize the 4569 start_memory/stop_memory has been seen for. Also initialize the
4516 register information struct. */ 4570 register information struct. */
4517 for (mcnt = 1; mcnt < num_regs; mcnt++) 4571 for (mcnt = 1; mcnt < num_regs; mcnt++)
4518 regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE; 4572 regstart[mcnt] = regend[mcnt] = NULL;
4519 4573
4520 /* We move `string1' into `string2' if the latter's empty -- but not if 4574 /* We move `string1' into `string2' if the latter's empty -- but not if
4521 `string1' is null. */ 4575 `string1' is null. */
4522 if (size2 == 0 && string1 != NULL) 4576 if (size2 == 0 && string1 != NULL)
4523 { 4577 {
4928 4982
4929 /* In case we need to undo this operation (via backtracking). */ 4983 /* In case we need to undo this operation (via backtracking). */
4930 PUSH_FAILURE_REG ((unsigned int)*p); 4984 PUSH_FAILURE_REG ((unsigned int)*p);
4931 4985
4932 regstart[*p] = d; 4986 regstart[*p] = d;
4933 regend[*p] = REG_UNSET_VALUE; /* probably unnecessary. -sm */ 4987 regend[*p] = NULL; /* probably unnecessary. -sm */
4934 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 4988 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4935 4989
4936 /* Move past the register number and inner group count. */ 4990 /* Move past the register number and inner group count. */
4937 p += 1; 4991 p += 1;
4938 break; 4992 break;
5021 5075
5022 /* Compare that many; failure if mismatch, else move 5076 /* Compare that many; failure if mismatch, else move
5023 past them. */ 5077 past them. */
5024 if (RE_TRANSLATE_P (translate) 5078 if (RE_TRANSLATE_P (translate)
5025 ? bcmp_translate (d, d2, mcnt, translate, multibyte) 5079 ? bcmp_translate (d, d2, mcnt, translate, multibyte)
5026 : bcmp (d, d2, mcnt)) 5080 : memcmp (d, d2, mcnt))
5027 { 5081 {
5028 d = dfail; 5082 d = dfail;
5029 goto fail; 5083 goto fail;
5030 } 5084 }
5031 d += mcnt, d2 += mcnt; 5085 d += mcnt, d2 += mcnt;
5235 if (mcnt != 0) 5289 if (mcnt != 0)
5236 { 5290 {
5237 mcnt--; 5291 mcnt--;
5238 p += 2; 5292 p += 2;
5239 PUSH_FAILURE_COUNT (p); 5293 PUSH_FAILURE_COUNT (p);
5294 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
5240 STORE_NUMBER_AND_INCR (p, mcnt); 5295 STORE_NUMBER_AND_INCR (p, mcnt);
5241 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
5242 } 5296 }
5243 else 5297 else
5244 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */ 5298 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */
5245 goto on_failure; 5299 goto on_failure;
5246 break; 5300 break;
5538 { 5592 {
5539 register re_char *p1 = s1, *p2 = s2; 5593 register re_char *p1 = s1, *p2 = s2;
5540 re_char *p1_end = s1 + len; 5594 re_char *p1_end = s1 + len;
5541 re_char *p2_end = s2 + len; 5595 re_char *p2_end = s2 + len;
5542 5596
5543 while (p1 != p1_end && p2 != p2_end) 5597 /* FIXME: Checking both p1 and p2 presumes that the two strings might have
5598 different lengths, but relying on a single `len' would break this. -sm */
5599 while (p1 < p1_end && p2 < p2_end)
5544 { 5600 {
5545 int p1_charlen, p2_charlen; 5601 int p1_charlen, p2_charlen;
5546 int p1_ch, p2_ch; 5602 int p1_ch, p2_ch;
5547 5603
5548 p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); 5604 p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
5590 bufp->no_sub = 0; 5646 bufp->no_sub = 0;
5591 5647
5592 /* Match anchors at newline. */ 5648 /* Match anchors at newline. */
5593 bufp->newline_anchor = 1; 5649 bufp->newline_anchor = 1;
5594 5650
5595 ret = regex_compile (pattern, length, re_syntax_options, bufp); 5651 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
5596 5652
5597 if (!ret) 5653 if (!ret)
5598 return NULL; 5654 return NULL;
5599 return gettext (re_error_msgid[(int) ret]); 5655 return gettext (re_error_msgid[(int) ret]);
5600 } 5656 }
5713 regex_t *preg; 5769 regex_t *preg;
5714 const char *pattern; 5770 const char *pattern;
5715 int cflags; 5771 int cflags;
5716 { 5772 {
5717 reg_errcode_t ret; 5773 reg_errcode_t ret;
5718 unsigned syntax 5774 reg_syntax_t syntax
5719 = (cflags & REG_EXTENDED) ? 5775 = (cflags & REG_EXTENDED) ?
5720 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; 5776 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5721 5777
5722 /* regex_compile will allocate the space for the compiled pattern. */ 5778 /* regex_compile will allocate the space for the compiled pattern. */
5723 preg->buffer = 0; 5779 preg->buffer = 0;
5740 if (preg->translate == NULL) 5796 if (preg->translate == NULL)
5741 return (int) REG_ESPACE; 5797 return (int) REG_ESPACE;
5742 5798
5743 /* Map uppercase characters to corresponding lowercase ones. */ 5799 /* Map uppercase characters to corresponding lowercase ones. */
5744 for (i = 0; i < CHAR_SET_SIZE; i++) 5800 for (i = 0; i < CHAR_SET_SIZE; i++)
5745 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 5801 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
5746 } 5802 }
5747 else 5803 else
5748 preg->translate = NULL; 5804 preg->translate = NULL;
5749 5805
5750 /* If REG_NEWLINE is set, newlines are treated differently. */ 5806 /* If REG_NEWLINE is set, newlines are treated differently. */
5760 5816
5761 preg->no_sub = !!(cflags & REG_NOSUB); 5817 preg->no_sub = !!(cflags & REG_NOSUB);
5762 5818
5763 /* POSIX says a null character in the pattern terminates it, so we 5819 /* POSIX says a null character in the pattern terminates it, so we
5764 can use strlen here in compiling the pattern. */ 5820 can use strlen here in compiling the pattern. */
5765 ret = regex_compile (pattern, strlen (pattern), syntax, preg); 5821 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
5766 5822
5767 /* POSIX doesn't distinguish between an unmatched open-group and an 5823 /* POSIX doesn't distinguish between an unmatched open-group and an
5768 unmatched close-group: both are REG_EPAREN. */ 5824 unmatched close-group: both are REG_EPAREN. */
5769 if (ret == REG_ERPAREN) ret = REG_EPAREN; 5825 if (ret == REG_ERPAREN) ret = REG_EPAREN;
5770 5826
5811 private_preg.regs_allocated = REGS_FIXED; 5867 private_preg.regs_allocated = REGS_FIXED;
5812 5868
5813 if (want_reg_info) 5869 if (want_reg_info)
5814 { 5870 {
5815 regs.num_regs = nmatch; 5871 regs.num_regs = nmatch;
5816 regs.start = TALLOC (nmatch, regoff_t); 5872 regs.start = TALLOC (nmatch * 2, regoff_t);
5817 regs.end = TALLOC (nmatch, regoff_t); 5873 if (regs.start == NULL)
5818 if (regs.start == NULL || regs.end == NULL)
5819 return (int) REG_NOMATCH; 5874 return (int) REG_NOMATCH;
5875 regs.end = regs.start + nmatch;
5820 } 5876 }
5821 5877
5822 /* Perform the searching operation. */ 5878 /* Perform the searching operation. */
5823 ret = re_search (&private_preg, string, len, 5879 ret = re_search (&private_preg, string, len,
5824 /* start: */ 0, /* range: */ len, 5880 /* start: */ 0, /* range: */ len,
5838 } 5894 }
5839 } 5895 }
5840 5896
5841 /* If we needed the temporary register info, free the space now. */ 5897 /* If we needed the temporary register info, free the space now. */
5842 free (regs.start); 5898 free (regs.start);
5843 free (regs.end);
5844 } 5899 }
5845 5900
5846 /* We want zero return to mean success, unlike `re_search'. */ 5901 /* We want zero return to mean success, unlike `re_search'. */
5847 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; 5902 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5848 } 5903 }