Mercurial > emacs
comparison src/regex.c @ 31299:34c25566aab3
Merge some changes from GNU libc. Add prototypes.
(bcopy, bcmp, REGEX_REALLOCATE, re_match_2_internal):
Use memcmp and memcpy instead of bcopy and bcmp.
(init_syntax_once): Use ISALNUM.
(PUSH_FAILURE_POINT, re_match_2_internal): Remove failure_id.
(REG_UNSET_VALUE): Remove. Use NULL instead.
(REG_UNSET, re_match_2_internal): Use NULL.
(SET_HIGH_BOUND, MOVE_BUFFER_POINTER, ELSE_EXTEND_BUFFER_HIGH_BOUND):
New macros.
(EXTEND_BUFFER): Use them (to work with BOUNDED_POINTERS).
(GET_UNSIGNED_NUMBER): Don't use ISDIGIT.
(regex_compile): In handle_interval, return an error rather than try to
unfetch the interval if we can't find the closing brace.
Obey the RE_NO_GNU_OPS syntax bit.
(TOLOWER): New macro.
(regcomp): Use it.
(regexec): Allocate regs.start and regs.end as one block.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Wed, 30 Aug 2000 18:31:17 +0000 |
parents | 9efb8adfefa4 |
children | e6b19a60e035 |
comparison (legend: equal, deleted, inserted, replaced)
31298:820483a506d0 | 31299:34c25566aab3 |
---|---|
1 /* Extended regular expression matching and search library, version | 1 /* Extended regular expression matching and search library, version |
2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the | 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the |
3 internationalization features.) | 3 internationalization features.) |
4 | 4 |
5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc. | 5 Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc. |
6 | 6 |
7 This program is free software; you can redistribute it and/or modify | 7 This program is free software; you can redistribute it and/or modify |
8 it under the terms of the GNU General Public License as published by | 8 it under the terms of the GNU General Public License as published by |
9 the Free Software Foundation; either version 2, or (at your option) | 9 the Free Software Foundation; either version 2, or (at your option) |
10 any later version. | 10 any later version. |
23 - structure the opcode space into opcode+flag. | 23 - structure the opcode space into opcode+flag. |
24 - merge with glibc's regex.[ch]. | 24 - merge with glibc's regex.[ch]. |
25 - replace succeed_n + jump_n with a combined operation so that the counter | 25 - replace succeed_n + jump_n with a combined operation so that the counter |
26 can simply be decremented when popping the failure_point without having | 26 can simply be decremented when popping the failure_point without having |
27 to stack up failure_count entries. | 27 to stack up failure_count entries. |
28 - get rid of `newline_anchor'. | |
28 */ | 29 */ |
29 | 30 |
30 /* AIX requires this to be the first thing in the file. */ | 31 /* AIX requires this to be the first thing in the file. */ |
31 #if defined _AIX && !defined REGEX_MALLOC | 32 #if defined _AIX && !defined REGEX_MALLOC |
32 #pragma alloca | 33 #pragma alloca |
37 | 38 |
38 #ifdef HAVE_CONFIG_H | 39 #ifdef HAVE_CONFIG_H |
39 # include <config.h> | 40 # include <config.h> |
40 #endif | 41 #endif |
41 | 42 |
42 /* We need this for `regex.h', and perhaps for the Emacs include files. */ | 43 #if defined STDC_HEADERS && !defined emacs |
43 #include <sys/types.h> | 44 # include <stddef.h> |
44 | 45 #else |
45 /* This is for other GNU distributions with internationalized messages. */ | 46 /* We need this for `regex.h', and perhaps for the Emacs include files. */ |
47 # include <sys/types.h> | |
48 #endif | |
49 | |
50 /* This is for other GNU distributions with internationalized messages. */ | |
46 #if HAVE_LIBINTL_H || defined _LIBC | 51 #if HAVE_LIBINTL_H || defined _LIBC |
47 # include <libintl.h> | 52 # include <libintl.h> |
48 #else | 53 #else |
49 # define gettext(msgid) (msgid) | 54 # define gettext(msgid) (msgid) |
50 #endif | 55 #endif |
113 char *malloc (); | 118 char *malloc (); |
114 char *realloc (); | 119 char *realloc (); |
115 # endif | 120 # endif |
116 | 121 |
117 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. | 122 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. |
118 If nothing else has been done, use the method below. */ | 123 If nothing else has been done, use the method below. */ |
119 # ifdef INHIBIT_STRING_HEADER | 124 # ifdef INHIBIT_STRING_HEADER |
120 # if !(defined HAVE_BZERO && defined HAVE_BCOPY) | 125 # if !(defined HAVE_BZERO && defined HAVE_BCOPY) |
121 # if !defined bzero && !defined bcopy | 126 # if !defined bzero && !defined bcopy |
122 # undef INHIBIT_STRING_HEADER | 127 # undef INHIBIT_STRING_HEADER |
123 # endif | 128 # endif |
124 # endif | 129 # endif |
125 # endif | 130 # endif |
126 | 131 |
127 /* This is the normal way of making sure we have a bcopy and a bzero. | 132 /* This is the normal way of making sure we have memcpy, memcmp and bzero. |
128 This is used in most programs--a few other programs avoid this | 133 This is used in most programs--a few other programs avoid this |
129 by defining INHIBIT_STRING_HEADER. */ | 134 by defining INHIBIT_STRING_HEADER. */ |
130 # ifndef INHIBIT_STRING_HEADER | 135 # ifndef INHIBIT_STRING_HEADER |
131 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC | 136 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC |
132 # include <string.h> | 137 # include <string.h> |
133 # ifndef bcmp | |
134 # define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) | |
135 # endif | |
136 # ifndef bcopy | |
137 # define bcopy(s, d, n) memcpy ((d), (s), (n)) | |
138 # endif | |
139 # ifndef bzero | 138 # ifndef bzero |
140 # define bzero(s, n) memset ((s), 0, (n)) | 139 # ifndef _LIBC |
140 # define bzero(s, n) (memset (s, '\0', n), (s)) | |
141 # else | |
142 # define bzero(s, n) __bzero (s, n) | |
143 # endif | |
141 # endif | 144 # endif |
142 # else | 145 # else |
143 # include <strings.h> | 146 # include <strings.h> |
147 # ifndef memcmp | |
148 # define memcmp(s1, s2, n) bcmp (s1, s2, n) | |
149 # endif | |
150 # ifndef memcpy | |
151 # define memcpy(d, s, n) (bcopy (s, d, n), (d)) | |
152 # endif | |
144 # endif | 153 # endif |
145 # endif | 154 # endif |
146 | 155 |
147 /* Define the syntax stuff for \<, \>, etc. */ | 156 /* Define the syntax stuff for \<, \>, etc. */ |
148 | 157 |
152 # ifdef SWITCH_ENUM_BUG | 161 # ifdef SWITCH_ENUM_BUG |
153 # define SWITCH_ENUM_CAST(x) ((int)(x)) | 162 # define SWITCH_ENUM_CAST(x) ((int)(x)) |
154 # else | 163 # else |
155 # define SWITCH_ENUM_CAST(x) (x) | 164 # define SWITCH_ENUM_CAST(x) (x) |
156 # endif | 165 # endif |
157 | |
158 # define SYNTAX(c) re_syntax_table[c] | |
159 | 166 |
160 /* Dummy macros for non-Emacs environments. */ | 167 /* Dummy macros for non-Emacs environments. */ |
161 # define BASE_LEADING_CODE_P(c) (0) | 168 # define BASE_LEADING_CODE_P(c) (0) |
162 # define CHAR_CHARSET(c) 0 | 169 # define CHAR_CHARSET(c) 0 |
163 # define CHARSET_LEADING_CODE_BASE(c) 0 | 170 # define CHARSET_LEADING_CODE_BASE(c) 0 |
233 # define ISLOWER(c) (LOWERCASEP (c)) | 240 # define ISLOWER(c) (LOWERCASEP (c)) |
234 | 241 |
235 # define ISPUNCT(c) (IS_REAL_ASCII (c) \ | 242 # define ISPUNCT(c) (IS_REAL_ASCII (c) \ |
236 ? ((c) > ' ' && (c) < 0177 \ | 243 ? ((c) > ' ' && (c) < 0177 \ |
237 && !(((c) >= 'a' && (c) <= 'z') \ | 244 && !(((c) >= 'a' && (c) <= 'z') \ |
238 || ((c) >= 'A' && (c) <= 'Z') \ | 245 || ((c) >= 'A' && (c) <= 'Z') \ |
239 || ((c) >= '0' && (c) <= '9'))) \ | 246 || ((c) >= '0' && (c) <= '9'))) \ |
240 : SYNTAX (c) != Sword) | 247 : SYNTAX (c) != Sword) |
241 | 248 |
242 # define ISSPACE(c) (SYNTAX (c) == Swhitespace) | 249 # define ISSPACE(c) (SYNTAX (c) == Swhitespace) |
243 | 250 |
244 # define ISUPPER(c) (UPPERCASEP (c)) | 251 # define ISUPPER(c) (UPPERCASEP (c)) |
250 /* Jim Meyering writes: | 257 /* Jim Meyering writes: |
251 | 258 |
252 "... Some ctype macros are valid only for character codes that | 259 "... Some ctype macros are valid only for character codes that |
253 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when | 260 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when |
254 using /bin/cc or gcc but without giving an ansi option). So, all | 261 using /bin/cc or gcc but without giving an ansi option). So, all |
255 ctype uses should be through macros like ISPRINT... If | 262 ctype uses should be through macros like ISPRINT... If |
256 STDC_HEADERS is defined, then autoconf has verified that the ctype | 263 STDC_HEADERS is defined, then autoconf has verified that the ctype |
257 macros don't need to be guarded with references to isascii. ... | 264 macros don't need to be guarded with references to isascii. ... |
258 Defining isascii to 1 should let any compiler worth its salt | 265 Defining isascii to 1 should let any compiler worth its salt |
259 eliminate the && through constant folding." */ | 266 eliminate the && through constant folding." |
260 | 267 Solaris defines some of these symbols so we must undefine them first. */ |
268 | |
269 # undef ISASCII | |
261 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) | 270 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) |
262 # define ISASCII(c) 1 | 271 # define ISASCII(c) 1 |
263 # else | 272 # else |
264 # define ISASCII(c) isascii(c) | 273 # define ISASCII(c) isascii(c) |
265 # endif | 274 # endif |
279 # define ISGRAPH(c) (ISASCII (c) && isgraph (c)) | 288 # define ISGRAPH(c) (ISASCII (c) && isgraph (c)) |
280 # else | 289 # else |
281 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) | 290 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) |
282 # endif | 291 # endif |
283 | 292 |
293 # undef ISPRINT | |
284 # define ISPRINT(c) (ISASCII (c) && isprint (c)) | 294 # define ISPRINT(c) (ISASCII (c) && isprint (c)) |
285 # define ISDIGIT(c) (ISASCII (c) && isdigit (c)) | 295 # define ISDIGIT(c) (ISASCII (c) && isdigit (c)) |
286 # define ISALNUM(c) (ISASCII (c) && isalnum (c)) | 296 # define ISALNUM(c) (ISASCII (c) && isalnum (c)) |
287 # define ISALPHA(c) (ISASCII (c) && isalpha (c)) | 297 # define ISALPHA(c) (ISASCII (c) && isalpha (c)) |
288 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) | 298 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) |
292 # define ISUPPER(c) (ISASCII (c) && isupper (c)) | 302 # define ISUPPER(c) (ISASCII (c) && isupper (c)) |
293 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) | 303 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) |
294 | 304 |
295 # define ISWORD(c) ISALPHA(c) | 305 # define ISWORD(c) ISALPHA(c) |
296 | 306 |
307 # ifdef _tolower | |
308 # define TOLOWER(c) _tolower(c) | |
309 # else | |
310 # define TOLOWER(c) tolower(c) | |
311 # endif | |
312 | |
313 /* How many characters in the character set. */ | |
314 # define CHAR_SET_SIZE 256 | |
315 | |
297 # ifdef SYNTAX_TABLE | 316 # ifdef SYNTAX_TABLE |
298 | 317 |
299 extern char *re_syntax_table; | 318 extern char *re_syntax_table; |
300 | 319 |
301 # else /* not SYNTAX_TABLE */ | 320 # else /* not SYNTAX_TABLE */ |
302 | |
303 /* How many characters in the character set. */ | |
304 # define CHAR_SET_SIZE 256 | |
305 | 321 |
306 static char re_syntax_table[CHAR_SET_SIZE]; | 322 static char re_syntax_table[CHAR_SET_SIZE]; |
307 | 323 |
308 static void | 324 static void |
309 init_syntax_once () | 325 init_syntax_once () |
314 if (done) | 330 if (done) |
315 return; | 331 return; |
316 | 332 |
317 bzero (re_syntax_table, sizeof re_syntax_table); | 333 bzero (re_syntax_table, sizeof re_syntax_table); |
318 | 334 |
319 for (c = 'a'; c <= 'z'; c++) | 335 for (c = 0; c < CHAR_SET_SIZE; ++c) |
320 re_syntax_table[c] = Sword; | 336 if (ISALNUM (c)) |
321 | 337 re_syntax_table[c] = Sword; |
322 for (c = 'A'; c <= 'Z'; c++) | |
323 re_syntax_table[c] = Sword; | |
324 | |
325 for (c = '0'; c <= '9'; c++) | |
326 re_syntax_table[c] = Sword; | |
327 | 338 |
328 re_syntax_table['_'] = Sword; | 339 re_syntax_table['_'] = Sword; |
329 | 340 |
330 done = 1; | 341 done = 1; |
331 } | 342 } |
332 | 343 |
333 # endif /* not SYNTAX_TABLE */ | 344 # endif /* not SYNTAX_TABLE */ |
345 | |
346 # define SYNTAX(c) re_syntax_table[(c)] | |
334 | 347 |
335 #endif /* not emacs */ | 348 #endif /* not emacs */ |
336 | 349 |
337 #ifndef NULL | 350 #ifndef NULL |
338 # define NULL (void *)0 | 351 # define NULL (void *)0 |
339 #endif | 352 #endif |
340 | 353 |
341 /* We remove any previous definition of `SIGN_EXTEND_CHAR', | 354 /* We remove any previous definition of `SIGN_EXTEND_CHAR', |
342 since ours (we hope) works properly with all combinations of | 355 since ours (we hope) works properly with all combinations of |
343 machines, compilers, `char' and `unsigned char' argument types. | 356 machines, compilers, `char' and `unsigned char' argument types. |
344 (Per Bothner suggested the basic approach.) */ | 357 (Per Bothner suggested the basic approach.) */ |
345 #undef SIGN_EXTEND_CHAR | 358 #undef SIGN_EXTEND_CHAR |
346 #if __STDC__ | 359 #if __STDC__ |
347 # define SIGN_EXTEND_CHAR(c) ((signed char) (c)) | 360 # define SIGN_EXTEND_CHAR(c) ((signed char) (c)) |
348 #else /* not __STDC__ */ | 361 #else /* not __STDC__ */ |
349 /* As in Harbison and Steele. */ | 362 /* As in Harbison and Steele. */ |
385 # define REGEX_ALLOCATE alloca | 398 # define REGEX_ALLOCATE alloca |
386 | 399 |
387 /* Assumes a `char *destination' variable. */ | 400 /* Assumes a `char *destination' variable. */ |
388 # define REGEX_REALLOCATE(source, osize, nsize) \ | 401 # define REGEX_REALLOCATE(source, osize, nsize) \ |
389 (destination = (char *) alloca (nsize), \ | 402 (destination = (char *) alloca (nsize), \ |
390 bcopy (source, destination, osize), \ | 403 memcpy (destination, source, osize)) |
391 destination) | |
392 | 404 |
393 /* No need to do anything to free, after alloca. */ | 405 /* No need to do anything to free, after alloca. */ |
394 # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ | 406 # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ |
395 | 407 |
396 #endif /* not REGEX_MALLOC */ | 408 #endif /* not REGEX_MALLOC */ |
438 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) | 450 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) |
439 #define RETALLOC_IF(addr, n, t) \ | 451 #define RETALLOC_IF(addr, n, t) \ |
440 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) | 452 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) |
441 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) | 453 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) |
442 | 454 |
443 #define BYTEWIDTH 8 /* In bits. */ | 455 #define BYTEWIDTH 8 /* In bits. */ |
444 | 456 |
445 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) | 457 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) |
446 | 458 |
447 #undef MAX | 459 #undef MAX |
448 #undef MIN | 460 #undef MIN |
454 | 466 |
455 typedef char boolean; | 467 typedef char boolean; |
456 #define false 0 | 468 #define false 0 |
457 #define true 1 | 469 #define true 1 |
458 | 470 |
459 static int re_match_2_internal (); | 471 static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp, |
472 re_char *string1, int size1, | |
473 re_char *string2, int size2, | |
474 int pos, | |
475 struct re_registers *regs, | |
476 int stop)); | |
460 | 477 |
461 /* These are the command codes that appear in compiled regular | 478 /* These are the command codes that appear in compiled regular |
462 expressions. Some opcodes are followed by argument bytes. A | 479 expressions. Some opcodes are followed by argument bytes. A |
463 command code can specify any interpretation whatsoever for its | 480 command code can specify any interpretation whatsoever for its |
464 arguments. Zero bytes may appear in the compiled regular expression. */ | 481 arguments. Zero bytes may appear in the compiled regular expression. */ |
465 | 482 |
466 typedef enum | 483 typedef enum |
467 { | 484 { |
468 no_op = 0, | 485 no_op = 0, |
469 | 486 |
470 /* Succeed right away--no more backtracking. */ | 487 /* Succeed right away--no more backtracking. */ |
471 succeed, | 488 succeed, |
472 | 489 |
473 /* Followed by one byte giving n, then by n literal bytes. */ | 490 /* Followed by one byte giving n, then by n literal bytes. */ |
474 exactn, | 491 exactn, |
475 | 492 |
491 pairs, each 2 multibyte characters, | 508 pairs, each 2 multibyte characters, |
492 each multibyte character represented as 3 bytes. */ | 509 each multibyte character represented as 3 bytes. */ |
493 charset, | 510 charset, |
494 | 511 |
495 /* Same parameters as charset, but match any character that is | 512 /* Same parameters as charset, but match any character that is |
496 not one of those specified. */ | 513 not one of those specified. */ |
497 charset_not, | 514 charset_not, |
498 | 515 |
499 /* Start remembering the text that is matched, for storing in a | 516 /* Start remembering the text that is matched, for storing in a |
500 register. Followed by one byte with the register number, in | 517 register. Followed by one byte with the register number, in |
501 the range 0 to one less than the pattern buffer's re_nsub | 518 the range 0 to one less than the pattern buffer's re_nsub |
507 number, in the range 0 to one less than `re_nsub' in the | 524 number, in the range 0 to one less than `re_nsub' in the |
508 pattern buffer. */ | 525 pattern buffer. */ |
509 stop_memory, | 526 stop_memory, |
510 | 527 |
511 /* Match a duplicate of something remembered. Followed by one | 528 /* Match a duplicate of something remembered. Followed by one |
512 byte containing the register number. */ | 529 byte containing the register number. */ |
513 duplicate, | 530 duplicate, |
514 | 531 |
515 /* Fail unless at beginning of line. */ | 532 /* Fail unless at beginning of line. */ |
516 begline, | 533 begline, |
517 | 534 |
518 /* Fail unless at end of line. */ | 535 /* Fail unless at end of line. */ |
519 endline, | 536 endline, |
520 | 537 |
521 /* Succeeds if at beginning of buffer (if emacs) or at beginning | 538 /* Succeeds if at beginning of buffer (if emacs) or at beginning |
522 of string to be matched (if not). */ | 539 of string to be matched (if not). */ |
523 begbuf, | 540 begbuf, |
628 (destination) = *(source) & 0377; \ | 645 (destination) = *(source) & 0377; \ |
629 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ | 646 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ |
630 } while (0) | 647 } while (0) |
631 | 648 |
632 #ifdef DEBUG | 649 #ifdef DEBUG |
650 static void extract_number _RE_ARGS ((int *dest, re_char *source)); | |
633 static void | 651 static void |
634 extract_number (dest, source) | 652 extract_number (dest, source) |
635 int *dest; | 653 int *dest; |
636 unsigned char *source; | 654 unsigned char *source; |
637 { | 655 { |
638 int temp = SIGN_EXTEND_CHAR (*(source + 1)); | 656 int temp = SIGN_EXTEND_CHAR (*(source + 1)); |
639 *dest = *source & 0377; | 657 *dest = *source & 0377; |
640 *dest += temp << 8; | 658 *dest += temp << 8; |
641 } | 659 } |
642 | 660 |
643 # ifndef EXTRACT_MACROS /* To debug the macros. */ | 661 # ifndef EXTRACT_MACROS /* To debug the macros. */ |
644 # undef EXTRACT_NUMBER | 662 # undef EXTRACT_NUMBER |
645 # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) | 663 # define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) |
646 # endif /* not EXTRACT_MACROS */ | 664 # endif /* not EXTRACT_MACROS */ |
647 | 665 |
648 #endif /* DEBUG */ | 666 #endif /* DEBUG */ |
655 EXTRACT_NUMBER (destination, source); \ | 673 EXTRACT_NUMBER (destination, source); \ |
656 (source) += 2; \ | 674 (source) += 2; \ |
657 } while (0) | 675 } while (0) |
658 | 676 |
659 #ifdef DEBUG | 677 #ifdef DEBUG |
678 static void extract_number_and_incr _RE_ARGS ((int *destination, | |
679 re_char **source)); | |
660 static void | 680 static void |
661 extract_number_and_incr (destination, source) | 681 extract_number_and_incr (destination, source) |
662 int *destination; | 682 int *destination; |
663 unsigned char **source; | 683 unsigned char **source; |
664 { | 684 { |
769 | 789 |
770 /* If DEBUG is defined, Regex prints many voluminous messages about what | 790 /* If DEBUG is defined, Regex prints many voluminous messages about what |
771 it is doing (if the variable `debug' is nonzero). If linked with the | 791 it is doing (if the variable `debug' is nonzero). If linked with the |
772 main program in `iregex.c', you can enter patterns and strings | 792 main program in `iregex.c', you can enter patterns and strings |
773 interactively. And if linked with the main program in `main.c' and | 793 interactively. And if linked with the main program in `main.c' and |
774 the other test files, you can run the already-written tests. */ | 794 the other test files, you can run the already-written tests. */ |
775 | 795 |
776 #ifdef DEBUG | 796 #ifdef DEBUG |
777 | 797 |
778 /* We use standard I/O for debugging. */ | 798 /* We use standard I/O for debugging. */ |
779 # include <stdio.h> | 799 # include <stdio.h> |
1074 struct re_pattern_buffer *bufp; | 1094 struct re_pattern_buffer *bufp; |
1075 { | 1095 { |
1076 unsigned char *buffer = bufp->buffer; | 1096 unsigned char *buffer = bufp->buffer; |
1077 | 1097 |
1078 print_partial_compiled_pattern (buffer, buffer + bufp->used); | 1098 print_partial_compiled_pattern (buffer, buffer + bufp->used); |
1079 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated); | 1099 printf ("%ld bytes used/%ld bytes allocated.\n", |
1100 bufp->used, bufp->allocated); | |
1080 | 1101 |
1081 if (bufp->fastmap_accurate && bufp->fastmap) | 1102 if (bufp->fastmap_accurate && bufp->fastmap) |
1082 { | 1103 { |
1083 printf ("fastmap: "); | 1104 printf ("fastmap: "); |
1084 print_fastmap (bufp->fastmap); | 1105 print_fastmap (bufp->fastmap); |
1089 printf ("can_be_null: %d\t", bufp->can_be_null); | 1110 printf ("can_be_null: %d\t", bufp->can_be_null); |
1090 printf ("newline_anchor: %d\n", bufp->newline_anchor); | 1111 printf ("newline_anchor: %d\n", bufp->newline_anchor); |
1091 printf ("no_sub: %d\t", bufp->no_sub); | 1112 printf ("no_sub: %d\t", bufp->no_sub); |
1092 printf ("not_bol: %d\t", bufp->not_bol); | 1113 printf ("not_bol: %d\t", bufp->not_bol); |
1093 printf ("not_eol: %d\t", bufp->not_eol); | 1114 printf ("not_eol: %d\t", bufp->not_eol); |
1094 printf ("syntax: %d\n", bufp->syntax); | 1115 printf ("syntax: %lx\n", bufp->syntax); |
1095 fflush (stdout); | 1116 fflush (stdout); |
1096 /* Perhaps we should print the translate table? */ | 1117 /* Perhaps we should print the translate table? */ |
1097 } | 1118 } |
1098 | 1119 |
1099 | 1120 |
1103 re_char *string1; | 1124 re_char *string1; |
1104 re_char *string2; | 1125 re_char *string2; |
1105 int size1; | 1126 int size1; |
1106 int size2; | 1127 int size2; |
1107 { | 1128 { |
1108 unsigned this_char; | 1129 int this_char; |
1109 | 1130 |
1110 if (where == NULL) | 1131 if (where == NULL) |
1111 printf ("(null)"); | 1132 printf ("(null)"); |
1112 else | 1133 else |
1113 { | 1134 { |
1150 /* Specify the precise syntax of regexps for compilation. This provides | 1171 /* Specify the precise syntax of regexps for compilation. This provides |
1151 for compatibility for various utilities which historically have | 1172 for compatibility for various utilities which historically have |
1152 different, incompatible syntaxes. | 1173 different, incompatible syntaxes. |
1153 | 1174 |
1154 The argument SYNTAX is a bit mask comprised of the various bits | 1175 The argument SYNTAX is a bit mask comprised of the various bits |
1155 defined in regex.h. We return the old syntax. */ | 1176 defined in regex.h. We return the old syntax. */ |
1156 | 1177 |
1157 reg_syntax_t | 1178 reg_syntax_t |
1158 re_set_syntax (syntax) | 1179 re_set_syntax (syntax) |
1159 reg_syntax_t syntax; | 1180 reg_syntax_t syntax; |
1160 { | 1181 { |
1163 re_syntax_options = syntax; | 1184 re_syntax_options = syntax; |
1164 return ret; | 1185 return ret; |
1165 } | 1186 } |
1166 | 1187 |
1167 /* This table gives an error message for each of the error codes listed | 1188 /* This table gives an error message for each of the error codes listed |
1168 in regex.h. Obviously the order here has to be the same as there. | 1189 in regex.h. Obviously the order here has to be the same as there. |
1169 POSIX doesn't require that we do anything for REG_NOERROR, | 1190 POSIX doesn't require that we do anything for REG_NOERROR, |
1170 but why not be nice? */ | 1191 but why not be nice? */ |
1171 | 1192 |
1172 static const char *re_error_msgid[] = | 1193 static const char *re_error_msgid[] = |
1173 { | 1194 { |
1174 gettext_noop ("Success"), /* REG_NOERROR */ | 1195 gettext_noop ("Success"), /* REG_NOERROR */ |
1175 gettext_noop ("No match"), /* REG_NOMATCH */ | 1196 gettext_noop ("No match"), /* REG_NOMATCH */ |
1188 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ | 1209 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ |
1189 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ | 1210 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ |
1190 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ | 1211 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ |
1191 }; | 1212 }; |
1192 | 1213 |
1193 /* Avoiding alloca during matching, to placate r_alloc. */ | 1214 /* Avoiding alloca during matching, to placate r_alloc. */ |
1194 | 1215 |
1195 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the | 1216 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the |
1196 searching and matching functions should not call alloca. On some | 1217 searching and matching functions should not call alloca. On some |
1197 systems, alloca is implemented in terms of malloc, and if we're | 1218 systems, alloca is implemented in terms of malloc, and if we're |
1198 using the relocating allocator routines, then malloc could cause a | 1219 using the relocating allocator routines, then malloc could cause a |
1220 | 1241 |
1221 /* The match routines may not allocate if (1) they would do it with malloc | 1242 /* The match routines may not allocate if (1) they would do it with malloc |
1222 and (2) it's not safe for them to use malloc. | 1243 and (2) it's not safe for them to use malloc. |
1223 Note that if REL_ALLOC is defined, matching would not use malloc for the | 1244 Note that if REL_ALLOC is defined, matching would not use malloc for the |
1224 failure stack, but we would still use it for the register vectors; | 1245 failure stack, but we would still use it for the register vectors; |
1225 so REL_ALLOC should not affect this. */ | 1246 so REL_ALLOC should not affect this. */ |
1226 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs | 1247 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs |
1227 # undef MATCH_MAY_ALLOCATE | 1248 # undef MATCH_MAY_ALLOCATE |
1228 #endif | 1249 #endif |
1229 | 1250 |
1230 | 1251 |
1309 which allows approximately `re_max_failures' items. | 1330 which allows approximately `re_max_failures' items. |
1310 | 1331 |
1311 Return 1 if it succeeds, and 0 if we either ran out of memory | 1332 Return 1 if it succeeds, and 0 if we either ran out of memory |
1312 allocating space for it or it was already too large. | 1333 allocating space for it or it was already too large. |
1313 | 1334 |
1314 REGEX_REALLOCATE_STACK requires `destination' be declared. */ | 1335 REGEX_REALLOCATE_STACK requires `destination' be declared. */ |
1315 | 1336 |
1316 /* Factor to increase the failure stack size by | 1337 /* Factor to increase the failure stack size by |
1317 when we increase it. | 1338 when we increase it. |
1318 This used to be 2, but 2 was too wasteful | 1339 This used to be 2, but 2 was too wasteful |
1319 because the old discarded stacks added up to as much space | 1340 because the old discarded stacks added up to as much space |
1353 1)) | 1374 1)) |
1354 #define POP_PATTERN_OP() POP_FAILURE_POINTER () | 1375 #define POP_PATTERN_OP() POP_FAILURE_POINTER () |
1355 | 1376 |
1356 /* Push a pointer value onto the failure stack. | 1377 /* Push a pointer value onto the failure stack. |
1357 Assumes the variable `fail_stack'. Probably should only | 1378 Assumes the variable `fail_stack'. Probably should only |
1358 be called from within `PUSH_FAILURE_POINT'. */ | 1379 be called from within `PUSH_FAILURE_POINT'. */ |
1359 #define PUSH_FAILURE_POINTER(item) \ | 1380 #define PUSH_FAILURE_POINTER(item) \ |
1360 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) | 1381 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) |
1361 | 1382 |
1362 /* This pushes an integer-valued item onto the failure stack. | 1383 /* This pushes an integer-valued item onto the failure stack. |
1363 Assumes the variable `fail_stack'. Probably should only | 1384 Assumes the variable `fail_stack'. Probably should only |
1364 be called from within `PUSH_FAILURE_POINT'. */ | 1385 be called from within `PUSH_FAILURE_POINT'. */ |
1365 #define PUSH_FAILURE_INT(item) \ | 1386 #define PUSH_FAILURE_INT(item) \ |
1366 fail_stack.stack[fail_stack.avail++].integer = (item) | 1387 fail_stack.stack[fail_stack.avail++].integer = (item) |
1367 | 1388 |
1368 /* Push a fail_stack_elt_t value onto the failure stack. | 1389 /* Push a fail_stack_elt_t value onto the failure stack. |
1369 Assumes the variable `fail_stack'. Probably should only | 1390 Assumes the variable `fail_stack'. Probably should only |
1370 be called from within `PUSH_FAILURE_POINT'. */ | 1391 be called from within `PUSH_FAILURE_POINT'. */ |
1371 #define PUSH_FAILURE_ELT(item) \ | 1392 #define PUSH_FAILURE_ELT(item) \ |
1372 fail_stack.stack[fail_stack.avail++] = (item) | 1393 fail_stack.stack[fail_stack.avail++] = (item) |
1373 | 1394 |
1374 /* These three POP... operations complement the three PUSH... operations. | 1395 /* These three POP... operations complement the three PUSH... operations. |
1375 All assume that `fail_stack' is nonempty. */ | 1396 All assume that `fail_stack' is nonempty. */ |
1472 do { \ | 1493 do { \ |
1473 char *destination; \ | 1494 char *destination; \ |
1474 /* Must be int, so when we don't save any registers, the arithmetic \ | 1495 /* Must be int, so when we don't save any registers, the arithmetic \ |
1475 of 0 + -1 isn't done as unsigned. */ \ | 1496 of 0 + -1 isn't done as unsigned. */ \ |
1476 \ | 1497 \ |
1477 DEBUG_STATEMENT (failure_id++); \ | |
1478 DEBUG_STATEMENT (nfailure_points_pushed++); \ | 1498 DEBUG_STATEMENT (nfailure_points_pushed++); \ |
1479 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ | 1499 DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n"); \ |
1480 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \ | 1500 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \ |
1481 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ | 1501 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ |
1482 \ | 1502 \ |
1483 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ | 1503 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \ |
1484 \ | 1504 \ |
1555 } while (0) /* POP_FAILURE_POINT */ | 1575 } while (0) /* POP_FAILURE_POINT */ |
1556 | 1576 |
1557 | 1577 |
1558 | 1578 |
1559 /* Registers are set to a sentinel when they haven't yet matched. */ | 1579 /* Registers are set to a sentinel when they haven't yet matched. */ |
1560 #define REG_UNSET_VALUE NULL | 1580 #define REG_UNSET(e) ((e) == NULL) |
1561 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) | |
1562 | 1581 |
1563 /* Subroutine declarations and macros for regex_compile. */ | 1582 /* Subroutine declarations and macros for regex_compile. */ |
1564 | 1583 |
1565 static void store_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg)); | 1584 static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size, |
1566 static void store_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, | 1585 reg_syntax_t syntax, |
1567 int arg1, int arg2)); | 1586 struct re_pattern_buffer *bufp)); |
1568 static void insert_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, | 1587 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg)); |
1569 int arg, unsigned char *end)); | 1588 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, |
1570 static void insert_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc, | 1589 int arg1, int arg2)); |
1571 int arg1, int arg2, unsigned char *end)); | 1590 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, |
1572 static boolean at_begline_loc_p _RE_ARGS((const unsigned char *pattern, | 1591 int arg, unsigned char *end)); |
1573 const unsigned char *p, | 1592 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, |
1574 reg_syntax_t syntax)); | 1593 int arg1, int arg2, unsigned char *end)); |
1575 static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p, | 1594 static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern, |
1576 const unsigned char *pend, | 1595 const unsigned char *p, |
1577 reg_syntax_t syntax)); | 1596 reg_syntax_t syntax)); |
1578 static unsigned char *skip_one_char _RE_ARGS((unsigned char *p)); | 1597 static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p, |
1579 static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend, | 1598 const unsigned char *pend, |
1580 char *fastmap, const int multibyte)); | 1599 reg_syntax_t syntax)); |
1600 static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p)); | |
1601 static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend, | |
1602 char *fastmap, const int multibyte)); | |
1581 | 1603 |
1582 /* Fetch the next character in the uncompiled pattern---translating it | 1604 /* Fetch the next character in the uncompiled pattern---translating it |
1583 if necessary. Also cast from a signed character in the constant | 1605 if necessary. Also cast from a signed character in the constant |
1584 string passed to us by the user to an unsigned char that we can use | 1606 string passed to us by the user to an unsigned char that we can use |
1585 as an array index (in, e.g., `translate'). */ | 1607 as an array index (in, e.g., `translate'). */ |
1588 PATFETCH_RAW (c); \ | 1610 PATFETCH_RAW (c); \ |
1589 c = TRANSLATE (c); \ | 1611 c = TRANSLATE (c); \ |
1590 } while (0) | 1612 } while (0) |
1591 | 1613 |
1592 /* Fetch the next character in the uncompiled pattern, with no | 1614 /* Fetch the next character in the uncompiled pattern, with no |
1593 translation. */ | 1615 translation. */ |
1594 #define PATFETCH_RAW(c) \ | 1616 #define PATFETCH_RAW(c) \ |
1595 do { \ | 1617 do { \ |
1596 int len; \ | 1618 int len; \ |
1597 if (p == pend) return REG_EEND; \ | 1619 if (p == pend) return REG_EEND; \ |
1598 c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len); \ | 1620 c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len); \ |
1613 /* Macros for outputting the compiled pattern into `buffer'. */ | 1635 /* Macros for outputting the compiled pattern into `buffer'. */ |
1614 | 1636 |
1615 /* If the buffer isn't allocated when it comes in, use this. */ | 1637 /* If the buffer isn't allocated when it comes in, use this. */ |
1616 #define INIT_BUF_SIZE 32 | 1638 #define INIT_BUF_SIZE 32 |
1617 | 1639 |
1618 /* Make sure we have at least N more bytes of space in buffer. */ | 1640 /* Make sure we have at least N more bytes of space in buffer. */ |
1619 #define GET_BUFFER_SPACE(n) \ | 1641 #define GET_BUFFER_SPACE(n) \ |
1620 while (b - bufp->buffer + (n) > bufp->allocated) \ | 1642 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ |
1621 EXTEND_BUFFER () | 1643 EXTEND_BUFFER () |
1622 | 1644 |
1623 /* Make sure we have one more byte of buffer space and then add C to it. */ | 1645 /* Make sure we have one more byte of buffer space and then add C to it. */ |
1624 #define BUF_PUSH(c) \ | 1646 #define BUF_PUSH(c) \ |
1625 do { \ | 1647 do { \ |
1635 *b++ = (unsigned char) (c1); \ | 1657 *b++ = (unsigned char) (c1); \ |
1636 *b++ = (unsigned char) (c2); \ | 1658 *b++ = (unsigned char) (c2); \ |
1637 } while (0) | 1659 } while (0) |
1638 | 1660 |
1639 | 1661 |
1640 /* As with BUF_PUSH_2, except for three bytes. */ | 1662 /* As with BUF_PUSH_2, except for three bytes. */ |
1641 #define BUF_PUSH_3(c1, c2, c3) \ | 1663 #define BUF_PUSH_3(c1, c2, c3) \ |
1642 do { \ | 1664 do { \ |
1643 GET_BUFFER_SPACE (3); \ | 1665 GET_BUFFER_SPACE (3); \ |
1644 *b++ = (unsigned char) (c1); \ | 1666 *b++ = (unsigned char) (c1); \ |
1645 *b++ = (unsigned char) (c2); \ | 1667 *b++ = (unsigned char) (c2); \ |
1646 *b++ = (unsigned char) (c3); \ | 1668 *b++ = (unsigned char) (c3); \ |
1647 } while (0) | 1669 } while (0) |
1648 | 1670 |
1649 | 1671 |
1650 /* Store a jump with opcode OP at LOC to location TO. We store a | 1672 /* Store a jump with opcode OP at LOC to location TO. We store a |
1651 relative address offset by the three bytes the jump itself occupies. */ | 1673 relative address offset by the three bytes the jump itself occupies. */ |
1652 #define STORE_JUMP(op, loc, to) \ | 1674 #define STORE_JUMP(op, loc, to) \ |
1653 store_op1 (op, loc, (to) - (loc) - 3) | 1675 store_op1 (op, loc, (to) - (loc) - 3) |
1654 | 1676 |
1655 /* Likewise, for a two-argument jump. */ | 1677 /* Likewise, for a two-argument jump. */ |
1656 #define STORE_JUMP2(op, loc, to, arg) \ | 1678 #define STORE_JUMP2(op, loc, to, arg) \ |
1657 store_op2 (op, loc, (to) - (loc) - 3, arg) | 1679 store_op2 (op, loc, (to) - (loc) - 3, arg) |
1658 | 1680 |
1659 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ | 1681 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ |
1660 #define INSERT_JUMP(op, loc, to) \ | 1682 #define INSERT_JUMP(op, loc, to) \ |
1661 insert_op1 (op, loc, (to) - (loc) - 3, b) | 1683 insert_op1 (op, loc, (to) - (loc) - 3, b) |
1662 | 1684 |
1663 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ | 1685 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ |
1664 #define INSERT_JUMP2(op, loc, to, arg) \ | 1686 #define INSERT_JUMP2(op, loc, to, arg) \ |
1665 insert_op2 (op, loc, (to) - (loc) - 3, arg, b) | 1687 insert_op2 (op, loc, (to) - (loc) - 3, arg, b) |
1666 | 1688 |
1667 | 1689 |
1668 /* This is not an arbitrary limit: the arguments which represent offsets | 1690 /* This is not an arbitrary limit: the arguments which represent offsets |
1669 into the pattern are two bytes long. So if 2^16 bytes turns out to | 1691 into the pattern are two bytes long. So if 2^16 bytes turns out to |
1670 be too small, many things would have to change. */ | 1692 be too small, many things would have to change. */ |
1671 #define MAX_BUF_SIZE (1L << 16) | 1693 /* Any other compiler which, like MSC, has allocation limit below 2^16 |
1672 | 1694 bytes will have to use approach similar to what was done below for |
1695 MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up | |
1696 reallocating to 0 bytes. Such thing is not going to work too well. | |
1697 You have been warned!! */ | |
1698 #if defined _MSC_VER && !defined WIN32 | |
1699 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. */ | |
1700 # define MAX_BUF_SIZE 65500L | |
1701 #else | |
1702 # define MAX_BUF_SIZE (1L << 16) | |
1703 #endif | |
1673 | 1704 |
1674 /* Extend the buffer by twice its current size via realloc and | 1705 /* Extend the buffer by twice its current size via realloc and |
1675 reset the pointers that pointed into the old block to point to the | 1706 reset the pointers that pointed into the old block to point to the |
1676 correct places in the new one. If extending the buffer results in it | 1707 correct places in the new one. If extending the buffer results in it |
1677 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ | 1708 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ |
1709 #if __BOUNDED_POINTERS__ | |
1710 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated) | |
1711 # define MOVE_BUFFER_POINTER(P) \ | |
1712 (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr) | |
1713 # define ELSE_EXTEND_BUFFER_HIGH_BOUND \ | |
1714 else \ | |
1715 { \ | |
1716 SET_HIGH_BOUND (b); \ | |
1717 SET_HIGH_BOUND (begalt); \ | |
1718 if (fixup_alt_jump) \ | |
1719 SET_HIGH_BOUND (fixup_alt_jump); \ | |
1720 if (laststart) \ | |
1721 SET_HIGH_BOUND (laststart); \ | |
1722 if (pending_exact) \ | |
1723 SET_HIGH_BOUND (pending_exact); \ | |
1724 } | |
1725 #else | |
1726 # define MOVE_BUFFER_POINTER(P) (P) += incr | |
1727 # define ELSE_EXTEND_BUFFER_HIGH_BOUND | |
1728 #endif | |
1678 #define EXTEND_BUFFER() \ | 1729 #define EXTEND_BUFFER() \ |
1679 do { \ | 1730 do { \ |
1680 unsigned char *old_buffer = bufp->buffer; \ | 1731 unsigned char *old_buffer = bufp->buffer; \ |
1681 if (bufp->allocated == MAX_BUF_SIZE) \ | 1732 if (bufp->allocated == MAX_BUF_SIZE) \ |
1682 return REG_ESIZE; \ | 1733 return REG_ESIZE; \ |
1687 if (bufp->buffer == NULL) \ | 1738 if (bufp->buffer == NULL) \ |
1688 return REG_ESPACE; \ | 1739 return REG_ESPACE; \ |
1689 /* If the buffer moved, move all the pointers into it. */ \ | 1740 /* If the buffer moved, move all the pointers into it. */ \ |
1690 if (old_buffer != bufp->buffer) \ | 1741 if (old_buffer != bufp->buffer) \ |
1691 { \ | 1742 { \ |
1692 b = (b - old_buffer) + bufp->buffer; \ | 1743 int incr = bufp->buffer - old_buffer; \ |
1693 begalt = (begalt - old_buffer) + bufp->buffer; \ | 1744 MOVE_BUFFER_POINTER (b); \ |
1745 MOVE_BUFFER_POINTER (begalt); \ | |
1694 if (fixup_alt_jump) \ | 1746 if (fixup_alt_jump) \ |
1695 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ | 1747 MOVE_BUFFER_POINTER (fixup_alt_jump); \ |
1696 if (laststart) \ | 1748 if (laststart) \ |
1697 laststart = (laststart - old_buffer) + bufp->buffer; \ | 1749 MOVE_BUFFER_POINTER (laststart); \ |
1698 if (pending_exact) \ | 1750 if (pending_exact) \ |
1699 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ | 1751 MOVE_BUFFER_POINTER (pending_exact); \ |
1700 } \ | 1752 } \ |
1753 ELSE_EXTEND_BUFFER_HIGH_BOUND \ | |
1701 } while (0) | 1754 } while (0) |
1702 | 1755 |
1703 | 1756 |
1704 /* Since we have one byte reserved for the register number argument to | 1757 /* Since we have one byte reserved for the register number argument to |
1705 {start,stop}_memory, the maximum number of groups we can report | 1758 {start,stop}_memory, the maximum number of groups we can report |
1712 | 1765 |
1713 | 1766 |
1714 /* Macros for the compile stack. */ | 1767 /* Macros for the compile stack. */ |
1715 | 1768 |
1716 /* Since offsets can go either forwards or backwards, this type needs to | 1769 /* Since offsets can go either forwards or backwards, this type needs to |
1717 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ | 1770 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ |
1718 typedef int pattern_offset_t; | 1771 /* int may be not enough when sizeof(int) == 2. */ |
1772 typedef long pattern_offset_t; | |
1719 | 1773 |
1720 typedef struct | 1774 typedef struct |
1721 { | 1775 { |
1722 pattern_offset_t begalt_offset; | 1776 pattern_offset_t begalt_offset; |
1723 pattern_offset_t fixup_alt_jump; | 1777 pattern_offset_t fixup_alt_jump; |
1737 #define INIT_COMPILE_STACK_SIZE 32 | 1791 #define INIT_COMPILE_STACK_SIZE 32 |
1738 | 1792 |
1739 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) | 1793 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) |
1740 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) | 1794 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) |
1741 | 1795 |
1742 /* The next available element. */ | 1796 /* The next available element. */ |
1743 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) | 1797 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) |
1744 | 1798 |
1745 | 1799 |
1746 /* Structure to manage work area for range table. */ | 1800 /* Structure to manage work area for range table. */ |
1747 struct range_table_work_area | 1801 struct range_table_work_area |
1808 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) | 1862 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits) |
1809 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) | 1863 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) |
1810 | 1864 |
1811 | 1865 |
1812 /* Set the bit for character C in a list. */ | 1866 /* Set the bit for character C in a list. */ |
1813 #define SET_LIST_BIT(c) \ | 1867 #define SET_LIST_BIT(c) \ |
1814 (b[((unsigned char) (c)) / BYTEWIDTH] \ | 1868 (b[((unsigned char) (c)) / BYTEWIDTH] \ |
1815 |= 1 << (((unsigned char) c) % BYTEWIDTH)) | 1869 |= 1 << (((unsigned char) c) % BYTEWIDTH)) |
1816 | 1870 |
1817 | 1871 |
1818 /* Get the next unsigned number in the uncompiled pattern. */ | 1872 /* Get the next unsigned number in the uncompiled pattern. */ |
1819 #define GET_UNSIGNED_NUMBER(num) \ | 1873 #define GET_UNSIGNED_NUMBER(num) \ |
1820 do { if (p != pend) \ | 1874 do { if (p != pend) \ |
1821 { \ | 1875 { \ |
1822 PATFETCH (c); \ | 1876 PATFETCH (c); \ |
1823 while (ISDIGIT (c)) \ | 1877 while ('0' <= c && c <= '9') \ |
1824 { \ | 1878 { \ |
1825 if (num < 0) \ | 1879 if (num < 0) \ |
1826 num = 0; \ | 1880 num = 0; \ |
1827 num = num * 10 + c - '0'; \ | 1881 num = num * 10 + c - '0'; \ |
1828 if (p == pend) \ | 1882 if (p == pend) \ |
1844 || STREQ (string, "word") \ | 1898 || STREQ (string, "word") \ |
1845 || STREQ (string, "ascii") || STREQ (string, "nonascii") \ | 1899 || STREQ (string, "ascii") || STREQ (string, "nonascii") \ |
1846 || STREQ (string, "unibyte") || STREQ (string, "multibyte")) | 1900 || STREQ (string, "unibyte") || STREQ (string, "multibyte")) |
1847 | 1901 |
1848 /* QUIT is only used on NTemacs. */ | 1902 /* QUIT is only used on NTemacs. */ |
1849 #if !defined WINDOWSNT || !defined emacs | 1903 #if !defined WINDOWSNT || !defined emacs || !defined QUIT |
1850 # undef QUIT | 1904 # undef QUIT |
1851 # define QUIT | 1905 # define QUIT |
1852 #endif | 1906 #endif |
1853 | 1907 |
1854 #ifndef MATCH_MAY_ALLOCATE | 1908 #ifndef MATCH_MAY_ALLOCATE |
1862 | 1916 |
1863 static fail_stack_type fail_stack; | 1917 static fail_stack_type fail_stack; |
1864 | 1918 |
1865 /* Size with which the following vectors are currently allocated. | 1919 /* Size with which the following vectors are currently allocated. |
1866 That is so we can make them bigger as needed, | 1920 That is so we can make them bigger as needed, |
1867 but never make them smaller. */ | 1921 but never make them smaller. */ |
1868 static int regs_allocated_size; | 1922 static int regs_allocated_size; |
1869 | 1923 |
1870 static re_char ** regstart, ** regend; | 1924 static re_char ** regstart, ** regend; |
1871 static re_char **best_regstart, **best_regend; | 1925 static re_char **best_regstart, **best_regend; |
1872 | 1926 |
1873 /* Make the register vectors big enough for NUM_REGS registers, | 1927 /* Make the register vectors big enough for NUM_REGS registers, |
1874 but don't make them smaller. */ | 1928 but don't make them smaller. */ |
1875 | 1929 |
1876 static | 1930 static |
1877 regex_grow_registers (num_regs) | 1931 regex_grow_registers (num_regs) |
1878 int num_regs; | 1932 int num_regs; |
1879 { | 1933 { |
1930 } while (0) | 1984 } while (0) |
1931 | 1985 |
1932 static reg_errcode_t | 1986 static reg_errcode_t |
1933 regex_compile (pattern, size, syntax, bufp) | 1987 regex_compile (pattern, size, syntax, bufp) |
1934 re_char *pattern; | 1988 re_char *pattern; |
1935 int size; | 1989 size_t size; |
1936 reg_syntax_t syntax; | 1990 reg_syntax_t syntax; |
1937 struct re_pattern_buffer *bufp; | 1991 struct re_pattern_buffer *bufp; |
1938 { | 1992 { |
1939 /* We fetch characters from PATTERN here. Even though PATTERN is | 1993 /* We fetch characters from PATTERN here. Even though PATTERN is |
1940 `char *' (i.e., signed), we declare these variables as unsigned, so | 1994 `char *' (i.e., signed), we declare these variables as unsigned, so |
2756 case '{': | 2810 case '{': |
2757 /* If \{ is a literal. */ | 2811 /* If \{ is a literal. */ |
2758 if (!(syntax & RE_INTERVALS) | 2812 if (!(syntax & RE_INTERVALS) |
2759 /* If we're at `\{' and it's not the open-interval | 2813 /* If we're at `\{' and it's not the open-interval |
2760 operator. */ | 2814 operator. */ |
2761 || (syntax & RE_NO_BK_BRACES) | 2815 || (syntax & RE_NO_BK_BRACES)) |
2762 /* What is that? -sm */ | |
2763 /* || (p - 2 == pattern && p == pend) */) | |
2764 goto normal_backslash; | 2816 goto normal_backslash; |
2765 | 2817 |
2766 handle_interval: | 2818 handle_interval: |
2767 { | 2819 { |
2768 /* If got here, then the syntax allows intervals. */ | 2820 /* If got here, then the syntax allows intervals. */ |
2771 int lower_bound = 0, upper_bound = -1; | 2823 int lower_bound = 0, upper_bound = -1; |
2772 | 2824 |
2773 beg_interval = p; | 2825 beg_interval = p; |
2774 | 2826 |
2775 if (p == pend) | 2827 if (p == pend) |
2776 { | 2828 FREE_STACK_RETURN (REG_EBRACE); |
2777 if (syntax & RE_NO_BK_BRACES) | |
2778 goto unfetch_interval; | |
2779 else | |
2780 FREE_STACK_RETURN (REG_EBRACE); | |
2781 } | |
2782 | 2829 |
2783 GET_UNSIGNED_NUMBER (lower_bound); | 2830 GET_UNSIGNED_NUMBER (lower_bound); |
2784 | 2831 |
2785 if (c == ',') | 2832 if (c == ',') |
2786 GET_UNSIGNED_NUMBER (upper_bound); | 2833 GET_UNSIGNED_NUMBER (upper_bound); |
2788 /* Interval such as `{1}' => match exactly once. */ | 2835 /* Interval such as `{1}' => match exactly once. */ |
2789 upper_bound = lower_bound; | 2836 upper_bound = lower_bound; |
2790 | 2837 |
2791 if (lower_bound < 0 || upper_bound > RE_DUP_MAX | 2838 if (lower_bound < 0 || upper_bound > RE_DUP_MAX |
2792 || (upper_bound >= 0 && lower_bound > upper_bound)) | 2839 || (upper_bound >= 0 && lower_bound > upper_bound)) |
2793 { | 2840 FREE_STACK_RETURN (REG_BADBR); |
2794 if (syntax & RE_NO_BK_BRACES) | |
2795 goto unfetch_interval; | |
2796 else | |
2797 FREE_STACK_RETURN (REG_BADBR); | |
2798 } | |
2799 | 2841 |
2800 if (!(syntax & RE_NO_BK_BRACES)) | 2842 if (!(syntax & RE_NO_BK_BRACES)) |
2801 { | 2843 { |
2802 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); | 2844 if (c != '\\') |
2845 FREE_STACK_RETURN (REG_BADBR); | |
2803 | 2846 |
2804 PATFETCH (c); | 2847 PATFETCH (c); |
2805 } | 2848 } |
2806 | 2849 |
2807 if (c != '}') | 2850 if (c != '}') |
2808 { | 2851 FREE_STACK_RETURN (REG_BADBR); |
2809 if (syntax & RE_NO_BK_BRACES) | |
2810 goto unfetch_interval; | |
2811 else | |
2812 FREE_STACK_RETURN (REG_BADBR); | |
2813 } | |
2814 | 2852 |
2815 /* We just parsed a valid interval. */ | 2853 /* We just parsed a valid interval. */ |
2816 | 2854 |
2817 /* If it's invalid to have no preceding re. */ | 2855 /* If it's invalid to have no preceding re. */ |
2818 if (!laststart) | 2856 if (!laststart) |
2972 break; | 3010 break; |
2973 #endif /* emacs */ | 3011 #endif /* emacs */ |
2974 | 3012 |
2975 | 3013 |
2976 case 'w': | 3014 case 'w': |
3015 if (syntax & RE_NO_GNU_OPS) | |
3016 goto normal_char; | |
2977 laststart = b; | 3017 laststart = b; |
2978 BUF_PUSH_2 (syntaxspec, Sword); | 3018 BUF_PUSH_2 (syntaxspec, Sword); |
2979 break; | 3019 break; |
2980 | 3020 |
2981 | 3021 |
2982 case 'W': | 3022 case 'W': |
3023 if (syntax & RE_NO_GNU_OPS) | |
3024 goto normal_char; | |
2983 laststart = b; | 3025 laststart = b; |
2984 BUF_PUSH_2 (notsyntaxspec, Sword); | 3026 BUF_PUSH_2 (notsyntaxspec, Sword); |
2985 break; | 3027 break; |
2986 | 3028 |
2987 | 3029 |
2988 case '<': | 3030 case '<': |
3031 if (syntax & RE_NO_GNU_OPS) | |
3032 goto normal_char; | |
2989 BUF_PUSH (wordbeg); | 3033 BUF_PUSH (wordbeg); |
2990 break; | 3034 break; |
2991 | 3035 |
2992 case '>': | 3036 case '>': |
3037 if (syntax & RE_NO_GNU_OPS) | |
3038 goto normal_char; | |
2993 BUF_PUSH (wordend); | 3039 BUF_PUSH (wordend); |
2994 break; | 3040 break; |
2995 | 3041 |
2996 case 'b': | 3042 case 'b': |
3043 if (syntax & RE_NO_GNU_OPS) | |
3044 goto normal_char; | |
2997 BUF_PUSH (wordbound); | 3045 BUF_PUSH (wordbound); |
2998 break; | 3046 break; |
2999 | 3047 |
3000 case 'B': | 3048 case 'B': |
3049 if (syntax & RE_NO_GNU_OPS) | |
3050 goto normal_char; | |
3001 BUF_PUSH (notwordbound); | 3051 BUF_PUSH (notwordbound); |
3002 break; | 3052 break; |
3003 | 3053 |
3004 case '`': | 3054 case '`': |
3055 if (syntax & RE_NO_GNU_OPS) | |
3056 goto normal_char; | |
3005 BUF_PUSH (begbuf); | 3057 BUF_PUSH (begbuf); |
3006 break; | 3058 break; |
3007 | 3059 |
3008 case '\'': | 3060 case '\'': |
3061 if (syntax & RE_NO_GNU_OPS) | |
3062 goto normal_char; | |
3009 BUF_PUSH (endbuf); | 3063 BUF_PUSH (endbuf); |
3010 break; | 3064 break; |
3011 | 3065 |
3012 case '1': case '2': case '3': case '4': case '5': | 3066 case '1': case '2': case '3': case '4': case '5': |
3013 case '6': case '7': case '8': case '9': | 3067 case '6': case '7': case '8': case '9': |
3018 | 3072 |
3019 if (c1 > regnum) | 3073 if (c1 > regnum) |
3020 FREE_STACK_RETURN (REG_ESUBREG); | 3074 FREE_STACK_RETURN (REG_ESUBREG); |
3021 | 3075 |
3022 /* Can't back reference to a subexpression if inside of it. */ | 3076 /* Can't back reference to a subexpression if inside of it. */ |
3023 if (group_in_compile_stack (compile_stack, c1)) | 3077 if (group_in_compile_stack (compile_stack, (regnum_t) c1)) |
3024 goto normal_char; | 3078 goto normal_char; |
3025 | 3079 |
3026 laststart = b; | 3080 laststart = b; |
3027 BUF_PUSH_2 (duplicate, c1); | 3081 BUF_PUSH_2 (duplicate, c1); |
3028 break; | 3082 break; |
3037 | 3091 |
3038 default: | 3092 default: |
3039 normal_backslash: | 3093 normal_backslash: |
3040 /* You might think it would be useful for \ to mean | 3094 /* You might think it would be useful for \ to mean |
3041 not to translate; but if we don't translate it | 3095 not to translate; but if we don't translate it |
3042 it will never match anything. */ | 3096 it will never match anything. */ |
3043 c = TRANSLATE (c); | 3097 c = TRANSLATE (c); |
3044 goto normal_char; | 3098 goto normal_char; |
3045 } | 3099 } |
3046 break; | 3100 break; |
3047 | 3101 |
3315 it is allocated relocatably. */ | 3369 it is allocated relocatably. */ |
3316 fail_stack_elt_t *failure_stack_ptr; | 3370 fail_stack_elt_t *failure_stack_ptr; |
3317 #endif | 3371 #endif |
3318 | 3372 |
3319 /* Assume that each path through the pattern can be null until | 3373 /* Assume that each path through the pattern can be null until |
3320 proven otherwise. We set this false at the bottom of switch | 3374 proven otherwise. We set this false at the bottom of switch |
3321 statement, to which we get only if a particular path doesn't | 3375 statement, to which we get only if a particular path doesn't |
3322 match the empty string. */ | 3376 match the empty string. */ |
3323 boolean path_can_be_null = true; | 3377 boolean path_can_be_null = true; |
3324 | 3378 |
3325 /* If all elements for base leading-codes in fastmap is set, this | 3379 /* If all elements for base leading-codes in fastmap is set, this |
3978 while (d == dend) \ | 4032 while (d == dend) \ |
3979 { \ | 4033 { \ |
3980 /* End of string2 => fail. */ \ | 4034 /* End of string2 => fail. */ \ |
3981 if (dend == end_match_2) \ | 4035 if (dend == end_match_2) \ |
3982 goto fail; \ | 4036 goto fail; \ |
3983 /* End of string1 => advance to string2. */ \ | 4037 /* End of string1 => advance to string2. */ \ |
3984 d = string2; \ | 4038 d = string2; \ |
3985 dend = end_match_2; \ | 4039 dend = end_match_2; \ |
3986 } | 4040 } |
3987 | 4041 |
3988 /* Call before fetching a char with *d if you already checked other limits. | 4042 /* Call before fetching a char with *d if you already checked other limits. |
4314 struct re_pattern_buffer *bufp; | 4368 struct re_pattern_buffer *bufp; |
4315 const char *string; | 4369 const char *string; |
4316 int size, pos; | 4370 int size, pos; |
4317 struct re_registers *regs; | 4371 struct re_registers *regs; |
4318 { | 4372 { |
4319 int result = re_match_2_internal (bufp, NULL, 0, string, size, | 4373 int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size, |
4320 pos, regs, size); | 4374 pos, regs, size); |
4321 # if defined C_ALLOCA && !defined REGEX_MALLOC | 4375 # if defined C_ALLOCA && !defined REGEX_MALLOC |
4322 alloca (0); | 4376 alloca (0); |
4323 # endif | 4377 # endif |
4324 return result; | 4378 return result; |
4360 gl_state.object = re_match_object; | 4414 gl_state.object = re_match_object; |
4361 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); | 4415 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); |
4362 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); | 4416 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); |
4363 #endif | 4417 #endif |
4364 | 4418 |
4365 result = re_match_2_internal (bufp, string1, size1, string2, size2, | 4419 result = re_match_2_internal (bufp, (re_char*) string1, size1, |
4420 (re_char*) string2, size2, | |
4366 pos, regs, stop); | 4421 pos, regs, stop); |
4367 #if defined C_ALLOCA && !defined REGEX_MALLOC | 4422 #if defined C_ALLOCA && !defined REGEX_MALLOC |
4368 alloca (0); | 4423 alloca (0); |
4369 #endif | 4424 #endif |
4370 return result; | 4425 return result; |
4421 scanning the strings. */ | 4476 scanning the strings. */ |
4422 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | 4477 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ |
4423 fail_stack_type fail_stack; | 4478 fail_stack_type fail_stack; |
4424 #endif | 4479 #endif |
4425 #ifdef DEBUG | 4480 #ifdef DEBUG |
4426 static unsigned failure_id = 0; | |
4427 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; | 4481 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; |
4428 #endif | 4482 #endif |
4429 | 4483 |
4430 #if defined REL_ALLOC && defined REGEX_MALLOC | 4484 #if defined REL_ALLOC && defined REGEX_MALLOC |
4431 /* This holds the pointer to the failure stack, when | 4485 /* This holds the pointer to the failure stack, when |
4434 #endif | 4488 #endif |
4435 | 4489 |
4436 /* We fill all the registers internally, independent of what we | 4490 /* We fill all the registers internally, independent of what we |
4437 return, for use in backreferences. The number here includes | 4491 return, for use in backreferences. The number here includes |
4438 an element for register zero. */ | 4492 an element for register zero. */ |
4439 unsigned num_regs = bufp->re_nsub + 1; | 4493 size_t num_regs = bufp->re_nsub + 1; |
4440 | 4494 |
4441 /* Information on the contents of registers. These are pointers into | 4495 /* Information on the contents of registers. These are pointers into |
4442 the input strings; they record just what was matched (on this | 4496 the input strings; they record just what was matched (on this |
4443 attempt) by a subexpression part of the pattern, that is, the | 4497 attempt) by a subexpression part of the pattern, that is, the |
4444 regnum-th regstart pointer points to where in the pattern we began | 4498 regnum-th regstart pointer points to where in the pattern we began |
4513 | 4567 |
4514 /* Initialize subexpression text positions to -1 to mark ones that no | 4568 /* Initialize subexpression text positions to -1 to mark ones that no |
4515 start_memory/stop_memory has been seen for. Also initialize the | 4569 start_memory/stop_memory has been seen for. Also initialize the |
4516 register information struct. */ | 4570 register information struct. */ |
4517 for (mcnt = 1; mcnt < num_regs; mcnt++) | 4571 for (mcnt = 1; mcnt < num_regs; mcnt++) |
4518 regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE; | 4572 regstart[mcnt] = regend[mcnt] = NULL; |
4519 | 4573 |
4520 /* We move `string1' into `string2' if the latter's empty -- but not if | 4574 /* We move `string1' into `string2' if the latter's empty -- but not if |
4521 `string1' is null. */ | 4575 `string1' is null. */ |
4522 if (size2 == 0 && string1 != NULL) | 4576 if (size2 == 0 && string1 != NULL) |
4523 { | 4577 { |
4928 | 4982 |
4929 /* In case we need to undo this operation (via backtracking). */ | 4983 /* In case we need to undo this operation (via backtracking). */ |
4930 PUSH_FAILURE_REG ((unsigned int)*p); | 4984 PUSH_FAILURE_REG ((unsigned int)*p); |
4931 | 4985 |
4932 regstart[*p] = d; | 4986 regstart[*p] = d; |
4933 regend[*p] = REG_UNSET_VALUE; /* probably unnecessary. -sm */ | 4987 regend[*p] = NULL; /* probably unnecessary. -sm */ |
4934 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); | 4988 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); |
4935 | 4989 |
4936 /* Move past the register number and inner group count. */ | 4990 /* Move past the register number and inner group count. */ |
4937 p += 1; | 4991 p += 1; |
4938 break; | 4992 break; |
5021 | 5075 |
5022 /* Compare that many; failure if mismatch, else move | 5076 /* Compare that many; failure if mismatch, else move |
5023 past them. */ | 5077 past them. */ |
5024 if (RE_TRANSLATE_P (translate) | 5078 if (RE_TRANSLATE_P (translate) |
5025 ? bcmp_translate (d, d2, mcnt, translate, multibyte) | 5079 ? bcmp_translate (d, d2, mcnt, translate, multibyte) |
5026 : bcmp (d, d2, mcnt)) | 5080 : memcmp (d, d2, mcnt)) |
5027 { | 5081 { |
5028 d = dfail; | 5082 d = dfail; |
5029 goto fail; | 5083 goto fail; |
5030 } | 5084 } |
5031 d += mcnt, d2 += mcnt; | 5085 d += mcnt, d2 += mcnt; |
5235 if (mcnt != 0) | 5289 if (mcnt != 0) |
5236 { | 5290 { |
5237 mcnt--; | 5291 mcnt--; |
5238 p += 2; | 5292 p += 2; |
5239 PUSH_FAILURE_COUNT (p); | 5293 PUSH_FAILURE_COUNT (p); |
5294 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt); | |
5240 STORE_NUMBER_AND_INCR (p, mcnt); | 5295 STORE_NUMBER_AND_INCR (p, mcnt); |
5241 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt); | |
5242 } | 5296 } |
5243 else | 5297 else |
5244 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */ | 5298 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */ |
5245 goto on_failure; | 5299 goto on_failure; |
5246 break; | 5300 break; |
5538 { | 5592 { |
5539 register re_char *p1 = s1, *p2 = s2; | 5593 register re_char *p1 = s1, *p2 = s2; |
5540 re_char *p1_end = s1 + len; | 5594 re_char *p1_end = s1 + len; |
5541 re_char *p2_end = s2 + len; | 5595 re_char *p2_end = s2 + len; |
5542 | 5596 |
5543 while (p1 != p1_end && p2 != p2_end) | 5597 /* FIXME: Checking both p1 and p2 presumes that the two strings might have |
5598 different lengths, but relying on a single `len' would break this. -sm */ | |
5599 while (p1 < p1_end && p2 < p2_end) | |
5544 { | 5600 { |
5545 int p1_charlen, p2_charlen; | 5601 int p1_charlen, p2_charlen; |
5546 int p1_ch, p2_ch; | 5602 int p1_ch, p2_ch; |
5547 | 5603 |
5548 p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); | 5604 p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen); |
5590 bufp->no_sub = 0; | 5646 bufp->no_sub = 0; |
5591 | 5647 |
5592 /* Match anchors at newline. */ | 5648 /* Match anchors at newline. */ |
5593 bufp->newline_anchor = 1; | 5649 bufp->newline_anchor = 1; |
5594 | 5650 |
5595 ret = regex_compile (pattern, length, re_syntax_options, bufp); | 5651 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp); |
5596 | 5652 |
5597 if (!ret) | 5653 if (!ret) |
5598 return NULL; | 5654 return NULL; |
5599 return gettext (re_error_msgid[(int) ret]); | 5655 return gettext (re_error_msgid[(int) ret]); |
5600 } | 5656 } |
5713 regex_t *preg; | 5769 regex_t *preg; |
5714 const char *pattern; | 5770 const char *pattern; |
5715 int cflags; | 5771 int cflags; |
5716 { | 5772 { |
5717 reg_errcode_t ret; | 5773 reg_errcode_t ret; |
5718 unsigned syntax | 5774 reg_syntax_t syntax |
5719 = (cflags & REG_EXTENDED) ? | 5775 = (cflags & REG_EXTENDED) ? |
5720 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; | 5776 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; |
5721 | 5777 |
5722 /* regex_compile will allocate the space for the compiled pattern. */ | 5778 /* regex_compile will allocate the space for the compiled pattern. */ |
5723 preg->buffer = 0; | 5779 preg->buffer = 0; |
5740 if (preg->translate == NULL) | 5796 if (preg->translate == NULL) |
5741 return (int) REG_ESPACE; | 5797 return (int) REG_ESPACE; |
5742 | 5798 |
5743 /* Map uppercase characters to corresponding lowercase ones. */ | 5799 /* Map uppercase characters to corresponding lowercase ones. */ |
5744 for (i = 0; i < CHAR_SET_SIZE; i++) | 5800 for (i = 0; i < CHAR_SET_SIZE; i++) |
5745 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; | 5801 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; |
5746 } | 5802 } |
5747 else | 5803 else |
5748 preg->translate = NULL; | 5804 preg->translate = NULL; |
5749 | 5805 |
5750 /* If REG_NEWLINE is set, newlines are treated differently. */ | 5806 /* If REG_NEWLINE is set, newlines are treated differently. */ |
5760 | 5816 |
5761 preg->no_sub = !!(cflags & REG_NOSUB); | 5817 preg->no_sub = !!(cflags & REG_NOSUB); |
5762 | 5818 |
5763 /* POSIX says a null character in the pattern terminates it, so we | 5819 /* POSIX says a null character in the pattern terminates it, so we |
5764 can use strlen here in compiling the pattern. */ | 5820 can use strlen here in compiling the pattern. */ |
5765 ret = regex_compile (pattern, strlen (pattern), syntax, preg); | 5821 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg); |
5766 | 5822 |
5767 /* POSIX doesn't distinguish between an unmatched open-group and an | 5823 /* POSIX doesn't distinguish between an unmatched open-group and an |
5768 unmatched close-group: both are REG_EPAREN. */ | 5824 unmatched close-group: both are REG_EPAREN. */ |
5769 if (ret == REG_ERPAREN) ret = REG_EPAREN; | 5825 if (ret == REG_ERPAREN) ret = REG_EPAREN; |
5770 | 5826 |
5811 private_preg.regs_allocated = REGS_FIXED; | 5867 private_preg.regs_allocated = REGS_FIXED; |
5812 | 5868 |
5813 if (want_reg_info) | 5869 if (want_reg_info) |
5814 { | 5870 { |
5815 regs.num_regs = nmatch; | 5871 regs.num_regs = nmatch; |
5816 regs.start = TALLOC (nmatch, regoff_t); | 5872 regs.start = TALLOC (nmatch * 2, regoff_t); |
5817 regs.end = TALLOC (nmatch, regoff_t); | 5873 if (regs.start == NULL) |
5818 if (regs.start == NULL || regs.end == NULL) | |
5819 return (int) REG_NOMATCH; | 5874 return (int) REG_NOMATCH; |
5875 regs.end = regs.start + nmatch; | |
5820 } | 5876 } |
5821 | 5877 |
5822 /* Perform the searching operation. */ | 5878 /* Perform the searching operation. */ |
5823 ret = re_search (&private_preg, string, len, | 5879 ret = re_search (&private_preg, string, len, |
5824 /* start: */ 0, /* range: */ len, | 5880 /* start: */ 0, /* range: */ len, |
5838 } | 5894 } |
5839 } | 5895 } |
5840 | 5896 |
5841 /* If we needed the temporary register info, free the space now. */ | 5897 /* If we needed the temporary register info, free the space now. */ |
5842 free (regs.start); | 5898 free (regs.start); |
5843 free (regs.end); | |
5844 } | 5899 } |
5845 | 5900 |
5846 /* We want zero return to mean success, unlike `re_search'. */ | 5901 /* We want zero return to mean success, unlike `re_search'. */ |
5847 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; | 5902 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; |
5848 } | 5903 } |