comparison src/regex.c @ 83548:c71725faff1a

Merged from emacs@sv.gnu.org. Last-minute emacsclient rewrites be damned! Patches applied: * emacs@sv.gnu.org/emacs--devo--0--patch-490 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-491 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-492 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-493 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-494 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-495 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-496 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-497 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-498 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-499 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-500 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-501 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-502 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-503 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-504 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-505 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-506 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-507 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-508 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-509 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-510 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-511 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-512 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-513 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-514 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-515 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-516 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-517 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-518 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-519 Update from CVS: etc/TUTORIAL.cn: Updated. 
* emacs@sv.gnu.org/emacs--devo--0--patch-520 Merge from erc--emacs--22 * emacs@sv.gnu.org/emacs--devo--0--patch-521 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-522 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-523 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-524 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-525 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-526 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-527 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-528 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-529 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-530 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-531 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-532 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-533 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-534 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-535 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-161 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-162 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-163 Merge from emacs--devo--0 * emacs@sv.gnu.org/gnus--rel--5.10--patch-164 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-165 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-166 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-167 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-168 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-169 Merge from emacs--devo--0 * emacs@sv.gnu.org/gnus--rel--5.10--patch-170 Update from CVS git-archimport-id: lorentey@elte.hu--2004/emacs--multi-tty--0--patch-588
author Karoly Lorentey <lorentey@elte.hu>
date Sun, 03 Dec 2006 15:03:30 +0000
parents 2d56e13fd23d ad2b5e25b8f0
children 17e0dd217877
comparison
equal deleted inserted replaced
83547:0912b745fc75 83548:c71725faff1a
10 the Free Software Foundation; either version 2, or (at your option) 10 the Free Software Foundation; either version 2, or (at your option)
11 any later version. 11 any later version.
12 12
13 This program is distributed in the hope that it will be useful, 13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details. 16 GNU General Public License for more details.
17 17
18 You should have received a copy of the GNU General Public License 18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software 19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
21 USA. */ 21 USA. */
22 22
23 /* TODO: 23 /* TODO:
24 - structure the opcode space into opcode+flag. 24 - structure the opcode space into opcode+flag.
25 - merge with glibc's regex.[ch]. 25 - merge with glibc's regex.[ch].
26 - replace (succeed_n + jump_n + set_number_at) with something that doesn't 26 - replace (succeed_n + jump_n + set_number_at) with something that doesn't
137 # ifdef free 137 # ifdef free
138 # undef free 138 # undef free
139 # endif 139 # endif
140 # define free xfree 140 # define free xfree
141 141
142 /* Converts the pointer to the char to BEG-based offset from the start. */ 142 /* Converts the pointer to the char to BEG-based offset from the start. */
143 # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) 143 # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
144 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) 144 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
145 145
146 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 146 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
147 # define RE_STRING_CHAR(p, s) \ 147 # define RE_STRING_CHAR(p, s) \
538 538
539 # define REGEX_ALLOCATE_STACK alloca 539 # define REGEX_ALLOCATE_STACK alloca
540 540
541 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 541 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
542 REGEX_REALLOCATE (source, osize, nsize) 542 REGEX_REALLOCATE (source, osize, nsize)
543 /* No need to explicitly free anything. */ 543 /* No need to explicitly free anything. */
544 # define REGEX_FREE_STACK(arg) ((void)0) 544 # define REGEX_FREE_STACK(arg) ((void)0)
545 545
546 # endif /* not REGEX_MALLOC */ 546 # endif /* not REGEX_MALLOC */
547 #endif /* not using relocating allocator */ 547 #endif /* not using relocating allocator */
548 548
652 652
653 /* Followed by two byte relative address to which to jump. */ 653 /* Followed by two byte relative address to which to jump. */
654 jump, 654 jump,
655 655
656 /* Followed by two-byte relative address of place to resume at 656 /* Followed by two-byte relative address of place to resume at
657 in case of failure. */ 657 in case of failure. */
658 on_failure_jump, 658 on_failure_jump,
659 659
660 /* Like on_failure_jump, but pushes a placeholder instead of the 660 /* Like on_failure_jump, but pushes a placeholder instead of the
661 current string position when executed. */ 661 current string position when executed. */
662 on_failure_keep_string_jump, 662 on_failure_keep_string_jump,
689 /* Followed by two-byte relative address, and two-byte number n. 689 /* Followed by two-byte relative address, and two-byte number n.
690 Jump to the address N times, then fail. */ 690 Jump to the address N times, then fail. */
691 jump_n, 691 jump_n,
692 692
693 /* Set the following two-byte relative address to the 693 /* Set the following two-byte relative address to the
694 subsequent two-byte number. The address *includes* the two 694 subsequent two-byte number. The address *includes* the two
695 bytes of number. */ 695 bytes of number. */
696 set_number_at, 696 set_number_at,
697 697
698 wordbeg, /* Succeeds if at word beginning. */ 698 wordbeg, /* Succeeds if at word beginning. */
699 wordend, /* Succeeds if at word end. */ 699 wordend, /* Succeeds if at word end. */
700 700
701 wordbound, /* Succeeds if at a word boundary. */ 701 wordbound, /* Succeeds if at a word boundary. */
702 notwordbound, /* Succeeds if not at a word boundary. */ 702 notwordbound, /* Succeeds if not at a word boundary. */
703 703
704 symbeg, /* Succeeds if at symbol beginning. */ 704 symbeg, /* Succeeds if at symbol beginning. */
705 symend, /* Succeeds if at symbol end. */ 705 symend, /* Succeeds if at symbol end. */
706 706
707 /* Matches any character whose syntax is specified. Followed by 707 /* Matches any character whose syntax is specified. Followed by
715 ,before_dot, /* Succeeds if before point. */ 715 ,before_dot, /* Succeeds if before point. */
716 at_dot, /* Succeeds if at point. */ 716 at_dot, /* Succeeds if at point. */
717 after_dot, /* Succeeds if after point. */ 717 after_dot, /* Succeeds if after point. */
718 718
719 /* Matches any character whose category-set contains the specified 719 /* Matches any character whose category-set contains the specified
720 category. The operator is followed by a byte which contains a 720 category. The operator is followed by a byte which contains a
721 category code (mnemonic ASCII character). */ 721 category code (mnemonic ASCII character). */
722 categoryspec, 722 categoryspec,
723 723
724 /* Matches any character whose category-set does not contain the 724 /* Matches any character whose category-set does not contain the
725 specified category. The operator is followed by a byte which 725 specified category. The operator is followed by a byte which
726 contains the category code (mnemonic ASCII character). */ 726 contains the category code (mnemonic ASCII character). */
805 805
806 #endif /* DEBUG */ 806 #endif /* DEBUG */
807 807
808 /* Store a multibyte character in three contiguous bytes starting at 808 /* Store a multibyte character in three contiguous bytes starting at
809 DESTINATION, and increment DESTINATION to the byte after where the 809 DESTINATION, and increment DESTINATION to the byte after where the
810 character is stored. Therefore, DESTINATION must be an lvalue. */ 810 character is stored. Therefore, DESTINATION must be an lvalue. */
811 811
812 #define STORE_CHARACTER_AND_INCR(destination, character) \ 812 #define STORE_CHARACTER_AND_INCR(destination, character) \
813 do { \ 813 do { \
814 (destination)[0] = (character) & 0377; \ 814 (destination)[0] = (character) & 0377; \
815 (destination)[1] = ((character) >> 8) & 0377; \ 815 (destination)[1] = ((character) >> 8) & 0377; \
816 (destination)[2] = (character) >> 16; \ 816 (destination)[2] = (character) >> 16; \
817 (destination) += 3; \ 817 (destination) += 3; \
818 } while (0) 818 } while (0)
819 819
820 /* Put into DESTINATION a character stored in three contiguous bytes 820 /* Put into DESTINATION a character stored in three contiguous bytes
821 starting at SOURCE. */ 821 starting at SOURCE. */
822 822
823 #define EXTRACT_CHARACTER(destination, source) \ 823 #define EXTRACT_CHARACTER(destination, source) \
824 do { \ 824 do { \
825 (destination) = ((source)[0] \ 825 (destination) = ((source)[0] \
826 | ((source)[1] << 8) \ 826 | ((source)[1] << 8) \
852 #define CHARSET_LOOKUP_BITMAP(p, c) \ 852 #define CHARSET_LOOKUP_BITMAP(p, c) \
853 ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \ 853 ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \
854 && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH))) 854 && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
855 855
856 /* Return the address of end of RANGE_TABLE. COUNT is number of 856 /* Return the address of end of RANGE_TABLE. COUNT is number of
857 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' 857 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
858 is start of range and end of range. `* 3' is size of each start 858 is start of range and end of range. `* 3' is size of each start
859 and end. */ 859 and end. */
860 #define CHARSET_RANGE_TABLE_END(range_table, count) \ 860 #define CHARSET_RANGE_TABLE_END(range_table, count) \
861 ((range_table) + (count) * 2 * 3) 861 ((range_table) + (count) * 2 * 3)
862 862
863 /* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in. 863 /* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in.
864 COUNT is number of ranges in RANGE_TABLE. */ 864 COUNT is number of ranges in RANGE_TABLE. */
865 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \ 865 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
866 do \ 866 do \
867 { \ 867 { \
868 re_wchar_t range_start, range_end; \ 868 re_wchar_t range_start, range_end; \
1665 STR -- the saved data position. 1665 STR -- the saved data position.
1666 PAT -- the saved pattern position. 1666 PAT -- the saved pattern position.
1667 REGSTART, REGEND -- arrays of string positions. 1667 REGSTART, REGEND -- arrays of string positions.
1668 1668
1669 Also assumes the variables `fail_stack' and (if debugging), `bufp', 1669 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1670 `pend', `string1', `size1', `string2', and `size2'. */ 1670 `pend', `string1', `size1', `string2', and `size2'. */
1671 1671
1672 #define POP_FAILURE_POINT(str, pat) \ 1672 #define POP_FAILURE_POINT(str, pat) \
1673 do { \ 1673 do { \
1674 assert (!FAIL_STACK_EMPTY ()); \ 1674 assert (!FAIL_STACK_EMPTY ()); \
1675 \ 1675 \
1934 /* Structure to manage work area for range table. */ 1934 /* Structure to manage work area for range table. */
1935 struct range_table_work_area 1935 struct range_table_work_area
1936 { 1936 {
1937 int *table; /* actual work area. */ 1937 int *table; /* actual work area. */
1938 int allocated; /* allocated size for work area in bytes. */ 1938 int allocated; /* allocated size for work area in bytes. */
1939 int used; /* actually used size in words. */ 1939 int used; /* actually used size in words. */
1940 int bits; /* flag to record character classes */ 1940 int bits; /* flag to record character classes */
1941 }; 1941 };
1942 1942
1943 /* Make sure that WORK_AREA can hold N more multibyte characters. 1943 /* Make sure that WORK_AREA can hold N more multibyte characters.
1944 This is used only in set_image_of_range and set_image_of_range_1. 1944 This is used only in set_image_of_range and set_image_of_range_1.
1976 tem = set_image_of_range (&work_area, start, end, translate); \ 1976 tem = set_image_of_range (&work_area, start, end, translate); \
1977 if (tem > 0) \ 1977 if (tem > 0) \
1978 FREE_STACK_RETURN (tem); \ 1978 FREE_STACK_RETURN (tem); \
1979 } while (0) 1979 } while (0)
1980 1980
1981 /* Free allocated memory for WORK_AREA. */ 1981 /* Free allocated memory for WORK_AREA. */
1982 #define FREE_RANGE_TABLE_WORK_AREA(work_area) \ 1982 #define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1983 do { \ 1983 do { \
1984 if ((work_area).table) \ 1984 if ((work_area).table) \
1985 free ((work_area).table); \ 1985 free ((work_area).table); \
1986 } while (0) 1986 } while (0)
2476 /* Place in the uncompiled pattern (i.e., the {) to 2476 /* Place in the uncompiled pattern (i.e., the {) to
2477 which to go back if the interval is invalid. */ 2477 which to go back if the interval is invalid. */
2478 re_char *beg_interval; 2478 re_char *beg_interval;
2479 2479
2480 /* Address of the place where a forward jump should go to the end of 2480 /* Address of the place where a forward jump should go to the end of
2481 the containing expression. Each alternative of an `or' -- except the 2481 the containing expression. Each alternative of an `or' -- except the
2482 last -- ends with a forward jump of this sort. */ 2482 last -- ends with a forward jump of this sort. */
2483 unsigned char *fixup_alt_jump = 0; 2483 unsigned char *fixup_alt_jump = 0;
2484 2484
2485 /* Counts open-groups as they are encountered. Remembered for the 2485 /* Counts open-groups as they are encountered. Remembered for the
2486 matching close-group on the compile stack, so the same register 2486 matching close-group on the compile stack, so the same register
2548 if (bufp->allocated == 0) 2548 if (bufp->allocated == 0)
2549 { 2549 {
2550 if (bufp->buffer) 2550 if (bufp->buffer)
2551 { /* If zero allocated, but buffer is non-null, try to realloc 2551 { /* If zero allocated, but buffer is non-null, try to realloc
2552 enough space. This loses if buffer's address is bogus, but 2552 enough space. This loses if buffer's address is bogus, but
2553 that is the user's responsibility. */ 2553 that is the user's responsibility. */
2554 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); 2554 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2555 } 2555 }
2556 else 2556 else
2557 { /* Caller did not allocate a buffer. Do it for them. */ 2557 { /* Caller did not allocate a buffer. Do it for them. */
2558 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); 2558 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2559 } 2559 }
2560 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); 2560 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2561 2561
2562 bufp->allocated = INIT_BUF_SIZE; 2562 bufp->allocated = INIT_BUF_SIZE;
2616 main_pend = pend; 2616 main_pend = pend;
2617 main_pattern = pattern; 2617 main_pattern = pattern;
2618 p = pattern = whitespace_regexp; 2618 p = pattern = whitespace_regexp;
2619 pend = p + strlen (p); 2619 pend = p + strlen (p);
2620 break; 2620 break;
2621 } 2621 }
2622 2622
2623 case '^': 2623 case '^':
2624 { 2624 {
2625 if ( /* If at start of pattern, it's an operator. */ 2625 if ( /* If at start of pattern, it's an operator. */
2626 p == pattern + 1 2626 p == pattern + 1
2627 /* If context independent, it's an operator. */ 2627 /* If context independent, it's an operator. */
2628 || syntax & RE_CONTEXT_INDEP_ANCHORS 2628 || syntax & RE_CONTEXT_INDEP_ANCHORS
2629 /* Otherwise, depends on what's come before. */ 2629 /* Otherwise, depends on what's come before. */
2630 || at_begline_loc_p (pattern, p, syntax)) 2630 || at_begline_loc_p (pattern, p, syntax))
2631 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline); 2631 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2632 else 2632 else
2633 goto normal_char; 2633 goto normal_char;
2634 } 2634 }
2637 2637
2638 case '$': 2638 case '$':
2639 { 2639 {
2640 if ( /* If at end of pattern, it's an operator. */ 2640 if ( /* If at end of pattern, it's an operator. */
2641 p == pend 2641 p == pend
2642 /* If context independent, it's an operator. */ 2642 /* If context independent, it's an operator. */
2643 || syntax & RE_CONTEXT_INDEP_ANCHORS 2643 || syntax & RE_CONTEXT_INDEP_ANCHORS
2644 /* Otherwise, depends on what's next. */ 2644 /* Otherwise, depends on what's next. */
2645 || at_endline_loc_p (p, pend, syntax)) 2645 || at_endline_loc_p (p, pend, syntax))
2646 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline); 2646 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2647 else 2647 else
2665 else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 2665 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2666 goto normal_char; 2666 goto normal_char;
2667 } 2667 }
2668 2668
2669 { 2669 {
2670 /* 1 means zero (many) matches is allowed. */ 2670 /* 1 means zero (many) matches is allowed. */
2671 boolean zero_times_ok = 0, many_times_ok = 0; 2671 boolean zero_times_ok = 0, many_times_ok = 0;
2672 boolean greedy = 1; 2672 boolean greedy = 1;
2673 2673
2674 /* If there is a sequence of repetition chars, collapse it 2674 /* If there is a sequence of repetition chars, collapse it
2675 down to just one (the right one). We can't combine 2675 down to just one (the right one). We can't combine
2676 interval operators with these because of, e.g., `a{2}*', 2676 interval operators with these because of, e.g., `a{2}*',
2677 which should only match an even number of `a's. */ 2677 which should only match an even number of `a's. */
2678 2678
2679 for (;;) 2679 for (;;)
2680 { 2680 {
2681 if ((syntax & RE_FRUGAL) 2681 if ((syntax & RE_FRUGAL)
2682 && c == '?' && (zero_times_ok || many_times_ok)) 2682 && c == '?' && (zero_times_ok || many_times_ok))
2712 to an empty pattern. */ 2712 to an empty pattern. */
2713 if (!laststart || laststart == b) 2713 if (!laststart || laststart == b)
2714 break; 2714 break;
2715 2715
2716 /* Now we know whether or not zero matches is allowed 2716 /* Now we know whether or not zero matches is allowed
2717 and also whether or not two or more matches is allowed. */ 2717 and also whether or not two or more matches is allowed. */
2718 if (greedy) 2718 if (greedy)
2719 { 2719 {
2720 if (many_times_ok) 2720 if (many_times_ok)
2721 { 2721 {
2722 boolean simple = skip_one_char (laststart) == b; 2722 boolean simple = skip_one_char (laststart) == b;
2819 GET_BUFFER_SPACE (34); 2819 GET_BUFFER_SPACE (34);
2820 2820
2821 laststart = b; 2821 laststart = b;
2822 2822
2823 /* We test `*p == '^' twice, instead of using an if 2823 /* We test `*p == '^' twice, instead of using an if
2824 statement, so we only need one BUF_PUSH. */ 2824 statement, so we only need one BUF_PUSH. */
2825 BUF_PUSH (*p == '^' ? charset_not : charset); 2825 BUF_PUSH (*p == '^' ? charset_not : charset);
2826 if (*p == '^') 2826 if (*p == '^')
2827 p++; 2827 p++;
2828 2828
2829 /* Remember the first position in the bracket expression. */ 2829 /* Remember the first position in the bracket expression. */
2830 p1 = p; 2830 p1 = p;
2831 2831
2832 /* Push the number of bytes in the bitmap. */ 2832 /* Push the number of bytes in the bitmap. */
2833 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 2833 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2834 2834
2835 /* Clear the whole map. */ 2835 /* Clear the whole map. */
2836 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 2836 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2837 2837
2838 /* charset_not matches newline according to a syntax bit. */ 2838 /* charset_not matches newline according to a syntax bit. */
2839 if ((re_opcode_t) b[-2] == charset_not 2839 if ((re_opcode_t) b[-2] == charset_not
2840 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 2840 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2841 SET_LIST_BIT ('\n'); 2841 SET_LIST_BIT ('\n');
2842 2842
2843 /* Read in characters and ranges, setting map bits. */ 2843 /* Read in characters and ranges, setting map bits. */
2844 for (;;) 2844 for (;;)
2845 { 2845 {
2846 boolean escaped_char = false; 2846 boolean escaped_char = false;
2847 const unsigned char *p2 = p; 2847 const unsigned char *p2 = p;
2848 2848
2862 PATFETCH (c); 2862 PATFETCH (c);
2863 escaped_char = true; 2863 escaped_char = true;
2864 } 2864 }
2865 else 2865 else
2866 { 2866 {
2867 /* Could be the end of the bracket expression. If it's 2867 /* Could be the end of the bracket expression. If it's
2868 not (i.e., when the bracket expression is `[]' so 2868 not (i.e., when the bracket expression is `[]' so
2869 far), the ']' character bit gets set way below. */ 2869 far), the ']' character bit gets set way below. */
2870 if (c == ']' && p2 != p1) 2870 if (c == ']' && p2 != p1)
2871 break; 2871 break;
2872 } 2872 }
2879 class. */ 2879 class. */
2880 2880
2881 if (!escaped_char && 2881 if (!escaped_char &&
2882 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 2882 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2883 { 2883 {
2884 /* Leave room for the null. */ 2884 /* Leave room for the null. */
2885 unsigned char str[CHAR_CLASS_MAX_LENGTH + 1]; 2885 unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
2886 const unsigned char *class_beg; 2886 const unsigned char *class_beg;
2887 2887
2888 PATFETCH (c); 2888 PATFETCH (c);
2889 c1 = 0; 2889 c1 = 0;
2937 re_wctype_to_bit (cc)); 2937 re_wctype_to_bit (cc));
2938 2938
2939 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) 2939 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2940 { 2940 {
2941 int translated = TRANSLATE (ch); 2941 int translated = TRANSLATE (ch);
2942 if (re_iswctype (btowc (ch), cc)) 2942 if (translated < (1 << BYTEWIDTH)
2943 && re_iswctype (btowc (ch), cc))
2943 SET_LIST_BIT (translated); 2944 SET_LIST_BIT (translated);
2944 } 2945 }
2945 2946
2946 /* In most cases the matching rule for char classes 2947 /* In most cases the matching rule for char classes
2947 only uses the syntax table for multibyte chars, 2948 only uses the syntax table for multibyte chars,
2960 p = class_beg; 2961 p = class_beg;
2961 SET_LIST_BIT ('['); 2962 SET_LIST_BIT ('[');
2962 2963
2963 /* Because the `:' may start the range, we 2964 /* Because the `:' may start the range, we
2964 can't simply set bit and repeat the loop. 2965 can't simply set bit and repeat the loop.
2965 Instead, just set it to C and handle below. */ 2966 Instead, just set it to C and handle below. */
2966 c = ':'; 2967 c = ':';
2967 } 2968 }
2968 } 2969 }
2969 2970
2970 if (p < pend && p[0] == '-' && p[1] != ']') 2971 if (p < pend && p[0] == '-' && p[1] != ']')
3018 } 3019 }
3019 else 3020 else
3020 { 3021 {
3021 for (this_char = range_start; this_char <= range_end; 3022 for (this_char = range_start; this_char <= range_end;
3022 this_char++) 3023 this_char++)
3023 SET_LIST_BIT (TRANSLATE (this_char)); 3024 {
3025 int translated = TRANSLATE (this_char);
3026 if (translated < (1 << BYTEWIDTH))
3027 SET_LIST_BIT (translated);
3028 else
3029 SET_RANGE_TABLE_WORK_AREA
3030 (range_table_work, translated, translated);
3031 }
3024 } 3032 }
3025 } 3033 }
3026 else 3034 else
3027 /* ... into range table. */ 3035 /* ... into range table. */
3028 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); 3036 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
3029 } 3037 }
3030 3038
3031 /* Discard any (non)matching list bytes that are all 0 at the 3039 /* Discard any (non)matching list bytes that are all 0 at the
3032 end of the map. Decrease the map-length byte too. */ 3040 end of the map. Decrease the map-length byte too. */
3033 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 3041 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3034 b[-1]--; 3042 b[-1]--;
3035 b += b[-1]; 3043 b += b[-1];
3036 3044
3037 /* Build real range table from work area. */ 3045 /* Build real range table from work area. */
3146 3154
3147 compile_stack.size <<= 1; 3155 compile_stack.size <<= 1;
3148 } 3156 }
3149 3157
3150 /* These are the values to restore when we hit end of this 3158 /* These are the values to restore when we hit end of this
3151 group. They are all relative offsets, so that if the 3159 group. They are all relative offsets, so that if the
3152 whole pattern moves because of realloc, they will still 3160 whole pattern moves because of realloc, they will still
3153 be valid. */ 3161 be valid. */
3154 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 3162 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
3155 COMPILE_STACK_TOP.fixup_alt_jump 3163 COMPILE_STACK_TOP.fixup_alt_jump
3156 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 3164 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
3202 ``can't happen''. */ 3210 ``can't happen''. */
3203 assert (compile_stack.avail != 0); 3211 assert (compile_stack.avail != 0);
3204 { 3212 {
3205 /* We don't just want to restore into `regnum', because 3213 /* We don't just want to restore into `regnum', because
3206 later groups should continue to be numbered higher, 3214 later groups should continue to be numbered higher,
3207 as in `(ab)c(de)' -- the second group is #2. */ 3215 as in `(ab)c(de)' -- the second group is #2. */
3208 regnum_t this_group_regnum; 3216 regnum_t this_group_regnum;
3209 3217
3210 compile_stack.avail--; 3218 compile_stack.avail--;
3211 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 3219 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
3212 fixup_alt_jump 3220 fixup_alt_jump
3219 won't actually generate any code, so we'll have to 3227 won't actually generate any code, so we'll have to
3220 clear pending_exact explicitly. */ 3228 clear pending_exact explicitly. */
3221 pending_exact = 0; 3229 pending_exact = 0;
3222 3230
3223 /* We're at the end of the group, so now we know how many 3231 /* We're at the end of the group, so now we know how many
3224 groups were inside this one. */ 3232 groups were inside this one. */
3225 if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0) 3233 if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
3226 BUF_PUSH_2 (stop_memory, this_group_regnum); 3234 BUF_PUSH_2 (stop_memory, this_group_regnum);
3227 } 3235 }
3228 break; 3236 break;
3229 3237
3234 handle_alt: 3242 handle_alt:
3235 if (syntax & RE_LIMITED_OPS) 3243 if (syntax & RE_LIMITED_OPS)
3236 goto normal_char; 3244 goto normal_char;
3237 3245
3238 /* Insert before the previous alternative a jump which 3246 /* Insert before the previous alternative a jump which
3239 jumps to this alternative if the former fails. */ 3247 jumps to this alternative if the former fails. */
3240 GET_BUFFER_SPACE (3); 3248 GET_BUFFER_SPACE (3);
3241 INSERT_JUMP (on_failure_jump, begalt, b + 6); 3249 INSERT_JUMP (on_failure_jump, begalt, b + 6);
3242 pending_exact = 0; 3250 pending_exact = 0;
3243 b += 3; 3251 b += 3;
3244 3252
3371 b + 5 + nbytes, 3379 b + 5 + nbytes,
3372 lower_bound); 3380 lower_bound);
3373 b += 5; 3381 b += 5;
3374 3382
3375 /* Code to initialize the lower bound. Insert 3383 /* Code to initialize the lower bound. Insert
3376 before the `succeed_n'. The `5' is the last two 3384 before the `succeed_n'. The `5' is the last two
3377 bytes of this `set_number_at', plus 3 bytes of 3385 bytes of this `set_number_at', plus 3 bytes of
3378 the following `succeed_n'. */ 3386 the following `succeed_n'. */
3379 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 3387 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3380 b += 5; 3388 b += 5;
3381 startoffset += 5; 3389 startoffset += 5;
3441 else 3449 else
3442 goto normal_char; 3450 goto normal_char;
3443 3451
3444 #ifdef emacs 3452 #ifdef emacs
3445 /* There is no way to specify the before_dot and after_dot 3453 /* There is no way to specify the before_dot and after_dot
3446 operators. rms says this is ok. --karl */ 3454 operators. rms says this is ok. --karl */
3447 case '=': 3455 case '=':
3448 BUF_PUSH (at_dot); 3456 BUF_PUSH (at_dot);
3449 break; 3457 break;
3450 3458
3451 case 's': 3459 case 's':
3586 || pending_exact + *pending_exact + 1 != b 3594 || pending_exact + *pending_exact + 1 != b
3587 3595
3588 /* We have only one byte following the exactn for the count. */ 3596 /* We have only one byte following the exactn for the count. */
3589 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH 3597 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
3590 3598
3591 /* If followed by a repetition operator. */ 3599 /* If followed by a repetition operator. */
3592 || (p != pend && (*p == '*' || *p == '^')) 3600 || (p != pend && (*p == '*' || *p == '^'))
3593 || ((syntax & RE_BK_PLUS_QM) 3601 || ((syntax & RE_BK_PLUS_QM)
3594 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') 3602 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3595 : p != pend && (*p == '+' || *p == '?')) 3603 : p != pend && (*p == '+' || *p == '?'))
3596 || ((syntax & RE_INTERVALS) 3604 || ((syntax & RE_INTERVALS)
3678 FREE_STACK_RETURN (REG_NOERROR); 3686 FREE_STACK_RETURN (REG_NOERROR);
3679 } /* regex_compile */ 3687 } /* regex_compile */
3680 3688
3681 /* Subroutines for `regex_compile'. */ 3689 /* Subroutines for `regex_compile'. */
3682 3690
3683 /* Store OP at LOC followed by two-byte integer parameter ARG. */ 3691 /* Store OP at LOC followed by two-byte integer parameter ARG. */
3684 3692
3685 static void 3693 static void
3686 store_op1 (op, loc, arg) 3694 store_op1 (op, loc, arg)
3687 re_opcode_t op; 3695 re_opcode_t op;
3688 unsigned char *loc; 3696 unsigned char *loc;
3830 { 3838 {
3831 int j, k; 3839 int j, k;
3832 boolean not; 3840 boolean not;
3833 3841
3834 /* If all elements for base leading-codes in fastmap are set, this 3842 /* If all elements for base leading-codes in fastmap are set, this
3835 flag is set true. */ 3843 flag is set true. */
3836 boolean match_any_multibyte_characters = false; 3844 boolean match_any_multibyte_characters = false;
3837 3845
3838 assert (p); 3846 assert (p);
3839 3847
3840 /* The loop below works as follows: 3848 /* The loop below works as follows:
3878 p++; 3886 p++;
3879 continue; 3887 continue;
3880 3888
3881 3889
3882 /* Following are the cases which match a character. These end 3890 /* Following are the cases which match a character. These end
3883 with `break'. */ 3891 with `break'. */
3884 3892
3885 case exactn: 3893 case exactn:
3886 if (fastmap) 3894 if (fastmap)
3887 { 3895 {
3888 int c = RE_STRING_CHAR (p + 1, pend - p); 3896 int c = RE_STRING_CHAR (p + 1, pend - p);
4100 abort (); /* We have listed all the cases. */ 4108 abort (); /* We have listed all the cases. */
4101 } /* switch *p++ */ 4109 } /* switch *p++ */
4102 4110
4103 /* Getting here means we have found the possible starting 4111 /* Getting here means we have found the possible starting
4104 characters for one path of the pattern -- and that the empty 4112 characters for one path of the pattern -- and that the empty
4105 string does not match. We need not follow this path further. */ 4113 string does not match. We need not follow this path further. */
4106 return 0; 4114 return 0;
4107 } /* while p */ 4115 } /* while p */
4108 4116
4109 /* We reached the end without matching anything. */ 4117 /* We reached the end without matching anything. */
4110 return 1; 4118 return 1;
4135 char *fastmap = bufp->fastmap; 4143 char *fastmap = bufp->fastmap;
4136 int analysis; 4144 int analysis;
4137 4145
4138 assert (fastmap && bufp->buffer); 4146 assert (fastmap && bufp->buffer);
4139 4147
4140 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 4148 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
4141 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 4149 bufp->fastmap_accurate = 1; /* It will be when we're done. */
4142 4150
4143 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, 4151 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
4144 fastmap, RE_MULTIBYTE_P (bufp)); 4152 fastmap, RE_MULTIBYTE_P (bufp));
4145 bufp->can_be_null = (analysis != 0); 4153 bufp->can_be_null = (analysis != 0);
4180 regs->start = regs->end = (regoff_t *) 0; 4188 regs->start = regs->end = (regoff_t *) 0;
4181 } 4189 }
4182 } 4190 }
4183 WEAK_ALIAS (__re_set_registers, re_set_registers) 4191 WEAK_ALIAS (__re_set_registers, re_set_registers)
4184 4192
4185 /* Searching routines. */ 4193 /* Searching routines. */
4186 4194
4187 /* Like re_search_2, below, but only one string is specified, and 4195 /* Like re_search_2, below, but only one string is specified, and
4188 doesn't let you say where to stop matching. */ 4196 doesn't let you say where to stop matching. */
4189 4197
4190 int 4198 int
4249 register RE_TRANSLATE_TYPE translate = bufp->translate; 4257 register RE_TRANSLATE_TYPE translate = bufp->translate;
4250 int total_size = size1 + size2; 4258 int total_size = size1 + size2;
4251 int endpos = startpos + range; 4259 int endpos = startpos + range;
4252 boolean anchored_start; 4260 boolean anchored_start;
4253 4261
4254 /* Nonzero if we have to handle multibyte characters. */ 4262 /* Nonzero if we have to handle multibyte characters. */
4255 const boolean multibyte = RE_MULTIBYTE_P (bufp); 4263 const boolean multibyte = RE_MULTIBYTE_P (bufp);
4256 4264
4257 /* Check for out-of-range STARTPOS. */ 4265 /* Check for out-of-range STARTPOS. */
4258 if (startpos < 0 || startpos > total_size) 4266 if (startpos < 0 || startpos > total_size)
4259 return -1; 4267 return -1;
4319 } 4327 }
4320 4328
4321 /* If a fastmap is supplied, skip quickly over characters that 4329 /* If a fastmap is supplied, skip quickly over characters that
4322 cannot be the start of a match. If the pattern can match the 4330 cannot be the start of a match. If the pattern can match the
4323 null string, however, we don't need to skip characters; we want 4331 null string, however, we don't need to skip characters; we want
4324 the first null string. */ 4332 the first null string. */
4325 if (fastmap && startpos < total_size && !bufp->can_be_null) 4333 if (fastmap && startpos < total_size && !bufp->can_be_null)
4326 { 4334 {
4327 register re_char *d; 4335 register re_char *d;
4328 register re_wchar_t buf_ch; 4336 register re_wchar_t buf_ch;
4329 4337
4330 d = POS_ADDR_VSTRING (startpos); 4338 d = POS_ADDR_VSTRING (startpos);
4331 4339
4332 if (range > 0) /* Searching forwards. */ 4340 if (range > 0) /* Searching forwards. */
4333 { 4341 {
4334 register int lim = 0; 4342 register int lim = 0;
4335 int irange = range; 4343 int irange = range;
4336 4344
4337 if (startpos < size1 && startpos + range >= size1) 4345 if (startpos < size1 && startpos + range >= size1)
4401 break; 4409 break;
4402 } while (1); 4410 } while (1);
4403 4411
4404 startpos += irange - range; 4412 startpos += irange - range;
4405 } 4413 }
4406 else /* Searching backwards. */ 4414 else /* Searching backwards. */
4407 { 4415 {
4408 int room = (startpos >= size1 4416 int room = (startpos >= size1
4409 ? size2 + size1 - startpos 4417 ? size2 + size1 - startpos
4410 : size1 - startpos); 4418 : size1 - startpos);
4411 buf_ch = RE_STRING_CHAR (d, room); 4419 buf_ch = RE_STRING_CHAR (d, room);
4524 d = string2; \ 4532 d = string2; \
4525 dend = end_match_2; \ 4533 dend = end_match_2; \
4526 } \ 4534 } \
4527 4535
4528 /* Test if at very beginning or at very end of the virtual concatenation 4536 /* Test if at very beginning or at very end of the virtual concatenation
4529 of `string1' and `string2'. If only one string, it's `string2'. */ 4537 of `string1' and `string2'. If only one string, it's `string2'. */
4530 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) 4538 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4531 #define AT_STRINGS_END(d) ((d) == end2) 4539 #define AT_STRINGS_END(d) ((d) == end2)
4532 4540
4533 4541
4534 /* Test if D points to a character which is word-constituent. We have 4542 /* Test if D points to a character which is word-constituent. We have
4545 /* The comment at case wordbound is the following one, but we don't use 4553 /* The comment at case wordbound is the following one, but we don't use
4546 AT_WORD_BOUNDARY anymore to support multibyte form. 4554 AT_WORD_BOUNDARY anymore to support multibyte form.
4547 4555
4548 The DEC Alpha C compiler 3.x generates incorrect code for the 4556 The DEC Alpha C compiler 3.x generates incorrect code for the
4549 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 4557 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
4550 AT_WORD_BOUNDARY, so this code is disabled. Expanding the 4558 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
4551 macro and introducing temporary variables works around the bug. */ 4559 macro and introducing temporary variables works around the bug. */
4552 4560
4553 #if 0 4561 #if 0
4554 /* Test if the character before D and the one at D differ with respect 4562 /* Test if the character before D and the one at D differ with respect
4555 to being word-constituent. */ 4563 to being word-constituent. */
4747 such case. */ 4755 such case. */
4748 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) 4756 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4749 { 4757 {
4750 /* Now, we are sure that P2 has no range table. 4758 /* Now, we are sure that P2 has no range table.
4751 So, for the size of bitmap in P2, `p2[1]' is 4759 So, for the size of bitmap in P2, `p2[1]' is
4752 enough. But P1 may have range table, so the 4760 enough. But P1 may have range table, so the
4753 size of bitmap table of P1 is extracted by 4761 size of bitmap table of P1 is extracted by
4754 using macro `CHARSET_BITMAP_SIZE'. 4762 using macro `CHARSET_BITMAP_SIZE'.
4755 4763
4756 Since we know that all the characters listed in 4764 Since we know that all the characters listed in
4757 P2 are ASCII, it is enough to test only bitmap 4765 P2 are ASCII, it is enough to test only bitmap
4778 } 4786 }
4779 else if ((re_opcode_t) *p1 == charset_not) 4787 else if ((re_opcode_t) *p1 == charset_not)
4780 { 4788 {
4781 int idx; 4789 int idx;
4782 /* We win if the charset_not inside the loop lists 4790 /* We win if the charset_not inside the loop lists
4783 every character listed in the charset after. */ 4791 every character listed in the charset after. */
4784 for (idx = 0; idx < (int) p2[1]; idx++) 4792 for (idx = 0; idx < (int) p2[1]; idx++)
4785 if (! (p2[2 + idx] == 0 4793 if (! (p2[2 + idx] == 0
4786 || (idx < CHARSET_BITMAP_SIZE (p1) 4794 || (idx < CHARSET_BITMAP_SIZE (p1)
4787 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0)))) 4795 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4788 break; 4796 break;
4872 WEAK_ALIAS (__re_match, re_match) 4880 WEAK_ALIAS (__re_match, re_match)
4873 #endif /* not emacs */ 4881 #endif /* not emacs */
4874 4882
4875 #ifdef emacs 4883 #ifdef emacs
4876 /* In Emacs, this is the string or buffer in which we 4884 /* In Emacs, this is the string or buffer in which we
4877 are matching. It is used for looking up syntax properties. */ 4885 are matching. It is used for looking up syntax properties. */
4878 Lisp_Object re_match_object; 4886 Lisp_Object re_match_object;
4879 #endif 4887 #endif
4880 4888
4881 /* re_match_2 matches the compiled pattern in BUFP against the 4889 /* re_match_2 matches the compiled pattern in BUFP against the
4882 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 4890 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4883 and SIZE2, respectively). We start matching at POS, and stop 4891 and SIZE2, respectively). We start matching at POS, and stop
4884 matching at STOP. 4892 matching at STOP.
4885 4893
4886 If REGS is non-null and the `no_sub' field of BUFP is zero, we 4894 If REGS is non-null and the `no_sub' field of BUFP is zero, we
4887 store offsets for the substring each group matched in REGS. See the 4895 store offsets for the substring each group matched in REGS. See the
4888 documentation for exactly how many groups we fill. 4896 documentation for exactly how many groups we fill.
4889 4897
4890 We return -1 if no match, -2 if an internal error (such as the 4898 We return -1 if no match, -2 if an internal error (such as the
4891 failure stack overflowing). Otherwise, we return the length of the 4899 failure stack overflowing). Otherwise, we return the length of the
4892 matched substring. */ 4900 matched substring. */
4893 4901
4894 int 4902 int
4895 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 4903 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4896 struct re_pattern_buffer *bufp; 4904 struct re_pattern_buffer *bufp;
4918 return result; 4926 return result;
4919 } 4927 }
4920 WEAK_ALIAS (__re_match_2, re_match_2) 4928 WEAK_ALIAS (__re_match_2, re_match_2)
4921 4929
4922 /* This is a separate function so that we can force an alloca cleanup 4930 /* This is a separate function so that we can force an alloca cleanup
4923 afterwards. */ 4931 afterwards. */
4924 static int 4932 static int
4925 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) 4933 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4926 struct re_pattern_buffer *bufp; 4934 struct re_pattern_buffer *bufp;
4927 re_char *string1, *string2; 4935 re_char *string1, *string2;
4928 int size1, size2; 4936 int size1, size2;
4937 4945
4938 /* Just past the end of the corresponding string. */ 4946 /* Just past the end of the corresponding string. */
4939 re_char *end1, *end2; 4947 re_char *end1, *end2;
4940 4948
4941 /* Pointers into string1 and string2, just past the last characters in 4949 /* Pointers into string1 and string2, just past the last characters in
4942 each to consider matching. */ 4950 each to consider matching. */
4943 re_char *end_match_1, *end_match_2; 4951 re_char *end_match_1, *end_match_2;
4944 4952
4945 /* Where we are in the data, and the end of the current string. */ 4953 /* Where we are in the data, and the end of the current string. */
4946 re_char *d, *dend; 4954 re_char *d, *dend;
4947 4955
4953 4961
4954 /* Where we are in the pattern, and the end of the pattern. */ 4962 /* Where we are in the pattern, and the end of the pattern. */
4955 re_char *p = bufp->buffer; 4963 re_char *p = bufp->buffer;
4956 re_char *pend = p + bufp->used; 4964 re_char *pend = p + bufp->used;
4957 4965
4958 /* We use this to map every character in the string. */ 4966 /* We use this to map every character in the string. */
4959 RE_TRANSLATE_TYPE translate = bufp->translate; 4967 RE_TRANSLATE_TYPE translate = bufp->translate;
4960 4968
4961 /* Nonzero if we have to handle multibyte characters. */ 4969 /* Nonzero if we have to handle multibyte characters. */
4962 const boolean multibyte = RE_MULTIBYTE_P (bufp); 4970 const boolean multibyte = RE_MULTIBYTE_P (bufp);
4963 4971
4964 /* Failure point stack. Each place that can handle a failure further 4972 /* Failure point stack. Each place that can handle a failure further
4965 down the line pushes a failure point on this stack. It consists of 4973 down the line pushes a failure point on this stack. It consists of
4966 regstart, and regend for all registers corresponding to 4974 regstart, and regend for all registers corresponding to
4967 the subexpressions we're currently inside, plus the number of such 4975 the subexpressions we're currently inside, plus the number of such
4968 registers, and, finally, two char *'s. The first char * is where 4976 registers, and, finally, two char *'s. The first char * is where
4969 to resume scanning the pattern; the second one is where to resume 4977 to resume scanning the pattern; the second one is where to resume
4970 scanning the strings. */ 4978 scanning the strings. */
4971 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 4979 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4972 fail_stack_type fail_stack; 4980 fail_stack_type fail_stack;
4973 #endif 4981 #endif
4974 #ifdef DEBUG 4982 #ifdef DEBUG
4975 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 4983 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4976 #endif 4984 #endif
4980 it is allocated relocatably. */ 4988 it is allocated relocatably. */
4981 fail_stack_elt_t *failure_stack_ptr; 4989 fail_stack_elt_t *failure_stack_ptr;
4982 #endif 4990 #endif
4983 4991
4984 /* We fill all the registers internally, independent of what we 4992 /* We fill all the registers internally, independent of what we
4985 return, for use in backreferences. The number here includes 4993 return, for use in backreferences. The number here includes
4986 an element for register zero. */ 4994 an element for register zero. */
4987 size_t num_regs = bufp->re_nsub + 1; 4995 size_t num_regs = bufp->re_nsub + 1;
4988 4996
4989 /* Information on the contents of registers. These are pointers into 4997 /* Information on the contents of registers. These are pointers into
4990 the input strings; they record just what was matched (on this 4998 the input strings; they record just what was matched (on this
5006 re_char **best_regstart, **best_regend; 5014 re_char **best_regstart, **best_regend;
5007 #endif 5015 #endif
5008 5016
5009 /* Logically, this is `best_regend[0]'. But we don't want to have to 5017 /* Logically, this is `best_regend[0]'. But we don't want to have to
5010 allocate space for that if we're not allocating space for anything 5018 allocate space for that if we're not allocating space for anything
5011 else (see below). Also, we never need info about register 0 for 5019 else (see below). Also, we never need info about register 0 for
5012 any of the other register vectors, and it seems rather a kludge to 5020 any of the other register vectors, and it seems rather a kludge to
5013 treat `best_regend' differently than the rest. So we keep track of 5021 treat `best_regend' differently than the rest. So we keep track of
5014 the end of the best match so far in a separate variable. We 5022 the end of the best match so far in a separate variable. We
5015 initialize this to NULL so that when we backtrack the first time 5023 initialize this to NULL so that when we backtrack the first time
5016 and need to test it, it's not garbage. */ 5024 and need to test it, it's not garbage. */
5064 register information struct. */ 5072 register information struct. */
5065 for (reg = 1; reg < num_regs; reg++) 5073 for (reg = 1; reg < num_regs; reg++)
5066 regstart[reg] = regend[reg] = NULL; 5074 regstart[reg] = regend[reg] = NULL;
5067 5075
5068 /* We move `string1' into `string2' if the latter's empty -- but not if 5076 /* We move `string1' into `string2' if the latter's empty -- but not if
5069 `string1' is null. */ 5077 `string1' is null. */
5070 if (size2 == 0 && string1 != NULL) 5078 if (size2 == 0 && string1 != NULL)
5071 { 5079 {
5072 string2 = string1; 5080 string2 = string1;
5073 size2 = size1; 5081 size2 = size1;
5074 string1 = 0; 5082 string1 = 0;
5121 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 5129 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5122 DEBUG_PRINT1 ("The string to match is: `"); 5130 DEBUG_PRINT1 ("The string to match is: `");
5123 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 5131 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5124 DEBUG_PRINT1 ("'\n"); 5132 DEBUG_PRINT1 ("'\n");
5125 5133
5126 /* This loops over pattern commands. It exits by returning from the 5134 /* This loops over pattern commands. It exits by returning from the
5127 function if the match is complete, or it drops through if the match 5135 function if the match is complete, or it drops through if the match
5128 fails at this starting point in the input data. */ 5136 fails at this starting point in the input data. */
5129 for (;;) 5137 for (;;)
5130 { 5138 {
5131 DEBUG_PRINT2 ("\n%p: ", p); 5139 DEBUG_PRINT2 ("\n%p: ", p);
5144 == FIRST_STRING_P (d)); 5152 == FIRST_STRING_P (d));
5145 /* 1 if this match is the best seen so far. */ 5153 /* 1 if this match is the best seen so far. */
5146 boolean best_match_p; 5154 boolean best_match_p;
5147 5155
5148 /* AIX compiler got confused when this was combined 5156 /* AIX compiler got confused when this was combined
5149 with the previous declaration. */ 5157 with the previous declaration. */
5150 if (same_str_p) 5158 if (same_str_p)
5151 best_match_p = d > match_end; 5159 best_match_p = d > match_end;
5152 else 5160 else
5153 best_match_p = !FIRST_STRING_P (d); 5161 best_match_p = !FIRST_STRING_P (d);
5154 5162
5182 restore_best_regs: 5190 restore_best_regs:
5183 /* Restore best match. It may happen that `dend == 5191 /* Restore best match. It may happen that `dend ==
5184 end_match_1' while the restored d is in string2. 5192 end_match_1' while the restored d is in string2.
5185 For example, the pattern `x.*y.*z' against the 5193 For example, the pattern `x.*y.*z' against the
5186 strings `x-' and `y-z-', if the two strings are 5194 strings `x-' and `y-z-', if the two strings are
5187 not consecutive in memory. */ 5195 not consecutive in memory. */
5188 DEBUG_PRINT1 ("Restoring best registers.\n"); 5196 DEBUG_PRINT1 ("Restoring best registers.\n");
5189 5197
5190 d = match_end; 5198 d = match_end;
5191 dend = ((d >= string1 && d <= end1) 5199 dend = ((d >= string1 && d <= end1)
5192 ? end_match_1 : end_match_2); 5200 ? end_match_1 : end_match_2);
5205 /* If caller wants register contents data back, do it. */ 5213 /* If caller wants register contents data back, do it. */
5206 if (regs && !bufp->no_sub) 5214 if (regs && !bufp->no_sub)
5207 { 5215 {
5208 /* Have the register data arrays been allocated? */ 5216 /* Have the register data arrays been allocated? */
5209 if (bufp->regs_allocated == REGS_UNALLOCATED) 5217 if (bufp->regs_allocated == REGS_UNALLOCATED)
5210 { /* No. So allocate them with malloc. We need one 5218 { /* No. So allocate them with malloc. We need one
5211 extra element beyond `num_regs' for the `-1' marker 5219 extra element beyond `num_regs' for the `-1' marker
5212 GNU code uses. */ 5220 GNU code uses. */
5213 regs->num_regs = MAX (RE_NREGS, num_regs + 1); 5221 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5214 regs->start = TALLOC (regs->num_regs, regoff_t); 5222 regs->start = TALLOC (regs->num_regs, regoff_t);
5215 regs->end = TALLOC (regs->num_regs, regoff_t); 5223 regs->end = TALLOC (regs->num_regs, regoff_t);
5237 } 5245 }
5238 } 5246 }
5239 else 5247 else
5240 { 5248 {
5241 /* These braces fend off an "empty body in an else-statement" 5249 /* These braces fend off an "empty body in an else-statement"
5242 warning under GCC when assert expands to nothing. */ 5250 warning under GCC when assert expands to nothing. */
5243 assert (bufp->regs_allocated == REGS_FIXED); 5251 assert (bufp->regs_allocated == REGS_FIXED);
5244 } 5252 }
5245 5253
5246 /* Convert the pointer data in `regstart' and `regend' to 5254 /* Convert the pointer data in `regstart' and `regend' to
5247 indices. Register zero has to be set differently, 5255 indices. Register zero has to be set differently,
5269 5277
5270 /* If the regs structure we return has more elements than 5278 /* If the regs structure we return has more elements than
5271 were in the pattern, set the extra elements to -1. If 5279 were in the pattern, set the extra elements to -1. If
5272 we (re)allocated the registers, this is the case, 5280 we (re)allocated the registers, this is the case,
5273 because we always allocate enough to have at least one 5281 because we always allocate enough to have at least one
5274 -1 at the end. */ 5282 -1 at the end. */
5275 for (reg = num_regs; reg < regs->num_regs; reg++) 5283 for (reg = num_regs; reg < regs->num_regs; reg++)
5276 regs->start[reg] = regs->end[reg] = -1; 5284 regs->start[reg] = regs->end[reg] = -1;
5277 } /* regs && !bufp->no_sub */ 5285 } /* regs && !bufp->no_sub */
5278 5286
5279 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 5287 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
5287 5295
5288 FREE_VARIABLES (); 5296 FREE_VARIABLES ();
5289 return mcnt; 5297 return mcnt;
5290 } 5298 }
5291 5299
5292 /* Otherwise match next pattern command. */ 5300 /* Otherwise match next pattern command. */
5293 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 5301 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
5294 { 5302 {
5295 /* Ignore these. Used to ignore the n of succeed_n's which 5303 /* Ignore these. Used to ignore the n of succeed_n's which
5296 currently have n == 0. */ 5304 currently have n == 0. */
5297 case no_op: 5305 case no_op:
5300 5308
5301 case succeed: 5309 case succeed:
5302 DEBUG_PRINT1 ("EXECUTING succeed.\n"); 5310 DEBUG_PRINT1 ("EXECUTING succeed.\n");
5303 goto succeed_label; 5311 goto succeed_label;
5304 5312
5305 /* Match the next n pattern characters exactly. The following 5313 /* Match the next n pattern characters exactly. The following
5306 byte in the pattern defines n, and the n bytes after that 5314 byte in the pattern defines n, and the n bytes after that
5307 are the characters to match. */ 5315 are the characters to match. */
5308 case exactn: 5316 case exactn:
5309 mcnt = *p++; 5317 mcnt = *p++;
5310 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 5318 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
5311 5319
5312 /* Remember the start point to rollback upon failure. */ 5320 /* Remember the start point to rollback upon failure. */
5466 5474
5467 5475
5468 /* The beginning of a group is represented by start_memory. 5476 /* The beginning of a group is represented by start_memory.
5469 The argument is the register number. The text 5477 The argument is the register number. The text
5470 matched within the group is recorded (in the internal 5478 matched within the group is recorded (in the internal
5471 registers data structure) under the register number. */ 5479 registers data structure) under the register number. */
5472 case start_memory: 5480 case start_memory:
5473 DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p); 5481 DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p);
5474 5482
5475 /* In case we need to undo this operation (via backtracking). */ 5483 /* In case we need to undo this operation (via backtracking). */
5476 PUSH_FAILURE_REG ((unsigned int)*p); 5484 PUSH_FAILURE_REG ((unsigned int)*p);
5515 /* \<digit> has been turned into a `duplicate' command which is 5523 /* \<digit> has been turned into a `duplicate' command which is
5516 followed by the numeric value of <digit> as the register number. */ 5524 followed by the numeric value of <digit> as the register number. */
5517 case duplicate: 5525 case duplicate:
5518 { 5526 {
5519 register re_char *d2, *dend2; 5527 register re_char *d2, *dend2;
5520 int regno = *p++; /* Get which register to match against. */ 5528 int regno = *p++; /* Get which register to match against. */
5521 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 5529 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
5522 5530
5523 /* Can't back reference a group which we've never matched. */ 5531 /* Can't back reference a group which we've never matched. */
5524 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 5532 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5525 goto fail; 5533 goto fail;
5526 5534
5527 /* Where in input to try to start matching. */ 5535 /* Where in input to try to start matching. */
5528 d2 = regstart[regno]; 5536 d2 = regstart[regno];
5529 5537
5530 /* Remember the start point to rollback upon failure. */ 5538 /* Remember the start point to rollback upon failure. */
5531 dfail = d; 5539 dfail = d;
5532 5540
5635 5643
5636 /* on_failure_keep_string_jump is used to optimize `.*\n'. It 5644 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
5637 pushes NULL as the value for the string on the stack. Then 5645 pushes NULL as the value for the string on the stack. Then
5638 `POP_FAILURE_POINT' will keep the current value for the 5646 `POP_FAILURE_POINT' will keep the current value for the
5639 string, instead of restoring it. To see why, consider 5647 string, instead of restoring it. To see why, consider
5640 matching `foo\nbar' against `.*\n'. The .* matches the foo; 5648 matching `foo\nbar' against `.*\n'. The .* matches the foo;
5641 then the . fails against the \n. But the next thing we want 5649 then the . fails against the \n. But the next thing we want
5642 to do is match the \n against the \n; if we restored the 5650 to do is match the \n against the \n; if we restored the
5643 string value, we would be back at the foo. 5651 string value, we would be back at the foo.
5644 5652
5645 Because this is used only in specific cases, we don't need to 5653 Because this is used only in specific cases, we don't need to
5780 case jump: 5788 case jump:
5781 unconditional_jump: 5789 unconditional_jump:
5782 IMMEDIATE_QUIT_CHECK; 5790 IMMEDIATE_QUIT_CHECK;
5783 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 5791 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
5784 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 5792 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5785 p += mcnt; /* Do the jump. */ 5793 p += mcnt; /* Do the jump. */
5786 DEBUG_PRINT2 ("(to %p).\n", p); 5794 DEBUG_PRINT2 ("(to %p).\n", p);
5787 break; 5795 break;
5788 5796
5789 5797
5790 /* Have to succeed matching what follows at least n times. 5798 /* Have to succeed matching what follows at least n times.
5873 s2 = SYNTAX (c2); 5881 s2 = SYNTAX (c2);
5874 5882
5875 if (/* Case 2: Only one of S1 and S2 is Sword. */ 5883 if (/* Case 2: Only one of S1 and S2 is Sword. */
5876 ((s1 == Sword) != (s2 == Sword)) 5884 ((s1 == Sword) != (s2 == Sword))
5877 /* Case 3: Both of S1 and S2 are Sword, and macro 5885 /* Case 3: Both of S1 and S2 are Sword, and macro
5878 WORD_BOUNDARY_P (C1, C2) returns nonzero. */ 5886 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5879 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) 5887 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5880 not = !not; 5888 not = !not;
5881 } 5889 }
5882 if (not) 5890 if (not)
5883 break; 5891 break;
5887 case wordbeg: 5895 case wordbeg:
5888 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 5896 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5889 5897
5890 /* We FAIL in one of the following cases: */ 5898 /* We FAIL in one of the following cases: */
5891 5899
5892 /* Case 1: D is at the end of string. */ 5900 /* Case 1: D is at the end of string. */
5893 if (AT_STRINGS_END (d)) 5901 if (AT_STRINGS_END (d))
5894 goto fail; 5902 goto fail;
5895 else 5903 else
5896 { 5904 {
5897 /* C1 is the character before D, S1 is the syntax of C1, C2 5905 /* C1 is the character before D, S1 is the syntax of C1, C2
5919 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); 5927 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5920 #endif 5928 #endif
5921 s1 = SYNTAX (c1); 5929 s1 = SYNTAX (c1);
5922 5930
5923 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2) 5931 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5924 returns 0. */ 5932 returns 0. */
5925 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2)) 5933 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5926 goto fail; 5934 goto fail;
5927 } 5935 }
5928 } 5936 }
5929 break; 5937 break;
5963 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); 5971 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5964 #endif 5972 #endif
5965 s2 = SYNTAX (c2); 5973 s2 = SYNTAX (c2);
5966 5974
5967 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) 5975 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5968 returns 0. */ 5976 returns 0. */
5969 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2)) 5977 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5970 goto fail; 5978 goto fail;
5971 } 5979 }
5972 } 5980 }
5973 break; 5981 break;
5975 case symbeg: 5983 case symbeg:
5976 DEBUG_PRINT1 ("EXECUTING symbeg.\n"); 5984 DEBUG_PRINT1 ("EXECUTING symbeg.\n");
5977 5985
5978 /* We FAIL in one of the following cases: */ 5986 /* We FAIL in one of the following cases: */
5979 5987
5980 /* Case 1: D is at the end of string. */ 5988 /* Case 1: D is at the end of string. */
5981 if (AT_STRINGS_END (d)) 5989 if (AT_STRINGS_END (d))
5982 goto fail; 5990 goto fail;
5983 else 5991 else
5984 { 5992 {
5985 /* C1 is the character before D, S1 is the syntax of C1, C2 5993 /* C1 is the character before D, S1 is the syntax of C1, C2
5992 UPDATE_SYNTAX_TABLE (charpos); 6000 UPDATE_SYNTAX_TABLE (charpos);
5993 #endif 6001 #endif
5994 PREFETCH (); 6002 PREFETCH ();
5995 c2 = RE_STRING_CHAR (d, dend - d); 6003 c2 = RE_STRING_CHAR (d, dend - d);
5996 s2 = SYNTAX (c2); 6004 s2 = SYNTAX (c2);
5997 6005
5998 /* Case 2: S2 is neither Sword nor Ssymbol. */ 6006 /* Case 2: S2 is neither Sword nor Ssymbol. */
5999 if (s2 != Sword && s2 != Ssymbol) 6007 if (s2 != Sword && s2 != Ssymbol)
6000 goto fail; 6008 goto fail;
6001 6009
6002 /* Case 3: D is not at the beginning of string ... */ 6010 /* Case 3: D is not at the beginning of string ... */