Mercurial > emacs
comparison src/regex.c @ 13565:c66885b6330c
(gettext_noop): New macro, identity fn.
(re_error_msgid): Mark strings with that, to ease message catalog creation.
author | Roland McGrath <roland@gnu.org> |
---|---|
date | Thu, 16 Nov 1995 02:51:19 +0000 |
parents | e50cebfd1d7a |
children | e2669b8a46e2 |
comparison
equal
deleted
inserted
replaced
13564:8238a4262b6d | 13565:c66885b6330c |
---|---|
37 /* This is for other GNU distributions with internationalized messages. */ | 37 /* This is for other GNU distributions with internationalized messages. */ |
38 #if HAVE_LIBINTL_H || defined (_LIBC) | 38 #if HAVE_LIBINTL_H || defined (_LIBC) |
39 # include <libintl.h> | 39 # include <libintl.h> |
40 #else | 40 #else |
41 # define gettext(msgid) (msgid) | 41 # define gettext(msgid) (msgid) |
42 #endif | |
43 | |
44 #ifndef gettext_noop | |
45 /* This define is so xgettext can find the internationalizable | |
46 strings. */ | |
47 #define gettext_noop(String) String | |
42 #endif | 48 #endif |
43 | 49 |
44 /* The `emacs' switch turns on certain matching commands | 50 /* The `emacs' switch turns on certain matching commands |
45 that make sense only in Emacs. */ | 51 that make sense only in Emacs. */ |
46 #ifdef emacs | 52 #ifdef emacs |
95 | 101 |
96 /* Define the syntax stuff for \<, \>, etc. */ | 102 /* Define the syntax stuff for \<, \>, etc. */ |
97 | 103 |
98 /* This must be nonzero for the wordchar and notwordchar pattern | 104 /* This must be nonzero for the wordchar and notwordchar pattern |
99 commands in re_match_2. */ | 105 commands in re_match_2. */ |
100 #ifndef Sword | 106 #ifndef Sword |
101 #define Sword 1 | 107 #define Sword 1 |
102 #endif | 108 #endif |
103 | 109 |
104 #ifdef SWITCH_ENUM_BUG | 110 #ifdef SWITCH_ENUM_BUG |
105 #define SWITCH_ENUM_CAST(x) ((int)(x)) | 111 #define SWITCH_ENUM_CAST(x) ((int)(x)) |
212 | 218 |
213 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we | 219 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we |
214 use `alloca' instead of `malloc'. This is because using malloc in | 220 use `alloca' instead of `malloc'. This is because using malloc in |
215 re_search* or re_match* could cause memory leaks when C-g is used in | 221 re_search* or re_match* could cause memory leaks when C-g is used in |
216 Emacs; also, malloc is slower and causes storage fragmentation. On | 222 Emacs; also, malloc is slower and causes storage fragmentation. On |
217 the other hand, malloc is more portable, and easier to debug. | 223 the other hand, malloc is more portable, and easier to debug. |
218 | 224 |
219 Because we sometimes use alloca, some routines have to be macros, | 225 Because we sometimes use alloca, some routines have to be macros, |
220 not functions -- `alloca'-allocated space disappears at the end of the | 226 not functions -- `alloca'-allocated space disappears at the end of the |
221 function it is called in. */ | 227 function it is called in. */ |
222 | 228 |
223 #ifdef REGEX_MALLOC | 229 #ifdef REGEX_MALLOC |
241 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */ | 247 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */ |
242 #ifndef _AIX /* Already did AIX, up at the top. */ | 248 #ifndef _AIX /* Already did AIX, up at the top. */ |
243 char *alloca (); | 249 char *alloca (); |
244 #endif /* not _AIX */ | 250 #endif /* not _AIX */ |
245 #endif | 251 #endif |
246 #endif /* not HAVE_ALLOCA_H */ | 252 #endif /* not HAVE_ALLOCA_H */ |
247 #endif /* not __GNUC__ */ | 253 #endif /* not __GNUC__ */ |
248 | 254 |
249 #endif /* not alloca */ | 255 #endif /* not alloca */ |
250 | 256 |
251 #define REGEX_ALLOCATE alloca | 257 #define REGEX_ALLOCATE alloca |
383 of string to be matched (if not). */ | 389 of string to be matched (if not). */ |
384 begbuf, | 390 begbuf, |
385 | 391 |
386 /* Analogously, for end of buffer/string. */ | 392 /* Analogously, for end of buffer/string. */ |
387 endbuf, | 393 endbuf, |
388 | 394 |
389 /* Followed by two byte relative address to which to jump. */ | 395 /* Followed by two byte relative address to which to jump. */ |
390 jump, | 396 jump, |
391 | 397 |
392 /* Same as jump, but marks the end of an alternative. */ | 398 /* Same as jump, but marks the end of an alternative. */ |
393 jump_past_alt, | 399 jump_past_alt, |
394 | 400 |
395 /* Followed by two-byte relative address of place to resume at | 401 /* Followed by two-byte relative address of place to resume at |
396 in case of failure. */ | 402 in case of failure. */ |
397 on_failure_jump, | 403 on_failure_jump, |
398 | 404 |
399 /* Like on_failure_jump, but pushes a placeholder instead of the | 405 /* Like on_failure_jump, but pushes a placeholder instead of the |
400 current string position when executed. */ | 406 current string position when executed. */ |
401 on_failure_keep_string_jump, | 407 on_failure_keep_string_jump, |
402 | 408 |
403 /* Throw away latest failure point and then jump to following | 409 /* Throw away latest failure point and then jump to following |
404 two-byte relative address. */ | 410 two-byte relative address. */ |
405 pop_failure_jump, | 411 pop_failure_jump, |
406 | 412 |
407 /* Change to pop_failure_jump if know won't have to backtrack to | 413 /* Change to pop_failure_jump if know won't have to backtrack to |
493 static void | 499 static void |
494 extract_number (dest, source) | 500 extract_number (dest, source) |
495 int *dest; | 501 int *dest; |
496 unsigned char *source; | 502 unsigned char *source; |
497 { | 503 { |
498 int temp = SIGN_EXTEND_CHAR (*(source + 1)); | 504 int temp = SIGN_EXTEND_CHAR (*(source + 1)); |
499 *dest = *source & 0377; | 505 *dest = *source & 0377; |
500 *dest += temp << 8; | 506 *dest += temp << 8; |
501 } | 507 } |
502 | 508 |
503 #ifndef EXTRACT_MACROS /* To debug the macros. */ | 509 #ifndef EXTRACT_MACROS /* To debug the macros. */ |
519 #ifdef DEBUG | 525 #ifdef DEBUG |
520 static void | 526 static void |
521 extract_number_and_incr (destination, source) | 527 extract_number_and_incr (destination, source) |
522 int *destination; | 528 int *destination; |
523 unsigned char **source; | 529 unsigned char **source; |
524 { | 530 { |
525 extract_number (destination, *source); | 531 extract_number (destination, *source); |
526 *source += 2; | 532 *source += 2; |
527 } | 533 } |
528 | 534 |
529 #ifndef EXTRACT_MACROS | 535 #ifndef EXTRACT_MACROS |
566 void | 572 void |
567 print_fastmap (fastmap) | 573 print_fastmap (fastmap) |
568 char *fastmap; | 574 char *fastmap; |
569 { | 575 { |
570 unsigned was_a_range = 0; | 576 unsigned was_a_range = 0; |
571 unsigned i = 0; | 577 unsigned i = 0; |
572 | 578 |
573 while (i < (1 << BYTEWIDTH)) | 579 while (i < (1 << BYTEWIDTH)) |
574 { | 580 { |
575 if (fastmap[i++]) | 581 if (fastmap[i++]) |
576 { | 582 { |
577 was_a_range = 0; | 583 was_a_range = 0; |
586 printf ("-"); | 592 printf ("-"); |
587 putchar (i - 1); | 593 putchar (i - 1); |
588 } | 594 } |
589 } | 595 } |
590 } | 596 } |
591 putchar ('\n'); | 597 putchar ('\n'); |
592 } | 598 } |
593 | 599 |
594 | 600 |
595 /* Print a compiled pattern string in human-readable form, starting at | 601 /* Print a compiled pattern string in human-readable form, starting at |
596 the START pointer into it and ending just before the pointer END. */ | 602 the START pointer into it and ending just before the pointer END. */ |
607 if (start == NULL) | 613 if (start == NULL) |
608 { | 614 { |
609 printf ("(null)\n"); | 615 printf ("(null)\n"); |
610 return; | 616 return; |
611 } | 617 } |
612 | 618 |
613 /* Loop over pattern commands. */ | 619 /* Loop over pattern commands. */ |
614 while (p < pend) | 620 while (p < pend) |
615 { | 621 { |
616 printf ("%d:\t", p - start); | 622 printf ("%d:\t", p - start); |
617 | 623 |
656 register int c, last = -100; | 662 register int c, last = -100; |
657 register int in_range = 0; | 663 register int in_range = 0; |
658 | 664 |
659 printf ("/charset [%s", | 665 printf ("/charset [%s", |
660 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); | 666 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); |
661 | 667 |
662 assert (p + *p < pend); | 668 assert (p + *p < pend); |
663 | 669 |
664 for (c = 0; c < 256; c++) | 670 for (c = 0; c < 256; c++) |
665 if (c / 8 < *p | 671 if (c / 8 < *p |
666 && (p[1 + (c/8)] & (1 << (c % 8)))) | 672 && (p[1 + (c/8)] & (1 << (c % 8)))) |
675 else if (last + 1 != c && in_range) | 681 else if (last + 1 != c && in_range) |
676 { | 682 { |
677 putchar (last); | 683 putchar (last); |
678 in_range = 0; | 684 in_range = 0; |
679 } | 685 } |
680 | 686 |
681 if (! in_range) | 687 if (! in_range) |
682 putchar (c); | 688 putchar (c); |
683 | 689 |
684 last = c; | 690 last = c; |
685 } | 691 } |
717 break; | 723 break; |
718 | 724 |
719 case push_dummy_failure: | 725 case push_dummy_failure: |
720 printf ("/push_dummy_failure"); | 726 printf ("/push_dummy_failure"); |
721 break; | 727 break; |
722 | 728 |
723 case maybe_pop_jump: | 729 case maybe_pop_jump: |
724 extract_number_and_incr (&mcnt, &p); | 730 extract_number_and_incr (&mcnt, &p); |
725 printf ("/maybe_pop_jump to %d", p + mcnt - start); | 731 printf ("/maybe_pop_jump to %d", p + mcnt - start); |
726 break; | 732 break; |
727 | 733 |
728 case pop_failure_jump: | 734 case pop_failure_jump: |
729 extract_number_and_incr (&mcnt, &p); | 735 extract_number_and_incr (&mcnt, &p); |
730 printf ("/pop_failure_jump to %d", p + mcnt - start); | 736 printf ("/pop_failure_jump to %d", p + mcnt - start); |
731 break; | 737 break; |
732 | 738 |
733 case jump_past_alt: | 739 case jump_past_alt: |
734 extract_number_and_incr (&mcnt, &p); | 740 extract_number_and_incr (&mcnt, &p); |
735 printf ("/jump_past_alt to %d", p + mcnt - start); | 741 printf ("/jump_past_alt to %d", p + mcnt - start); |
736 break; | 742 break; |
737 | 743 |
738 case jump: | 744 case jump: |
739 extract_number_and_incr (&mcnt, &p); | 745 extract_number_and_incr (&mcnt, &p); |
740 printf ("/jump to %d", p + mcnt - start); | 746 printf ("/jump to %d", p + mcnt - start); |
741 break; | 747 break; |
742 | 748 |
743 case succeed_n: | 749 case succeed_n: |
744 extract_number_and_incr (&mcnt, &p); | 750 extract_number_and_incr (&mcnt, &p); |
745 extract_number_and_incr (&mcnt2, &p); | 751 extract_number_and_incr (&mcnt2, &p); |
746 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); | 752 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); |
747 break; | 753 break; |
748 | 754 |
749 case jump_n: | 755 case jump_n: |
750 extract_number_and_incr (&mcnt, &p); | 756 extract_number_and_incr (&mcnt, &p); |
751 extract_number_and_incr (&mcnt2, &p); | 757 extract_number_and_incr (&mcnt2, &p); |
752 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); | 758 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); |
753 break; | 759 break; |
754 | 760 |
755 case set_number_at: | 761 case set_number_at: |
756 extract_number_and_incr (&mcnt, &p); | 762 extract_number_and_incr (&mcnt, &p); |
757 extract_number_and_incr (&mcnt2, &p); | 763 extract_number_and_incr (&mcnt2, &p); |
758 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); | 764 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); |
759 break; | 765 break; |
760 | 766 |
761 case wordbound: | 767 case wordbound: |
762 printf ("/wordbound"); | 768 printf ("/wordbound"); |
763 break; | 769 break; |
764 | 770 |
765 case notwordbound: | 771 case notwordbound: |
767 break; | 773 break; |
768 | 774 |
769 case wordbeg: | 775 case wordbeg: |
770 printf ("/wordbeg"); | 776 printf ("/wordbeg"); |
771 break; | 777 break; |
772 | 778 |
773 case wordend: | 779 case wordend: |
774 printf ("/wordend"); | 780 printf ("/wordend"); |
775 | 781 |
776 #ifdef emacs | 782 #ifdef emacs |
777 case before_dot: | 783 case before_dot: |
778 printf ("/before_dot"); | 784 printf ("/before_dot"); |
779 break; | 785 break; |
780 | 786 |
789 case syntaxspec: | 795 case syntaxspec: |
790 printf ("/syntaxspec"); | 796 printf ("/syntaxspec"); |
791 mcnt = *p++; | 797 mcnt = *p++; |
792 printf ("/%d", mcnt); | 798 printf ("/%d", mcnt); |
793 break; | 799 break; |
794 | 800 |
795 case notsyntaxspec: | 801 case notsyntaxspec: |
796 printf ("/notsyntaxspec"); | 802 printf ("/notsyntaxspec"); |
797 mcnt = *p++; | 803 mcnt = *p++; |
798 printf ("/%d", mcnt); | 804 printf ("/%d", mcnt); |
799 break; | 805 break; |
800 #endif /* emacs */ | 806 #endif /* emacs */ |
801 | 807 |
802 case wordchar: | 808 case wordchar: |
803 printf ("/wordchar"); | 809 printf ("/wordchar"); |
804 break; | 810 break; |
805 | 811 |
806 case notwordchar: | 812 case notwordchar: |
807 printf ("/notwordchar"); | 813 printf ("/notwordchar"); |
808 break; | 814 break; |
809 | 815 |
810 case begbuf: | 816 case begbuf: |
860 const char *string2; | 866 const char *string2; |
861 int size1; | 867 int size1; |
862 int size2; | 868 int size2; |
863 { | 869 { |
864 unsigned this_char; | 870 unsigned this_char; |
865 | 871 |
866 if (where == NULL) | 872 if (where == NULL) |
867 printf ("(null)"); | 873 printf ("(null)"); |
868 else | 874 else |
869 { | 875 { |
870 if (FIRST_STRING_P (where)) | 876 if (FIRST_STRING_P (where)) |
871 { | 877 { |
872 for (this_char = where - string1; this_char < size1; this_char++) | 878 for (this_char = where - string1; this_char < size1; this_char++) |
873 putchar (string1[this_char]); | 879 putchar (string1[this_char]); |
874 | 880 |
875 where = string2; | 881 where = string2; |
876 } | 882 } |
877 | 883 |
878 for (this_char = where - string2; this_char < size2; this_char++) | 884 for (this_char = where - string2; this_char < size2; this_char++) |
879 putchar (string2[this_char]); | 885 putchar (string2[this_char]); |
880 } | 886 } |
913 reg_syntax_t | 919 reg_syntax_t |
914 re_set_syntax (syntax) | 920 re_set_syntax (syntax) |
915 reg_syntax_t syntax; | 921 reg_syntax_t syntax; |
916 { | 922 { |
917 reg_syntax_t ret = re_syntax_options; | 923 reg_syntax_t ret = re_syntax_options; |
918 | 924 |
919 re_syntax_options = syntax; | 925 re_syntax_options = syntax; |
920 return ret; | 926 return ret; |
921 } | 927 } |
922 | 928 |
923 /* This table gives an error message for each of the error codes listed | 929 /* This table gives an error message for each of the error codes listed |
924 in regex.h. Obviously the order here has to be same as there. | 930 in regex.h. Obviously the order here has to be same as there. |
925 POSIX doesn't require that we do anything for REG_NOERROR, | 931 POSIX doesn't require that we do anything for REG_NOERROR, |
926 but why not be nice? */ | 932 but why not be nice? */ |
927 | 933 |
928 static const char *re_error_msgid[] = | 934 static const char *re_error_msgid[] = |
929 { "Success", /* REG_NOERROR */ | 935 { |
930 "No match", /* REG_NOMATCH */ | 936 gettext_noop ("Success"), /* REG_NOERROR */ |
931 "Invalid regular expression", /* REG_BADPAT */ | 937 gettext_noop ("No match"), /* REG_NOMATCH */ |
932 "Invalid collation character", /* REG_ECOLLATE */ | 938 gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ |
933 "Invalid character class name", /* REG_ECTYPE */ | 939 gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ |
934 "Trailing backslash", /* REG_EESCAPE */ | 940 gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ |
935 "Invalid back reference", /* REG_ESUBREG */ | 941 gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ |
936 "Unmatched [ or [^", /* REG_EBRACK */ | 942 gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ |
937 "Unmatched ( or \\(", /* REG_EPAREN */ | 943 gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ |
938 "Unmatched \\{", /* REG_EBRACE */ | 944 gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ |
939 "Invalid content of \\{\\}", /* REG_BADBR */ | 945 gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ |
940 "Invalid range end", /* REG_ERANGE */ | 946 gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ |
941 "Memory exhausted", /* REG_ESPACE */ | 947 gettext_noop ("Invalid range end"), /* REG_ERANGE */ |
942 "Invalid preceding regular expression", /* REG_BADRPT */ | 948 gettext_noop ("Memory exhausted"), /* REG_ESPACE */ |
943 "Premature end of regular expression", /* REG_EEND */ | 949 gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ |
944 "Regular expression too big", /* REG_ESIZE */ | 950 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ |
945 "Unmatched ) or \\)", /* REG_ERPAREN */ | 951 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ |
952 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ | |
946 }; | 953 }; |
947 | 954 |
948 /* Avoiding alloca during matching, to placate r_alloc. */ | 955 /* Avoiding alloca during matching, to placate r_alloc. */ |
949 | 956 |
950 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the | 957 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the |
953 using the relocating allocator routines, then malloc could cause a | 960 using the relocating allocator routines, then malloc could cause a |
954 relocation, which might (if the strings being searched are in the | 961 relocation, which might (if the strings being searched are in the |
955 ralloc heap) shift the data out from underneath the regexp | 962 ralloc heap) shift the data out from underneath the regexp |
956 routines. | 963 routines. |
957 | 964 |
958 Here's another reason to avoid allocation: Emacs | 965 Here's another reason to avoid allocation: Emacs |
959 processes input from X in a signal handler; processing X input may | 966 processes input from X in a signal handler; processing X input may |
960 call malloc; if input arrives while a matching routine is calling | 967 call malloc; if input arrives while a matching routine is calling |
961 malloc, then we're scrod. But Emacs can't just block input while | 968 malloc, then we're scrod. But Emacs can't just block input while |
962 calling matching routines; then we don't notice interrupts when | 969 calling matching routines; then we don't notice interrupts when |
963 they come in. So, Emacs blocks input around all regexp calls | 970 they come in. So, Emacs blocks input around all regexp calls |
984 | 991 |
985 | 992 |
986 /* Failure stack declarations and macros; both re_compile_fastmap and | 993 /* Failure stack declarations and macros; both re_compile_fastmap and |
987 re_match_2 use a failure stack. These have to be macros because of | 994 re_match_2 use a failure stack. These have to be macros because of |
988 REGEX_ALLOCATE_STACK. */ | 995 REGEX_ALLOCATE_STACK. */ |
989 | 996 |
990 | 997 |
991 /* Number of failure points for which to initially allocate space | 998 /* Number of failure points for which to initially allocate space |
992 when matching. If this number is exceeded, we allocate more | 999 when matching. If this number is exceeded, we allocate more |
993 space, so it is not a hard limit. */ | 1000 space, so it is not a hard limit. */ |
994 #ifndef INIT_FAILURE_ALLOC | 1001 #ifndef INIT_FAILURE_ALLOC |
1053 | 1060 |
1054 | 1061 |
1055 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. | 1062 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. |
1056 | 1063 |
1057 Return 1 if succeeds, and 0 if either ran out of memory | 1064 Return 1 if succeeds, and 0 if either ran out of memory |
1058 allocating space for it or it was already too large. | 1065 allocating space for it or it was already too large. |
1059 | 1066 |
1060 REGEX_REALLOCATE_STACK requires `destination' be declared. */ | 1067 REGEX_REALLOCATE_STACK requires `destination' be declared. */ |
1061 | 1068 |
1062 #define DOUBLE_FAIL_STACK(fail_stack) \ | 1069 #define DOUBLE_FAIL_STACK(fail_stack) \ |
1063 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ | 1070 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ |
1064 ? 0 \ | 1071 ? 0 \ |
1071 ? 0 \ | 1078 ? 0 \ |
1072 : ((fail_stack).size <<= 1, \ | 1079 : ((fail_stack).size <<= 1, \ |
1073 1))) | 1080 1))) |
1074 | 1081 |
1075 | 1082 |
1076 /* Push pointer POINTER on FAIL_STACK. | 1083 /* Push pointer POINTER on FAIL_STACK. |
1077 Return 1 if was able to do so and 0 if ran out of memory allocating | 1084 Return 1 if was able to do so and 0 if ran out of memory allocating |
1078 space to do so. */ | 1085 space to do so. */ |
1079 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ | 1086 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ |
1080 ((FAIL_STACK_FULL () \ | 1087 ((FAIL_STACK_FULL () \ |
1081 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ | 1088 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ |
1116 #define DEBUG_POP(item_addr) | 1123 #define DEBUG_POP(item_addr) |
1117 #endif | 1124 #endif |
1118 | 1125 |
1119 | 1126 |
1120 /* Push the information about the state we will need | 1127 /* Push the information about the state we will need |
1121 if we ever fail back to it. | 1128 if we ever fail back to it. |
1122 | 1129 |
1123 Requires variables fail_stack, regstart, regend, reg_info, and | 1130 Requires variables fail_stack, regstart, regend, reg_info, and |
1124 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be | 1131 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be |
1125 declared. | 1132 declared. |
1126 | 1133 |
1127 Does `return FAILURE_CODE' if runs out of memory. */ | 1134 Does `return FAILURE_CODE' if runs out of memory. */ |
1128 | 1135 |
1129 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ | 1136 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ |
1130 do { \ | 1137 do { \ |
1131 char *destination; \ | 1138 char *destination; \ |
1232 STR -- the saved data position. | 1239 STR -- the saved data position. |
1233 PAT -- the saved pattern position. | 1240 PAT -- the saved pattern position. |
1234 LOW_REG, HIGH_REG -- the highest and lowest active registers. | 1241 LOW_REG, HIGH_REG -- the highest and lowest active registers. |
1235 REGSTART, REGEND -- arrays of string positions. | 1242 REGSTART, REGEND -- arrays of string positions. |
1236 REG_INFO -- array of information about each subexpression. | 1243 REG_INFO -- array of information about each subexpression. |
1237 | 1244 |
1238 Also assumes the variables `fail_stack' and (if debugging), `bufp', | 1245 Also assumes the variables `fail_stack' and (if debugging), `bufp', |
1239 `pend', `string1', `size1', `string2', and `size2'. */ | 1246 `pend', `string1', `size1', `string2', and `size2'. */ |
1240 | 1247 |
1241 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ | 1248 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ |
1242 { \ | 1249 { \ |
1311 | 1318 |
1312 /* Structure for per-register (a.k.a. per-group) information. | 1319 /* Structure for per-register (a.k.a. per-group) information. |
1313 Other register information, such as the | 1320 Other register information, such as the |
1314 starting and ending positions (which are addresses), and the list of | 1321 starting and ending positions (which are addresses), and the list of |
1315 inner groups (which is a bits list) are maintained in separate | 1322 inner groups (which is a bits list) are maintained in separate |
1316 variables. | 1323 variables. |
1317 | 1324 |
1318 We are making a (strictly speaking) nonportable assumption here: that | 1325 We are making a (strictly speaking) nonportable assumption here: that |
1319 the compiler will pack our bit fields into something that fits into | 1326 the compiler will pack our bit fields into something that fits into |
1320 the type of `word', i.e., is something that fits into one item on the | 1327 the type of `word', i.e., is something that fits into one item on the |
1321 failure stack. */ | 1328 failure stack. */ |
1322 | 1329 |
1372 static void insert_op1 (), insert_op2 (); | 1379 static void insert_op1 (), insert_op2 (); |
1373 static boolean at_begline_loc_p (), at_endline_loc_p (); | 1380 static boolean at_begline_loc_p (), at_endline_loc_p (); |
1374 static boolean group_in_compile_stack (); | 1381 static boolean group_in_compile_stack (); |
1375 static reg_errcode_t compile_range (); | 1382 static reg_errcode_t compile_range (); |
1376 | 1383 |
1377 /* Fetch the next character in the uncompiled pattern---translating it | 1384 /* Fetch the next character in the uncompiled pattern---translating it |
1378 if necessary. Also cast from a signed character in the constant | 1385 if necessary. Also cast from a signed character in the constant |
1379 string passed to us by the user to an unsigned char that we can use | 1386 string passed to us by the user to an unsigned char that we can use |
1380 as an array index (in, e.g., `translate'). */ | 1387 as an array index (in, e.g., `translate'). */ |
1381 #ifndef PATFETCH | 1388 #ifndef PATFETCH |
1382 #define PATFETCH(c) \ | 1389 #define PATFETCH(c) \ |
1517 typedef struct | 1524 typedef struct |
1518 { | 1525 { |
1519 pattern_offset_t begalt_offset; | 1526 pattern_offset_t begalt_offset; |
1520 pattern_offset_t fixup_alt_jump; | 1527 pattern_offset_t fixup_alt_jump; |
1521 pattern_offset_t inner_group_offset; | 1528 pattern_offset_t inner_group_offset; |
1522 pattern_offset_t laststart_offset; | 1529 pattern_offset_t laststart_offset; |
1523 regnum_t regnum; | 1530 regnum_t regnum; |
1524 } compile_stack_elt_t; | 1531 } compile_stack_elt_t; |
1525 | 1532 |
1526 | 1533 |
1527 typedef struct | 1534 typedef struct |
1560 if (p == pend) \ | 1567 if (p == pend) \ |
1561 break; \ | 1568 break; \ |
1562 PATFETCH (c); \ | 1569 PATFETCH (c); \ |
1563 } \ | 1570 } \ |
1564 } \ | 1571 } \ |
1565 } | 1572 } |
1566 | 1573 |
1567 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ | 1574 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ |
1568 | 1575 |
1569 #define IS_CHAR_CLASS(string) \ | 1576 #define IS_CHAR_CLASS(string) \ |
1570 (STREQ (string, "alpha") || STREQ (string, "upper") \ | 1577 (STREQ (string, "alpha") || STREQ (string, "upper") \ |
1591 static int regs_allocated_size; | 1598 static int regs_allocated_size; |
1592 | 1599 |
1593 static const char ** regstart, ** regend; | 1600 static const char ** regstart, ** regend; |
1594 static const char ** old_regstart, ** old_regend; | 1601 static const char ** old_regstart, ** old_regend; |
1595 static const char **best_regstart, **best_regend; | 1602 static const char **best_regstart, **best_regend; |
1596 static register_info_type *reg_info; | 1603 static register_info_type *reg_info; |
1597 static const char **reg_dummy; | 1604 static const char **reg_dummy; |
1598 static register_info_type *reg_info_dummy; | 1605 static register_info_type *reg_info_dummy; |
1599 | 1606 |
1600 /* Make the register vectors big enough for NUM_REGS registers, | 1607 /* Make the register vectors big enough for NUM_REGS registers, |
1601 but don't make them smaller. */ | 1608 but don't make them smaller. */ |
1634 `syntax' is set to SYNTAX; | 1641 `syntax' is set to SYNTAX; |
1635 `used' is set to the length of the compiled pattern; | 1642 `used' is set to the length of the compiled pattern; |
1636 `fastmap_accurate' is zero; | 1643 `fastmap_accurate' is zero; |
1637 `re_nsub' is the number of subexpressions in PATTERN; | 1644 `re_nsub' is the number of subexpressions in PATTERN; |
1638 `not_bol' and `not_eol' are zero; | 1645 `not_bol' and `not_eol' are zero; |
1639 | 1646 |
1640 The `fastmap' and `newline_anchor' fields are neither | 1647 The `fastmap' and `newline_anchor' fields are neither |
1641 examined nor set. */ | 1648 examined nor set. */ |
1642 | 1649 |
1643 /* Return, freeing storage we allocated. */ | 1650 /* Return, freeing storage we allocated. */ |
1644 #define FREE_STACK_RETURN(value) \ | 1651 #define FREE_STACK_RETURN(value) \ |
1653 { | 1660 { |
1654 /* We fetch characters from PATTERN here. Even though PATTERN is | 1661 /* We fetch characters from PATTERN here. Even though PATTERN is |
1655 `char *' (i.e., signed), we declare these variables as unsigned, so | 1662 `char *' (i.e., signed), we declare these variables as unsigned, so |
1656 they can be reliably used as array indices. */ | 1663 they can be reliably used as array indices. */ |
1657 register unsigned char c, c1; | 1664 register unsigned char c, c1; |
1658 | 1665 |
1659 /* A random temporary spot in PATTERN. */ | 1666 /* A random temporary spot in PATTERN. */ |
1660 const char *p1; | 1667 const char *p1; |
1661 | 1668 |
1662 /* Points to the end of the buffer, where we should append. */ | 1669 /* Points to the end of the buffer, where we should append. */ |
1663 register unsigned char *b; | 1670 register unsigned char *b; |
1664 | 1671 |
1665 /* Keeps track of unclosed groups. */ | 1672 /* Keeps track of unclosed groups. */ |
1666 compile_stack_type compile_stack; | 1673 compile_stack_type compile_stack; |
1667 | 1674 |
1668 /* Points to the current (ending) position in the pattern. */ | 1675 /* Points to the current (ending) position in the pattern. */ |
1669 const char *p = pattern; | 1676 const char *p = pattern; |
1670 const char *pend = pattern + size; | 1677 const char *pend = pattern + size; |
1671 | 1678 |
1672 /* How to translate the characters in the pattern. */ | 1679 /* How to translate the characters in the pattern. */ |
1673 RE_TRANSLATE_TYPE translate = bufp->translate; | 1680 RE_TRANSLATE_TYPE translate = bufp->translate; |
1674 | 1681 |
1675 /* Address of the count-byte of the most recently inserted `exactn' | 1682 /* Address of the count-byte of the most recently inserted `exactn' |
1676 command. This makes it possible to tell if a new exact-match | 1683 command. This makes it possible to tell if a new exact-match |
1687 unsigned char *begalt; | 1694 unsigned char *begalt; |
1688 | 1695 |
1689 /* Place in the uncompiled pattern (i.e., the {) to | 1696 /* Place in the uncompiled pattern (i.e., the {) to |
1690 which to go back if the interval is invalid. */ | 1697 which to go back if the interval is invalid. */ |
1691 const char *beg_interval; | 1698 const char *beg_interval; |
1692 | 1699 |
1693 /* Address of the place where a forward jump should go to the end of | 1700 /* Address of the place where a forward jump should go to the end of |
1694 the containing expression. Each alternative of an `or' -- except the | 1701 the containing expression. Each alternative of an `or' -- except the |
1695 last -- ends with a forward jump of this sort. */ | 1702 last -- ends with a forward jump of this sort. */ |
1696 unsigned char *fixup_alt_jump = 0; | 1703 unsigned char *fixup_alt_jump = 0; |
1697 | 1704 |
1703 #ifdef DEBUG | 1710 #ifdef DEBUG |
1704 DEBUG_PRINT1 ("\nCompiling pattern: "); | 1711 DEBUG_PRINT1 ("\nCompiling pattern: "); |
1705 if (debug) | 1712 if (debug) |
1706 { | 1713 { |
1707 unsigned debug_count; | 1714 unsigned debug_count; |
1708 | 1715 |
1709 for (debug_count = 0; debug_count < size; debug_count++) | 1716 for (debug_count = 0; debug_count < size; debug_count++) |
1710 putchar (pattern[debug_count]); | 1717 putchar (pattern[debug_count]); |
1711 putchar ('\n'); | 1718 putchar ('\n'); |
1712 } | 1719 } |
1713 #endif /* DEBUG */ | 1720 #endif /* DEBUG */ |
1727 | 1734 |
1728 /* Set `used' to zero, so that if we return an error, the pattern | 1735 /* Set `used' to zero, so that if we return an error, the pattern |
1729 printer (for debugging) will think there's no pattern. We reset it | 1736 printer (for debugging) will think there's no pattern. We reset it |
1730 at the end. */ | 1737 at the end. */ |
1731 bufp->used = 0; | 1738 bufp->used = 0; |
1732 | 1739 |
1733 /* Always count groups, whether or not bufp->no_sub is set. */ | 1740 /* Always count groups, whether or not bufp->no_sub is set. */ |
1734 bufp->re_nsub = 0; | 1741 bufp->re_nsub = 0; |
1735 | 1742 |
1736 #if !defined (emacs) && !defined (SYNTAX_TABLE) | 1743 #if !defined (emacs) && !defined (SYNTAX_TABLE) |
1737 /* Initialize the syntax table. */ | 1744 /* Initialize the syntax table. */ |
1738 init_syntax_once (); | 1745 init_syntax_once (); |
1739 #endif | 1746 #endif |
1780 | 1787 |
1781 | 1788 |
1782 case '$': | 1789 case '$': |
1783 { | 1790 { |
1784 if ( /* If at end of pattern, it's an operator. */ | 1791 if ( /* If at end of pattern, it's an operator. */ |
1785 p == pend | 1792 p == pend |
1786 /* If context independent, it's an operator. */ | 1793 /* If context independent, it's an operator. */ |
1787 || syntax & RE_CONTEXT_INDEP_ANCHORS | 1794 || syntax & RE_CONTEXT_INDEP_ANCHORS |
1788 /* Otherwise, depends on what's next. */ | 1795 /* Otherwise, depends on what's next. */ |
1789 || at_endline_loc_p (p, pend, syntax)) | 1796 || at_endline_loc_p (p, pend, syntax)) |
1790 BUF_PUSH (endline); | 1797 BUF_PUSH (endline); |
1811 } | 1818 } |
1812 | 1819 |
1813 { | 1820 { |
1814 /* Are we optimizing this jump? */ | 1821 /* Are we optimizing this jump? */ |
1815 boolean keep_string_p = false; | 1822 boolean keep_string_p = false; |
1816 | 1823 |
1817 /* 1 means zero (many) matches is allowed. */ | 1824 /* 1 means zero (many) matches is allowed. */ |
1818 char zero_times_ok = 0, many_times_ok = 0; | 1825 char zero_times_ok = 0, many_times_ok = 0; |
1819 | 1826 |
1820 /* If there is a sequence of repetition chars, collapse it | 1827 /* If there is a sequence of repetition chars, collapse it |
1821 down to just one (the right one). We can't combine | 1828 down to just one (the right one). We can't combine |
1859 /* If we get here, we found another repeat character. */ | 1866 /* If we get here, we found another repeat character. */ |
1860 } | 1867 } |
1861 | 1868 |
1862 /* Star, etc. applied to an empty pattern is equivalent | 1869 /* Star, etc. applied to an empty pattern is equivalent |
1863 to an empty pattern. */ | 1870 to an empty pattern. */ |
1864 if (!laststart) | 1871 if (!laststart) |
1865 break; | 1872 break; |
1866 | 1873 |
1867 /* Now we know whether or not zero matches is allowed | 1874 /* Now we know whether or not zero matches is allowed |
1868 and also whether or not two or more matches is allowed. */ | 1875 and also whether or not two or more matches is allowed. */ |
1869 if (many_times_ok) | 1876 if (many_times_ok) |
1870 { /* More than one repetition is allowed, so put in at the | 1877 { /* More than one repetition is allowed, so put in at the |
1871 end a backward relative jump from `b' to before the next | 1878 end a backward relative jump from `b' to before the next |
1872 jump we're going to put in below (which jumps from | 1879 jump we're going to put in below (which jumps from |
1873 laststart to after this jump). | 1880 laststart to after this jump). |
1874 | 1881 |
1875 But if we are at the `*' in the exact sequence `.*\n', | 1882 But if we are at the `*' in the exact sequence `.*\n', |
1876 insert an unconditional jump backwards to the ., | 1883 insert an unconditional jump backwards to the ., |
1877 instead of the beginning of the loop. This way we only | 1884 instead of the beginning of the loop. This way we only |
1878 push a failure point once, instead of every time | 1885 push a failure point once, instead of every time |
1945 | 1952 |
1946 laststart = b; | 1953 laststart = b; |
1947 | 1954 |
1948 /* We test `*p == '^' twice, instead of using an if | 1955 /* We test `*p == '^' twice, instead of using an if |
1949 statement, so we only need one BUF_PUSH. */ | 1956 statement, so we only need one BUF_PUSH. */ |
1950 BUF_PUSH (*p == '^' ? charset_not : charset); | 1957 BUF_PUSH (*p == '^' ? charset_not : charset); |
1951 if (*p == '^') | 1958 if (*p == '^') |
1952 p++; | 1959 p++; |
1953 | 1960 |
1954 /* Remember the first position in the bracket expression. */ | 1961 /* Remember the first position in the bracket expression. */ |
1955 p1 = p; | 1962 p1 = p; |
1995 | 2002 |
1996 /* Look ahead to see if it's a range when the last thing | 2003 /* Look ahead to see if it's a range when the last thing |
1997 was a character: if this is a hyphen not at the | 2004 was a character: if this is a hyphen not at the |
1998 beginning or the end of a list, then it's the range | 2005 beginning or the end of a list, then it's the range |
1999 operator. */ | 2006 operator. */ |
2000 if (c == '-' | 2007 if (c == '-' |
2001 && !(p - 2 >= pattern && p[-2] == '[') | 2008 && !(p - 2 >= pattern && p[-2] == '[') |
2002 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') | 2009 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') |
2003 && *p != ']') | 2010 && *p != ']') |
2004 { | 2011 { |
2005 reg_errcode_t ret | 2012 reg_errcode_t ret |
2006 = compile_range (&p, pend, translate, syntax, b); | 2013 = compile_range (&p, pend, translate, syntax, b); |
2011 { /* This handles ranges made up of characters only. */ | 2018 { /* This handles ranges made up of characters only. */ |
2012 reg_errcode_t ret; | 2019 reg_errcode_t ret; |
2013 | 2020 |
2014 /* Move past the `-'. */ | 2021 /* Move past the `-'. */ |
2015 PATFETCH (c1); | 2022 PATFETCH (c1); |
2016 | 2023 |
2017 ret = compile_range (&p, pend, translate, syntax, b); | 2024 ret = compile_range (&p, pend, translate, syntax, b); |
2018 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); | 2025 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); |
2019 } | 2026 } |
2020 | 2027 |
2021 /* See if we're at the beginning of a possible character | 2028 /* See if we're at the beginning of a possible character |
2040 str[c1++] = c; | 2047 str[c1++] = c; |
2041 } | 2048 } |
2042 str[c1] = '\0'; | 2049 str[c1] = '\0'; |
2043 | 2050 |
2044 /* If it isn't a word bracketed by `[:' and `:]': | 2051 /* If it isn't a word bracketed by `[:' and `:]': |
2045 undo the ending character, the letters, and leave | 2052 undo the ending character, the letters, and leave |
2046 the leading `:' and `[' (but set bits for them). */ | 2053 the leading `:' and `[' (but set bits for them). */ |
2047 if (c == ':' && *p == ']') | 2054 if (c == ':' && *p == ']') |
2048 { | 2055 { |
2049 int ch; | 2056 int ch; |
2050 boolean is_alnum = STREQ (str, "alnum"); | 2057 boolean is_alnum = STREQ (str, "alnum"); |
2057 boolean is_print = STREQ (str, "print"); | 2064 boolean is_print = STREQ (str, "print"); |
2058 boolean is_punct = STREQ (str, "punct"); | 2065 boolean is_punct = STREQ (str, "punct"); |
2059 boolean is_space = STREQ (str, "space"); | 2066 boolean is_space = STREQ (str, "space"); |
2060 boolean is_upper = STREQ (str, "upper"); | 2067 boolean is_upper = STREQ (str, "upper"); |
2061 boolean is_xdigit = STREQ (str, "xdigit"); | 2068 boolean is_xdigit = STREQ (str, "xdigit"); |
2062 | 2069 |
2063 if (!IS_CHAR_CLASS (str)) | 2070 if (!IS_CHAR_CLASS (str)) |
2064 FREE_STACK_RETURN (REG_ECTYPE); | 2071 FREE_STACK_RETURN (REG_ECTYPE); |
2065 | 2072 |
2066 /* Throw away the ] at the end of the character | 2073 /* Throw away the ] at the end of the character |
2067 class. */ | 2074 class. */ |
2068 PATFETCH (c); | 2075 PATFETCH (c); |
2069 | 2076 |
2070 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2077 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
2071 | 2078 |
2072 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) | 2079 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) |
2073 { | 2080 { |
2092 had_char_class = true; | 2099 had_char_class = true; |
2093 } | 2100 } |
2094 else | 2101 else |
2095 { | 2102 { |
2096 c1++; | 2103 c1++; |
2097 while (c1--) | 2104 while (c1--) |
2098 PATUNFETCH; | 2105 PATUNFETCH; |
2099 SET_LIST_BIT ('['); | 2106 SET_LIST_BIT ('['); |
2100 SET_LIST_BIT (':'); | 2107 SET_LIST_BIT (':'); |
2101 had_char_class = false; | 2108 had_char_class = false; |
2102 } | 2109 } |
2108 } | 2115 } |
2109 } | 2116 } |
2110 | 2117 |
2111 /* Discard any (non)matching list bytes that are all 0 at the | 2118 /* Discard any (non)matching list bytes that are all 0 at the |
2112 end of the map. Decrease the map-length byte too. */ | 2119 end of the map. Decrease the map-length byte too. */ |
2113 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) | 2120 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) |
2114 b[-1]--; | 2121 b[-1]--; |
2115 b += b[-1]; | 2122 b += b[-1]; |
2116 } | 2123 } |
2117 break; | 2124 break; |
2118 | 2125 |
2119 | 2126 |
2169 handle_open: | 2176 handle_open: |
2170 bufp->re_nsub++; | 2177 bufp->re_nsub++; |
2171 regnum++; | 2178 regnum++; |
2172 | 2179 |
2173 if (COMPILE_STACK_FULL) | 2180 if (COMPILE_STACK_FULL) |
2174 { | 2181 { |
2175 RETALLOC (compile_stack.stack, compile_stack.size << 1, | 2182 RETALLOC (compile_stack.stack, compile_stack.size << 1, |
2176 compile_stack_elt_t); | 2183 compile_stack_elt_t); |
2177 if (compile_stack.stack == NULL) return REG_ESPACE; | 2184 if (compile_stack.stack == NULL) return REG_ESPACE; |
2178 | 2185 |
2179 compile_stack.size <<= 1; | 2186 compile_stack.size <<= 1; |
2182 /* These are the values to restore when we hit end of this | 2189 /* These are the values to restore when we hit end of this |
2183 group. They are all relative offsets, so that if the | 2190 group. They are all relative offsets, so that if the |
2184 whole pattern moves because of realloc, they will still | 2191 whole pattern moves because of realloc, they will still |
2185 be valid. */ | 2192 be valid. */ |
2186 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; | 2193 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; |
2187 COMPILE_STACK_TOP.fixup_alt_jump | 2194 COMPILE_STACK_TOP.fixup_alt_jump |
2188 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | 2195 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; |
2189 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | 2196 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; |
2190 COMPILE_STACK_TOP.regnum = regnum; | 2197 COMPILE_STACK_TOP.regnum = regnum; |
2191 | 2198 |
2192 /* We will eventually replace the 0 with the number of | 2199 /* We will eventually replace the 0 with the number of |
2196 if (regnum <= MAX_REGNUM) | 2203 if (regnum <= MAX_REGNUM) |
2197 { | 2204 { |
2198 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; | 2205 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; |
2199 BUF_PUSH_3 (start_memory, regnum, 0); | 2206 BUF_PUSH_3 (start_memory, regnum, 0); |
2200 } | 2207 } |
2201 | 2208 |
2202 compile_stack.avail++; | 2209 compile_stack.avail++; |
2203 | 2210 |
2204 fixup_alt_jump = 0; | 2211 fixup_alt_jump = 0; |
2205 laststart = 0; | 2212 laststart = 0; |
2206 begalt = b; | 2213 begalt = b; |
2225 { /* Push a dummy failure point at the end of the | 2232 { /* Push a dummy failure point at the end of the |
2226 alternative for a possible future | 2233 alternative for a possible future |
2227 `pop_failure_jump' to pop. See comments at | 2234 `pop_failure_jump' to pop. See comments at |
2228 `push_dummy_failure' in `re_match_2'. */ | 2235 `push_dummy_failure' in `re_match_2'. */ |
2229 BUF_PUSH (push_dummy_failure); | 2236 BUF_PUSH (push_dummy_failure); |
2230 | 2237 |
2231 /* We allocated space for this jump when we assigned | 2238 /* We allocated space for this jump when we assigned |
2232 to `fixup_alt_jump', in the `handle_alt' case below. */ | 2239 to `fixup_alt_jump', in the `handle_alt' case below. */ |
2233 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); | 2240 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); |
2234 } | 2241 } |
2235 | 2242 |
2247 /* We don't just want to restore into `regnum', because | 2254 /* We don't just want to restore into `regnum', because |
2248 later groups should continue to be numbered higher, | 2255 later groups should continue to be numbered higher, |
2249 as in `(ab)c(de)' -- the second group is #2. */ | 2256 as in `(ab)c(de)' -- the second group is #2. */ |
2250 regnum_t this_group_regnum; | 2257 regnum_t this_group_regnum; |
2251 | 2258 |
2252 compile_stack.avail--; | 2259 compile_stack.avail--; |
2253 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; | 2260 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; |
2254 fixup_alt_jump | 2261 fixup_alt_jump |
2255 = COMPILE_STACK_TOP.fixup_alt_jump | 2262 = COMPILE_STACK_TOP.fixup_alt_jump |
2256 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 | 2263 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 |
2257 : 0; | 2264 : 0; |
2258 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; | 2265 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; |
2259 this_group_regnum = COMPILE_STACK_TOP.regnum; | 2266 this_group_regnum = COMPILE_STACK_TOP.regnum; |
2260 /* If we've reached MAX_REGNUM groups, then this open | 2267 /* If we've reached MAX_REGNUM groups, then this open |
2261 won't actually generate any code, so we'll have to | 2268 won't actually generate any code, so we'll have to |
2266 groups were inside this one. */ | 2273 groups were inside this one. */ |
2267 if (this_group_regnum <= MAX_REGNUM) | 2274 if (this_group_regnum <= MAX_REGNUM) |
2268 { | 2275 { |
2269 unsigned char *inner_group_loc | 2276 unsigned char *inner_group_loc |
2270 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; | 2277 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; |
2271 | 2278 |
2272 *inner_group_loc = regnum - this_group_regnum; | 2279 *inner_group_loc = regnum - this_group_regnum; |
2273 BUF_PUSH_3 (stop_memory, this_group_regnum, | 2280 BUF_PUSH_3 (stop_memory, this_group_regnum, |
2274 regnum - this_group_regnum); | 2281 regnum - this_group_regnum); |
2275 } | 2282 } |
2276 } | 2283 } |
2295 which gets executed if it gets matched. Adjust that | 2302 which gets executed if it gets matched. Adjust that |
2296 jump so it will jump to this alternative's analogous | 2303 jump so it will jump to this alternative's analogous |
2297 jump (put in below, which in turn will jump to the next | 2304 jump (put in below, which in turn will jump to the next |
2298 (if any) alternative's such jump, etc.). The last such | 2305 (if any) alternative's such jump, etc.). The last such |
2299 jump jumps to the correct final destination. A picture: | 2306 jump jumps to the correct final destination. A picture: |
2300 _____ _____ | 2307 _____ _____ |
2301 | | | | | 2308 | | | | |
2302 | v | v | 2309 | v | v |
2303 a | b | c | 2310 a | b | c |
2304 | 2311 |
2305 If we are at `b', then fixup_alt_jump right now points to a | 2312 If we are at `b', then fixup_alt_jump right now points to a |
2306 three-byte space after `a'. We'll put in the jump, set | 2313 three-byte space after `a'. We'll put in the jump, set |
2307 fixup_alt_jump to right after `b', and leave behind three | 2314 fixup_alt_jump to right after `b', and leave behind three |
2308 bytes which we'll fill in when we get to after `c'. */ | 2315 bytes which we'll fill in when we get to after `c'. */ |
2320 laststart = 0; | 2327 laststart = 0; |
2321 begalt = b; | 2328 begalt = b; |
2322 break; | 2329 break; |
2323 | 2330 |
2324 | 2331 |
2325 case '{': | 2332 case '{': |
2326 /* If \{ is a literal. */ | 2333 /* If \{ is a literal. */ |
2327 if (!(syntax & RE_INTERVALS) | 2334 if (!(syntax & RE_INTERVALS) |
2328 /* If we're at `\{' and it's not the open-interval | 2335 /* If we're at `\{' and it's not the open-interval |
2329 operator. */ | 2336 operator. */ |
2330 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) | 2337 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) |
2331 || (p - 2 == pattern && p == pend)) | 2338 || (p - 2 == pattern && p == pend)) |
2332 goto normal_backslash; | 2339 goto normal_backslash; |
2333 | 2340 |
2362 if (lower_bound < 0 || upper_bound > RE_DUP_MAX | 2369 if (lower_bound < 0 || upper_bound > RE_DUP_MAX |
2363 || lower_bound > upper_bound) | 2370 || lower_bound > upper_bound) |
2364 { | 2371 { |
2365 if (syntax & RE_NO_BK_BRACES) | 2372 if (syntax & RE_NO_BK_BRACES) |
2366 goto unfetch_interval; | 2373 goto unfetch_interval; |
2367 else | 2374 else |
2368 FREE_STACK_RETURN (REG_BADBR); | 2375 FREE_STACK_RETURN (REG_BADBR); |
2369 } | 2376 } |
2370 | 2377 |
2371 if (!(syntax & RE_NO_BK_BRACES)) | 2378 if (!(syntax & RE_NO_BK_BRACES)) |
2372 { | 2379 { |
2373 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); | 2380 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); |
2374 | 2381 |
2375 PATFETCH (c); | 2382 PATFETCH (c); |
2376 } | 2383 } |
2377 | 2384 |
2378 if (c != '}') | 2385 if (c != '}') |
2379 { | 2386 { |
2380 if (syntax & RE_NO_BK_BRACES) | 2387 if (syntax & RE_NO_BK_BRACES) |
2381 goto unfetch_interval; | 2388 goto unfetch_interval; |
2382 else | 2389 else |
2383 FREE_STACK_RETURN (REG_BADBR); | 2390 FREE_STACK_RETURN (REG_BADBR); |
2384 } | 2391 } |
2385 | 2392 |
2386 /* We just parsed a valid interval. */ | 2393 /* We just parsed a valid interval. */ |
2387 | 2394 |
2413 succeed_n <after jump addr> <succeed_n count> | 2420 succeed_n <after jump addr> <succeed_n count> |
2414 <body of loop> | 2421 <body of loop> |
2415 jump_n <succeed_n addr> <jump count> | 2422 jump_n <succeed_n addr> <jump count> |
2416 (The upper bound and `jump_n' are omitted if | 2423 (The upper bound and `jump_n' are omitted if |
2417 `upper_bound' is 1, though.) */ | 2424 `upper_bound' is 1, though.) */ |
2418 else | 2425 else |
2419 { /* If the upper bound is > 1, we need to insert | 2426 { /* If the upper bound is > 1, we need to insert |
2420 more at the end of the loop. */ | 2427 more at the end of the loop. */ |
2421 unsigned nbytes = 10 + (upper_bound > 1) * 10; | 2428 unsigned nbytes = 10 + (upper_bound > 1) * 10; |
2422 | 2429 |
2423 GET_BUFFER_SPACE (nbytes); | 2430 GET_BUFFER_SPACE (nbytes); |
2430 INSERT_JUMP2 (succeed_n, laststart, | 2437 INSERT_JUMP2 (succeed_n, laststart, |
2431 b + 5 + (upper_bound > 1) * 5, | 2438 b + 5 + (upper_bound > 1) * 5, |
2432 lower_bound); | 2439 lower_bound); |
2433 b += 5; | 2440 b += 5; |
2434 | 2441 |
2435 /* Code to initialize the lower bound. Insert | 2442 /* Code to initialize the lower bound. Insert |
2436 before the `succeed_n'. The `5' is the last two | 2443 before the `succeed_n'. The `5' is the last two |
2437 bytes of this `set_number_at', plus 3 bytes of | 2444 bytes of this `set_number_at', plus 3 bytes of |
2438 the following `succeed_n'. */ | 2445 the following `succeed_n'. */ |
2439 insert_op2 (set_number_at, laststart, 5, lower_bound, b); | 2446 insert_op2 (set_number_at, laststart, 5, lower_bound, b); |
2440 b += 5; | 2447 b += 5; |
2441 | 2448 |
2442 if (upper_bound > 1) | 2449 if (upper_bound > 1) |
2443 { /* More than one repetition is allowed, so | 2450 { /* More than one repetition is allowed, so |
2444 append a backward jump to the `succeed_n' | 2451 append a backward jump to the `succeed_n' |
2445 that starts this interval. | 2452 that starts this interval. |
2446 | 2453 |
2447 When we've reached this during matching, | 2454 When we've reached this during matching, |
2448 we'll have matched the interval once, so | 2455 we'll have matched the interval once, so |
2449 jump back only `upper_bound - 1' times. */ | 2456 jump back only `upper_bound - 1' times. */ |
2450 STORE_JUMP2 (jump_n, b, laststart + 5, | 2457 STORE_JUMP2 (jump_n, b, laststart + 5, |
2451 upper_bound - 1); | 2458 upper_bound - 1); |
2459 for the relative address. But we are | 2466 for the relative address. But we are |
2460 inserting into the middle of the pattern -- | 2467 inserting into the middle of the pattern -- |
2461 so everything is getting moved up by 5. | 2468 so everything is getting moved up by 5. |
2462 Conclusion: (b - 2) - (laststart + 3) + 5, | 2469 Conclusion: (b - 2) - (laststart + 3) + 5, |
2463 i.e., b - laststart. | 2470 i.e., b - laststart. |
2464 | 2471 |
2465 We insert this at the beginning of the loop | 2472 We insert this at the beginning of the loop |
2466 so that if we fail during matching, we'll | 2473 so that if we fail during matching, we'll |
2467 reinitialize the bounds. */ | 2474 reinitialize the bounds. */ |
2468 insert_op2 (set_number_at, laststart, b - laststart, | 2475 insert_op2 (set_number_at, laststart, b - laststart, |
2469 upper_bound - 1, b); | 2476 upper_bound - 1, b); |
2480 assert (beg_interval); | 2487 assert (beg_interval); |
2481 p = beg_interval; | 2488 p = beg_interval; |
2482 beg_interval = NULL; | 2489 beg_interval = NULL; |
2483 | 2490 |
2484 /* normal_char and normal_backslash need `c'. */ | 2491 /* normal_char and normal_backslash need `c'. */ |
2485 PATFETCH (c); | 2492 PATFETCH (c); |
2486 | 2493 |
2487 if (!(syntax & RE_NO_BK_BRACES)) | 2494 if (!(syntax & RE_NO_BK_BRACES)) |
2488 { | 2495 { |
2489 if (p > pattern && p[-1] == '\\') | 2496 if (p > pattern && p[-1] == '\\') |
2490 goto normal_backslash; | 2497 goto normal_backslash; |
2496 operators. rms says this is ok. --karl */ | 2503 operators. rms says this is ok. --karl */ |
2497 case '=': | 2504 case '=': |
2498 BUF_PUSH (at_dot); | 2505 BUF_PUSH (at_dot); |
2499 break; | 2506 break; |
2500 | 2507 |
2501 case 's': | 2508 case 's': |
2502 laststart = b; | 2509 laststart = b; |
2503 PATFETCH (c); | 2510 PATFETCH (c); |
2504 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); | 2511 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); |
2505 break; | 2512 break; |
2506 | 2513 |
2587 | 2594 |
2588 default: | 2595 default: |
2589 /* Expects the character in `c'. */ | 2596 /* Expects the character in `c'. */ |
2590 normal_char: | 2597 normal_char: |
2591 /* If no exactn currently being built. */ | 2598 /* If no exactn currently being built. */ |
2592 if (!pending_exact | 2599 if (!pending_exact |
2593 | 2600 |
2594 /* If last exactn not at current position. */ | 2601 /* If last exactn not at current position. */ |
2595 || pending_exact + *pending_exact + 1 != b | 2602 || pending_exact + *pending_exact + 1 != b |
2596 | 2603 |
2597 /* We have only one byte following the exactn for the count. */ | 2604 /* We have only one byte following the exactn for the count. */ |
2598 || *pending_exact == (1 << BYTEWIDTH) - 1 | 2605 || *pending_exact == (1 << BYTEWIDTH) - 1 |
2599 | 2606 |
2600 /* If followed by a repetition operator. */ | 2607 /* If followed by a repetition operator. */ |
2601 || *p == '*' || *p == '^' | 2608 || *p == '*' || *p == '^' |
2606 && ((syntax & RE_NO_BK_BRACES) | 2613 && ((syntax & RE_NO_BK_BRACES) |
2607 ? *p == '{' | 2614 ? *p == '{' |
2608 : (p[0] == '\\' && p[1] == '{')))) | 2615 : (p[0] == '\\' && p[1] == '{')))) |
2609 { | 2616 { |
2610 /* Start building a new exactn. */ | 2617 /* Start building a new exactn. */ |
2611 | 2618 |
2612 laststart = b; | 2619 laststart = b; |
2613 | 2620 |
2614 BUF_PUSH_2 (exactn, 0); | 2621 BUF_PUSH_2 (exactn, 0); |
2615 pending_exact = b - 1; | 2622 pending_exact = b - 1; |
2616 } | 2623 } |
2617 | 2624 |
2618 BUF_PUSH (c); | 2625 BUF_PUSH (c); |
2619 (*pending_exact)++; | 2626 (*pending_exact)++; |
2620 break; | 2627 break; |
2621 } /* switch (c) */ | 2628 } /* switch (c) */ |
2622 } /* while p != pend */ | 2629 } /* while p != pend */ |
2623 | 2630 |
2624 | 2631 |
2625 /* Through the pattern now. */ | 2632 /* Through the pattern now. */ |
2626 | 2633 |
2627 if (fixup_alt_jump) | 2634 if (fixup_alt_jump) |
2628 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | 2635 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); |
2629 | 2636 |
2630 if (!COMPILE_STACK_EMPTY) | 2637 if (!COMPILE_STACK_EMPTY) |
2631 FREE_STACK_RETURN (REG_EPAREN); | 2638 FREE_STACK_RETURN (REG_EPAREN); |
2632 | 2639 |
2633 /* If we don't want backtracking, force success | 2640 /* If we don't want backtracking, force success |
2634 the first time we reach the end of the compiled pattern. */ | 2641 the first time we reach the end of the compiled pattern. */ |
2635 if (syntax & RE_NO_POSIX_BACKTRACKING) | 2642 if (syntax & RE_NO_POSIX_BACKTRACKING) |
2663 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); | 2670 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); |
2664 | 2671 |
2665 #ifdef emacs | 2672 #ifdef emacs |
2666 if (! fail_stack.stack) | 2673 if (! fail_stack.stack) |
2667 fail_stack.stack | 2674 fail_stack.stack |
2668 = (fail_stack_elt_t *) xmalloc (fail_stack.size | 2675 = (fail_stack_elt_t *) xmalloc (fail_stack.size |
2669 * sizeof (fail_stack_elt_t)); | 2676 * sizeof (fail_stack_elt_t)); |
2670 else | 2677 else |
2671 fail_stack.stack | 2678 fail_stack.stack |
2672 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, | 2679 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, |
2673 (fail_stack.size | 2680 (fail_stack.size |
2674 * sizeof (fail_stack_elt_t))); | 2681 * sizeof (fail_stack_elt_t))); |
2675 #else /* not emacs */ | 2682 #else /* not emacs */ |
2676 if (! fail_stack.stack) | 2683 if (! fail_stack.stack) |
2677 fail_stack.stack | 2684 fail_stack.stack |
2678 = (fail_stack_elt_t *) malloc (fail_stack.size | 2685 = (fail_stack_elt_t *) malloc (fail_stack.size |
2679 * sizeof (fail_stack_elt_t)); | 2686 * sizeof (fail_stack_elt_t)); |
2680 else | 2687 else |
2681 fail_stack.stack | 2688 fail_stack.stack |
2682 = (fail_stack_elt_t *) realloc (fail_stack.stack, | 2689 = (fail_stack_elt_t *) realloc (fail_stack.stack, |
2683 (fail_stack.size | 2690 (fail_stack.size |
2727 static void | 2734 static void |
2728 insert_op1 (op, loc, arg, end) | 2735 insert_op1 (op, loc, arg, end) |
2729 re_opcode_t op; | 2736 re_opcode_t op; |
2730 unsigned char *loc; | 2737 unsigned char *loc; |
2731 int arg; | 2738 int arg; |
2732 unsigned char *end; | 2739 unsigned char *end; |
2733 { | 2740 { |
2734 register unsigned char *pfrom = end; | 2741 register unsigned char *pfrom = end; |
2735 register unsigned char *pto = end + 3; | 2742 register unsigned char *pto = end + 3; |
2736 | 2743 |
2737 while (pfrom != loc) | 2744 while (pfrom != loc) |
2738 *--pto = *--pfrom; | 2745 *--pto = *--pfrom; |
2739 | 2746 |
2740 store_op1 (op, loc, arg); | 2747 store_op1 (op, loc, arg); |
2741 } | 2748 } |
2742 | 2749 |
2743 | 2750 |
2744 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ | 2751 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ |
2746 static void | 2753 static void |
2747 insert_op2 (op, loc, arg1, arg2, end) | 2754 insert_op2 (op, loc, arg1, arg2, end) |
2748 re_opcode_t op; | 2755 re_opcode_t op; |
2749 unsigned char *loc; | 2756 unsigned char *loc; |
2750 int arg1, arg2; | 2757 int arg1, arg2; |
2751 unsigned char *end; | 2758 unsigned char *end; |
2752 { | 2759 { |
2753 register unsigned char *pfrom = end; | 2760 register unsigned char *pfrom = end; |
2754 register unsigned char *pto = end + 5; | 2761 register unsigned char *pto = end + 5; |
2755 | 2762 |
2756 while (pfrom != loc) | 2763 while (pfrom != loc) |
2757 *--pto = *--pfrom; | 2764 *--pto = *--pfrom; |
2758 | 2765 |
2759 store_op2 (op, loc, arg1, arg2); | 2766 store_op2 (op, loc, arg1, arg2); |
2760 } | 2767 } |
2761 | 2768 |
2762 | 2769 |
2763 /* P points to just after a ^ in PATTERN. Return true if that ^ comes | 2770 /* P points to just after a ^ in PATTERN. Return true if that ^ comes |
2769 const char *pattern, *p; | 2776 const char *pattern, *p; |
2770 reg_syntax_t syntax; | 2777 reg_syntax_t syntax; |
2771 { | 2778 { |
2772 const char *prev = p - 2; | 2779 const char *prev = p - 2; |
2773 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; | 2780 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; |
2774 | 2781 |
2775 return | 2782 return |
2776 /* After a subexpression? */ | 2783 /* After a subexpression? */ |
2777 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) | 2784 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) |
2778 /* After an alternative? */ | 2785 /* After an alternative? */ |
2779 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); | 2786 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); |
2789 int syntax; | 2796 int syntax; |
2790 { | 2797 { |
2791 const char *next = p; | 2798 const char *next = p; |
2792 boolean next_backslash = *next == '\\'; | 2799 boolean next_backslash = *next == '\\'; |
2793 const char *next_next = p + 1 < pend ? p + 1 : 0; | 2800 const char *next_next = p + 1 < pend ? p + 1 : 0; |
2794 | 2801 |
2795 return | 2802 return |
2796 /* Before a subexpression? */ | 2803 /* Before a subexpression? */ |
2797 (syntax & RE_NO_BK_PARENS ? *next == ')' | 2804 (syntax & RE_NO_BK_PARENS ? *next == ')' |
2798 : next_backslash && next_next && *next_next == ')') | 2805 : next_backslash && next_next && *next_next == ')') |
2799 /* Before an alternative? */ | 2806 /* Before an alternative? */ |
2800 || (syntax & RE_NO_BK_VBAR ? *next == '|' | 2807 || (syntax & RE_NO_BK_VBAR ? *next == '|' |
2801 : next_backslash && next_next && *next_next == '|'); | 2808 : next_backslash && next_next && *next_next == '|'); |
2802 } | 2809 } |
2803 | 2810 |
2804 | 2811 |
2805 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and | 2812 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and |
2806 false if it's not. */ | 2813 false if it's not. */ |
2807 | 2814 |
2808 static boolean | 2815 static boolean |
2809 group_in_compile_stack (compile_stack, regnum) | 2816 group_in_compile_stack (compile_stack, regnum) |
2810 compile_stack_type compile_stack; | 2817 compile_stack_type compile_stack; |
2811 regnum_t regnum; | 2818 regnum_t regnum; |
2812 { | 2819 { |
2813 int this_element; | 2820 int this_element; |
2814 | 2821 |
2815 for (this_element = compile_stack.avail - 1; | 2822 for (this_element = compile_stack.avail - 1; |
2816 this_element >= 0; | 2823 this_element >= 0; |
2817 this_element--) | 2824 this_element--) |
2818 if (compile_stack.stack[this_element].regnum == regnum) | 2825 if (compile_stack.stack[this_element].regnum == regnum) |
2819 return true; | 2826 return true; |
2820 | 2827 |
2821 return false; | 2828 return false; |
2825 /* Read the ending character of a range (in a bracket expression) from the | 2832 /* Read the ending character of a range (in a bracket expression) from the |
2826 uncompiled pattern *P_PTR (which ends at PEND). We assume the | 2833 uncompiled pattern *P_PTR (which ends at PEND). We assume the |
2827 starting character is in `P[-2]'. (`P[-1]' is the character `-'.) | 2834 starting character is in `P[-2]'. (`P[-1]' is the character `-'.) |
2828 Then we set the translation of all bits between the starting and | 2835 Then we set the translation of all bits between the starting and |
2829 ending characters (inclusive) in the compiled pattern B. | 2836 ending characters (inclusive) in the compiled pattern B. |
2830 | 2837 |
2831 Return an error code. | 2838 Return an error code. |
2832 | 2839 |
2833 We use these short variable names so we can use the same macros as | 2840 We use these short variable names so we can use the same macros as |
2834 `regex_compile' itself. */ | 2841 `regex_compile' itself. */ |
2835 | 2842 |
2836 static reg_errcode_t | 2843 static reg_errcode_t |
2837 compile_range (p_ptr, pend, translate, syntax, b) | 2844 compile_range (p_ptr, pend, translate, syntax, b) |
2842 { | 2849 { |
2843 unsigned this_char; | 2850 unsigned this_char; |
2844 | 2851 |
2845 const char *p = *p_ptr; | 2852 const char *p = *p_ptr; |
2846 int range_start, range_end; | 2853 int range_start, range_end; |
2847 | 2854 |
2848 if (p == pend) | 2855 if (p == pend) |
2849 return REG_ERANGE; | 2856 return REG_ERANGE; |
2850 | 2857 |
2851 /* Even though the pattern is a signed `char *', we need to fetch | 2858 /* Even though the pattern is a signed `char *', we need to fetch |
2852 with unsigned char *'s; if the high bit of the pattern character | 2859 with unsigned char *'s; if the high bit of the pattern character |
2853 is set, the range endpoints will be negative if we fetch using a | 2860 is set, the range endpoints will be negative if we fetch using a |
2854 signed char *. | 2861 signed char *. |
2855 | 2862 |
2856 We also want to fetch the endpoints without translating them; the | 2863 We also want to fetch the endpoints without translating them; the |
2857 appropriate translation is done in the bit-setting loop below. */ | 2864 appropriate translation is done in the bit-setting loop below. */ |
2858 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ | 2865 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ |
2859 range_start = ((const unsigned char *) p)[-2]; | 2866 range_start = ((const unsigned char *) p)[-2]; |
2860 range_end = ((const unsigned char *) p)[0]; | 2867 range_end = ((const unsigned char *) p)[0]; |
2861 | 2868 |
2873 loop, since all characters <= 0xff. */ | 2880 loop, since all characters <= 0xff. */ |
2874 for (this_char = range_start; this_char <= range_end; this_char++) | 2881 for (this_char = range_start; this_char <= range_end; this_char++) |
2875 { | 2882 { |
2876 SET_LIST_BIT (TRANSLATE (this_char)); | 2883 SET_LIST_BIT (TRANSLATE (this_char)); |
2877 } | 2884 } |
2878 | 2885 |
2879 return REG_NOERROR; | 2886 return REG_NOERROR; |
2880 } | 2887 } |
2881 | 2888 |
2882 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in | 2889 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in |
2883 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible | 2890 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible |
2884 characters can start a string that matches the pattern. This fastmap | 2891 characters can start a string that matches the pattern. This fastmap |
2885 is used by re_search to skip quickly over impossible starting points. | 2892 is used by re_search to skip quickly over impossible starting points. |
2886 | 2893 |
2887 The caller must supply the address of a (1 << BYTEWIDTH)-byte data | 2894 The caller must supply the address of a (1 << BYTEWIDTH)-byte data |
2888 area as BUFP->fastmap. | 2895 area as BUFP->fastmap. |
2889 | 2896 |
2890 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in | 2897 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in |
2891 the pattern buffer. | 2898 the pattern buffer. |
2892 | 2899 |
2893 Returns 0 if we succeed, -2 if an internal error. */ | 2900 Returns 0 if we succeed, -2 if an internal error. */ |
2894 | 2901 |
2903 #ifndef REGEX_MALLOC | 2910 #ifndef REGEX_MALLOC |
2904 char *destination; | 2911 char *destination; |
2905 #endif | 2912 #endif |
2906 /* We don't push any register information onto the failure stack. */ | 2913 /* We don't push any register information onto the failure stack. */ |
2907 unsigned num_regs = 0; | 2914 unsigned num_regs = 0; |
2908 | 2915 |
2909 register char *fastmap = bufp->fastmap; | 2916 register char *fastmap = bufp->fastmap; |
2910 unsigned char *pattern = bufp->buffer; | 2917 unsigned char *pattern = bufp->buffer; |
2911 unsigned long size = bufp->used; | 2918 unsigned long size = bufp->used; |
2912 unsigned char *p = pattern; | 2919 unsigned char *p = pattern; |
2913 register unsigned char *pend = pattern + size; | 2920 register unsigned char *pend = pattern + size; |
2924 | 2931 |
2925 /* We aren't doing a `succeed_n' to begin with. */ | 2932 /* We aren't doing a `succeed_n' to begin with. */ |
2926 boolean succeed_n_p = false; | 2933 boolean succeed_n_p = false; |
2927 | 2934 |
2928 assert (fastmap != NULL && p != NULL); | 2935 assert (fastmap != NULL && p != NULL); |
2929 | 2936 |
2930 INIT_FAIL_STACK (); | 2937 INIT_FAIL_STACK (); |
2931 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ | 2938 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ |
2932 bufp->fastmap_accurate = 1; /* It will be when we're done. */ | 2939 bufp->fastmap_accurate = 1; /* It will be when we're done. */ |
2933 bufp->can_be_null = 0; | 2940 bufp->can_be_null = 0; |
2934 | 2941 |
2935 while (1) | 2942 while (1) |
2936 { | 2943 { |
2937 if (p == pend || *p == succeed) | 2944 if (p == pend || *p == succeed) |
2938 { | 2945 { |
2939 /* We have reached the (effective) end of pattern. */ | 2946 /* We have reached the (effective) end of pattern. */ |
2952 break; | 2959 break; |
2953 } | 2960 } |
2954 | 2961 |
2955 /* We should never be about to go beyond the end of the pattern. */ | 2962 /* We should never be about to go beyond the end of the pattern. */ |
2956 assert (p < pend); | 2963 assert (p < pend); |
2957 | 2964 |
2958 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) | 2965 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) |
2959 { | 2966 { |
2960 | 2967 |
2961 /* I guess the idea here is to simply not bother with a fastmap | 2968 /* I guess the idea here is to simply not bother with a fastmap |
2962 if a backreference is used, since it's too hard to figure out | 2969 if a backreference is used, since it's too hard to figure out |
3075 case maybe_pop_jump: | 3082 case maybe_pop_jump: |
3076 case jump: | 3083 case jump: |
3077 case jump_past_alt: | 3084 case jump_past_alt: |
3078 case dummy_failure_jump: | 3085 case dummy_failure_jump: |
3079 EXTRACT_NUMBER_AND_INCR (j, p); | 3086 EXTRACT_NUMBER_AND_INCR (j, p); |
3080 p += j; | 3087 p += j; |
3081 if (j > 0) | 3088 if (j > 0) |
3082 continue; | 3089 continue; |
3083 | 3090 |
3084 /* Jump backward implies we just went through the body of a | 3091 /* Jump backward implies we just went through the body of a |
3085 loop and matched nothing. Opcode jumped to should be | 3092 loop and matched nothing. Opcode jumped to should be |
3086 `on_failure_jump' or `succeed_n'. Just treat it like an | 3093 `on_failure_jump' or `succeed_n'. Just treat it like an |
3087 ordinary jump. For a * loop, it has pushed its failure | 3094 ordinary jump. For a * loop, it has pushed its failure |
3088 point already; if so, discard that as redundant. */ | 3095 point already; if so, discard that as redundant. */ |
3090 && (re_opcode_t) *p != succeed_n) | 3097 && (re_opcode_t) *p != succeed_n) |
3091 continue; | 3098 continue; |
3092 | 3099 |
3093 p++; | 3100 p++; |
3094 EXTRACT_NUMBER_AND_INCR (j, p); | 3101 EXTRACT_NUMBER_AND_INCR (j, p); |
3095 p += j; | 3102 p += j; |
3096 | 3103 |
3097 /* If what's on the stack is where we are now, pop it. */ | 3104 /* If what's on the stack is where we are now, pop it. */ |
3098 if (!FAIL_STACK_EMPTY () | 3105 if (!FAIL_STACK_EMPTY () |
3099 && fail_stack.stack[fail_stack.avail - 1].pointer == p) | 3106 && fail_stack.stack[fail_stack.avail - 1].pointer == p) |
3100 fail_stack.avail--; | 3107 fail_stack.avail--; |
3101 | 3108 |
3102 continue; | 3109 continue; |
3103 | 3110 |
3134 continue; | 3141 continue; |
3135 | 3142 |
3136 | 3143 |
3137 case succeed_n: | 3144 case succeed_n: |
3138 /* Get to the number of times to succeed. */ | 3145 /* Get to the number of times to succeed. */ |
3139 p += 2; | 3146 p += 2; |
3140 | 3147 |
3141 /* Increment p past the n for when k != 0. */ | 3148 /* Increment p past the n for when k != 0. */ |
3142 EXTRACT_NUMBER_AND_INCR (k, p); | 3149 EXTRACT_NUMBER_AND_INCR (k, p); |
3143 if (k == 0) | 3150 if (k == 0) |
3144 { | 3151 { |
3228 struct re_pattern_buffer *bufp; | 3235 struct re_pattern_buffer *bufp; |
3229 const char *string; | 3236 const char *string; |
3230 int size, startpos, range; | 3237 int size, startpos, range; |
3231 struct re_registers *regs; | 3238 struct re_registers *regs; |
3232 { | 3239 { |
3233 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | 3240 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, |
3234 regs, size); | 3241 regs, size); |
3235 } | 3242 } |
3236 | 3243 |
3237 | 3244 |
3238 /* Using the compiled pattern in BUFP->buffer, first tries to match the | 3245 /* Using the compiled pattern in BUFP->buffer, first tries to match the |
3239 virtual concatenation of STRING1 and STRING2, starting first at index | 3246 virtual concatenation of STRING1 and STRING2, starting first at index |
3240 STARTPOS, then at STARTPOS + 1, and so on. | 3247 STARTPOS, then at STARTPOS + 1, and so on. |
3241 | 3248 |
3242 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. | 3249 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. |
3243 | 3250 |
3244 RANGE is how far to scan while trying to match. RANGE = 0 means try | 3251 RANGE is how far to scan while trying to match. RANGE = 0 means try |
3245 only at STARTPOS; in general, the last start tried is STARTPOS + | 3252 only at STARTPOS; in general, the last start tried is STARTPOS + |
3246 RANGE. | 3253 RANGE. |
3247 | 3254 |
3248 In REGS, return the indices of the virtual concatenation of STRING1 | 3255 In REGS, return the indices of the virtual concatenation of STRING1 |
3249 and STRING2 that matched the entire BUFP->buffer and its contained | 3256 and STRING2 that matched the entire BUFP->buffer and its contained |
3250 subexpressions. | 3257 subexpressions. |
3251 | 3258 |
3252 Do not consider matching one past the index STOP in the virtual | 3259 Do not consider matching one past the index STOP in the virtual |
3253 concatenation of STRING1 and STRING2. | 3260 concatenation of STRING1 and STRING2. |
3254 | 3261 |
3255 We return either the position in the strings at which the match was | 3262 We return either the position in the strings at which the match was |
3256 found, -1 if no match, or -2 if error (such as failure | 3263 found, -1 if no match, or -2 if error (such as failure |
3273 int endpos = startpos + range; | 3280 int endpos = startpos + range; |
3274 | 3281 |
3275 /* Check for out-of-range STARTPOS. */ | 3282 /* Check for out-of-range STARTPOS. */ |
3276 if (startpos < 0 || startpos > total_size) | 3283 if (startpos < 0 || startpos > total_size) |
3277 return -1; | 3284 return -1; |
3278 | 3285 |
3279 /* Fix up RANGE if it might eventually take us outside | 3286 /* Fix up RANGE if it might eventually take us outside |
3280 the virtual concatenation of STRING1 and STRING2. | 3287 the virtual concatenation of STRING1 and STRING2. |
3281 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ | 3288 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ |
3282 if (endpos < 0) | 3289 if (endpos < 0) |
3283 range = 0 - startpos; | 3290 range = 0 - startpos; |
3284 else if (endpos > total_size) | 3291 else if (endpos > total_size) |
3285 range = total_size - startpos; | 3292 range = total_size - startpos; |
3286 | 3293 |
3307 | 3314 |
3308 /* Update the fastmap now if not correct already. */ | 3315 /* Update the fastmap now if not correct already. */ |
3309 if (fastmap && !bufp->fastmap_accurate) | 3316 if (fastmap && !bufp->fastmap_accurate) |
3310 if (re_compile_fastmap (bufp) == -2) | 3317 if (re_compile_fastmap (bufp) == -2) |
3311 return -2; | 3318 return -2; |
3312 | 3319 |
3313 /* Loop through the string, looking for a place to start matching. */ | 3320 /* Loop through the string, looking for a place to start matching. */ |
3314 for (;;) | 3321 for (;;) |
3315 { | 3322 { |
3316 /* If a fastmap is supplied, skip quickly over characters that | 3323 /* If a fastmap is supplied, skip quickly over characters that |
3317 cannot be the start of a match. If the pattern can match the | 3324 cannot be the start of a match. If the pattern can match the |
3318 null string, however, we don't need to skip characters; we want | 3325 null string, however, we don't need to skip characters; we want |
3319 the first null string. */ | 3326 the first null string. */ |
3320 if (fastmap && startpos < total_size && !bufp->can_be_null) | 3327 if (fastmap && startpos < total_size && !bufp->can_be_null) |
3327 | 3334 |
3328 if (startpos < size1 && startpos + range >= size1) | 3335 if (startpos < size1 && startpos + range >= size1) |
3329 lim = range - (size1 - startpos); | 3336 lim = range - (size1 - startpos); |
3330 | 3337 |
3331 d = (startpos >= size1 ? string2 - size1 : string1) + startpos; | 3338 d = (startpos >= size1 ? string2 - size1 : string1) + startpos; |
3332 | 3339 |
3333 /* Written out as an if-else to avoid testing `translate' | 3340 /* Written out as an if-else to avoid testing `translate' |
3334 inside the loop. */ | 3341 inside the loop. */ |
3335 if (translate) | 3342 if (translate) |
3336 while (range > lim | 3343 while (range > lim |
3337 && !fastmap[(unsigned char) | 3344 && !fastmap[(unsigned char) |
3344 startpos += irange - range; | 3351 startpos += irange - range; |
3345 } | 3352 } |
3346 else /* Searching backwards. */ | 3353 else /* Searching backwards. */ |
3347 { | 3354 { |
3348 register char c = (size1 == 0 || startpos >= size1 | 3355 register char c = (size1 == 0 || startpos >= size1 |
3349 ? string2[startpos - size1] | 3356 ? string2[startpos - size1] |
3350 : string1[startpos]); | 3357 : string1[startpos]); |
3351 | 3358 |
3352 if (!fastmap[(unsigned char) TRANSLATE (c)]) | 3359 if (!fastmap[(unsigned char) TRANSLATE (c)]) |
3353 goto advance; | 3360 goto advance; |
3354 } | 3361 } |
3367 #endif | 3374 #endif |
3368 #endif | 3375 #endif |
3369 | 3376 |
3370 if (val >= 0) | 3377 if (val >= 0) |
3371 return startpos; | 3378 return startpos; |
3372 | 3379 |
3373 if (val == -2) | 3380 if (val == -2) |
3374 return -2; | 3381 return -2; |
3375 | 3382 |
3376 advance: | 3383 advance: |
3377 if (!range) | 3384 if (!range) |
3378 break; | 3385 break; |
3379 else if (range > 0) | 3386 else if (range > 0) |
3380 { | 3387 { |
3381 range--; | 3388 range--; |
3382 startpos++; | 3389 startpos++; |
3383 } | 3390 } |
3384 else | 3391 else |
3385 { | 3392 { |
3386 range++; | 3393 range++; |
3387 startpos--; | 3394 startpos--; |
3388 } | 3395 } |
3389 } | 3396 } |
3390 return -1; | 3397 return -1; |
3391 } /* re_search_2 */ | 3398 } /* re_search_2 */ |
3423 | 3430 |
3424 | 3431 |
3425 /* Test if at very beginning or at very end of the virtual concatenation | 3432 /* Test if at very beginning or at very end of the virtual concatenation |
3426 of `string1' and `string2'. If only one string, it's `string2'. */ | 3433 of `string1' and `string2'. If only one string, it's `string2'. */ |
3427 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) | 3434 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) |
3428 #define AT_STRINGS_END(d) ((d) == end2) | 3435 #define AT_STRINGS_END(d) ((d) == end2) |
3429 | 3436 |
3430 | 3437 |
3431 /* Test if D points to a character which is word-constituent. We have | 3438 /* Test if D points to a character which is word-constituent. We have |
3432 two special cases to check for: if past the end of string1, look at | 3439 two special cases to check for: if past the end of string1, look at |
3433 the first character in string2; and if before the beginning of | 3440 the first character in string2; and if before the beginning of |
3496 | 3503 |
3497 /* re_match_2 matches the compiled pattern in BUFP against the | 3504 /* re_match_2 matches the compiled pattern in BUFP against the |
3498 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 | 3505 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 |
3499 and SIZE2, respectively). We start matching at POS, and stop | 3506 and SIZE2, respectively). We start matching at POS, and stop |
3500 matching at STOP. | 3507 matching at STOP. |
3501 | 3508 |
3502 If REGS is non-null and the `no_sub' field of BUFP is zero, we | 3509 If REGS is non-null and the `no_sub' field of BUFP is zero, we |
3503 store offsets for the substring each group matched in REGS. See the | 3510 store offsets for the substring each group matched in REGS. See the |
3504 documentation for exactly how many groups we fill. | 3511 documentation for exactly how many groups we fill. |
3505 | 3512 |
3506 We return -1 if no match, -2 if an internal error (such as the | 3513 We return -1 if no match, -2 if an internal error (such as the |
3544 each to consider matching. */ | 3551 each to consider matching. */ |
3545 const char *end_match_1, *end_match_2; | 3552 const char *end_match_1, *end_match_2; |
3546 | 3553 |
3547 /* Where we are in the data, and the end of the current string. */ | 3554 /* Where we are in the data, and the end of the current string. */ |
3548 const char *d, *dend; | 3555 const char *d, *dend; |
3549 | 3556 |
3550 /* Where we are in the pattern, and the end of the pattern. */ | 3557 /* Where we are in the pattern, and the end of the pattern. */ |
3551 unsigned char *p = bufp->buffer; | 3558 unsigned char *p = bufp->buffer; |
3552 register unsigned char *pend = p + bufp->used; | 3559 register unsigned char *pend = p + bufp->used; |
3553 | 3560 |
3554 /* Mark the opcode just after a start_memory, so we can test for an | 3561 /* Mark the opcode just after a start_memory, so we can test for an |
3581 | 3588 |
3582 /* We fill all the registers internally, independent of what we | 3589 /* We fill all the registers internally, independent of what we |
3583 return, for use in backreferences. The number here includes | 3590 return, for use in backreferences. The number here includes |
3584 an element for register zero. */ | 3591 an element for register zero. */ |
3585 unsigned num_regs = bufp->re_nsub + 1; | 3592 unsigned num_regs = bufp->re_nsub + 1; |
3586 | 3593 |
3587 /* The currently active registers. */ | 3594 /* The currently active registers. */ |
3588 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; | 3595 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; |
3589 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; | 3596 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; |
3590 | 3597 |
3591 /* Information on the contents of registers. These are pointers into | 3598 /* Information on the contents of registers. These are pointers into |
3613 field of reg_info[reg_num] helps us tell whether or not we have | 3620 field of reg_info[reg_num] helps us tell whether or not we have |
3614 matched any of the pattern so far this time through the reg_num-th | 3621 matched any of the pattern so far this time through the reg_num-th |
3615 subexpression. These two fields get reset each time through any | 3622 subexpression. These two fields get reset each time through any |
3616 loop their register is in. */ | 3623 loop their register is in. */ |
3617 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | 3624 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ |
3618 register_info_type *reg_info; | 3625 register_info_type *reg_info; |
3619 #endif | 3626 #endif |
3620 | 3627 |
3621 /* The following record the register info as found in the above | 3628 /* The following record the register info as found in the above |
3622 variables when we find a match better than any we've seen before. | 3629 variables when we find a match better than any we've seen before. |
3623 This happens as we backtrack through the failure points, which in | 3630 This happens as we backtrack through the failure points, which in |
3624 turn happens only if we have not yet matched the entire string. */ | 3631 turn happens only if we have not yet matched the entire string. */ |
3625 unsigned best_regs_set = false; | 3632 unsigned best_regs_set = false; |
3626 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 3633 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
3627 const char **best_regstart, **best_regend; | 3634 const char **best_regstart, **best_regend; |
3628 #endif | 3635 #endif |
3629 | 3636 |
3630 /* Logically, this is `best_regend[0]'. But we don't want to have to | 3637 /* Logically, this is `best_regend[0]'. But we don't want to have to |
3631 allocate space for that if we're not allocating space for anything | 3638 allocate space for that if we're not allocating space for anything |
3632 else (see below). Also, we never need info about register 0 for | 3639 else (see below). Also, we never need info about register 0 for |
3633 any of the other register vectors, and it seems rather a kludge to | 3640 any of the other register vectors, and it seems rather a kludge to |
3634 treat `best_regend' differently than the rest. So we keep track of | 3641 treat `best_regend' differently than the rest. So we keep track of |
3646 register_info_type *reg_info_dummy; | 3653 register_info_type *reg_info_dummy; |
3647 #endif | 3654 #endif |
3648 | 3655 |
3649 #ifdef DEBUG | 3656 #ifdef DEBUG |
3650 /* Counts the total number of registers pushed. */ | 3657 /* Counts the total number of registers pushed. */ |
3651 unsigned num_regs_pushed = 0; | 3658 unsigned num_regs_pushed = 0; |
3652 #endif | 3659 #endif |
3653 | 3660 |
3654 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); | 3661 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); |
3655 | 3662 |
3656 INIT_FAIL_STACK (); | 3663 INIT_FAIL_STACK (); |
3657 | 3664 |
3658 #ifdef MATCH_MAY_ALLOCATE | 3665 #ifdef MATCH_MAY_ALLOCATE |
3659 /* Do not bother to initialize all the register variables if there are | 3666 /* Do not bother to initialize all the register variables if there are |
3660 no groups in the pattern, as it takes a fair amount of time. If | 3667 no groups in the pattern, as it takes a fair amount of time. If |
3661 there are groups, we include space for register 0 (the whole | 3668 there are groups, we include space for register 0 (the whole |
3662 pattern), even though we never use it, since it simplifies the | 3669 pattern), even though we never use it, since it simplifies the |
3671 best_regend = REGEX_TALLOC (num_regs, const char *); | 3678 best_regend = REGEX_TALLOC (num_regs, const char *); |
3672 reg_info = REGEX_TALLOC (num_regs, register_info_type); | 3679 reg_info = REGEX_TALLOC (num_regs, register_info_type); |
3673 reg_dummy = REGEX_TALLOC (num_regs, const char *); | 3680 reg_dummy = REGEX_TALLOC (num_regs, const char *); |
3674 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); | 3681 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); |
3675 | 3682 |
3676 if (!(regstart && regend && old_regstart && old_regend && reg_info | 3683 if (!(regstart && regend && old_regstart && old_regend && reg_info |
3677 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) | 3684 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) |
3678 { | 3685 { |
3679 FREE_VARIABLES (); | 3686 FREE_VARIABLES (); |
3680 return -2; | 3687 return -2; |
3681 } | 3688 } |
3682 } | 3689 } |
3694 if (pos < 0 || pos > size1 + size2) | 3701 if (pos < 0 || pos > size1 + size2) |
3695 { | 3702 { |
3696 FREE_VARIABLES (); | 3703 FREE_VARIABLES (); |
3697 return -1; | 3704 return -1; |
3698 } | 3705 } |
3699 | 3706 |
3700 /* Initialize subexpression text positions to -1 to mark ones that no | 3707 /* Initialize subexpression text positions to -1 to mark ones that no |
3701 start_memory/stop_memory has been seen for. Also initialize the | 3708 start_memory/stop_memory has been seen for. Also initialize the |
3702 register information struct. */ | 3709 register information struct. */ |
3703 for (mcnt = 1; mcnt < num_regs; mcnt++) | 3710 for (mcnt = 1; mcnt < num_regs; mcnt++) |
3704 { | 3711 { |
3705 regstart[mcnt] = regend[mcnt] | 3712 regstart[mcnt] = regend[mcnt] |
3706 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; | 3713 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; |
3707 | 3714 |
3708 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; | 3715 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; |
3709 IS_ACTIVE (reg_info[mcnt]) = 0; | 3716 IS_ACTIVE (reg_info[mcnt]) = 0; |
3710 MATCHED_SOMETHING (reg_info[mcnt]) = 0; | 3717 MATCHED_SOMETHING (reg_info[mcnt]) = 0; |
3711 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; | 3718 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; |
3712 } | 3719 } |
3713 | 3720 |
3714 /* We move `string1' into `string2' if the latter's empty -- but not if | 3721 /* We move `string1' into `string2' if the latter's empty -- but not if |
3715 `string1' is null. */ | 3722 `string1' is null. */ |
3716 if (size2 == 0 && string1 != NULL) | 3723 if (size2 == 0 && string1 != NULL) |
3717 { | 3724 { |
3718 string2 = string1; | 3725 string2 = string1; |
3733 { | 3740 { |
3734 end_match_1 = end1; | 3741 end_match_1 = end1; |
3735 end_match_2 = string2 + stop - size1; | 3742 end_match_2 = string2 + stop - size1; |
3736 } | 3743 } |
3737 | 3744 |
3738 /* `p' scans through the pattern as `d' scans through the data. | 3745 /* `p' scans through the pattern as `d' scans through the data. |
3739 `dend' is the end of the input string that `d' points within. `d' | 3746 `dend' is the end of the input string that `d' points within. `d' |
3740 is advanced into the following input string whenever necessary, but | 3747 is advanced into the following input string whenever necessary, but |
3741 this happens before fetching; therefore, at the beginning of the | 3748 this happens before fetching; therefore, at the beginning of the |
3742 loop, `d' can be pointing at the end of a string, but it cannot | 3749 loop, `d' can be pointing at the end of a string, but it cannot |
3743 equal `string2'. */ | 3750 equal `string2'. */ |
3755 DEBUG_PRINT1 ("The compiled pattern is: "); | 3762 DEBUG_PRINT1 ("The compiled pattern is: "); |
3756 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); | 3763 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); |
3757 DEBUG_PRINT1 ("The string to match is: `"); | 3764 DEBUG_PRINT1 ("The string to match is: `"); |
3758 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); | 3765 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); |
3759 DEBUG_PRINT1 ("'\n"); | 3766 DEBUG_PRINT1 ("'\n"); |
3760 | 3767 |
3761 /* This loops over pattern commands. It exits by returning from the | 3768 /* This loops over pattern commands. It exits by returning from the |
3762 function if the match is complete, or it drops through if the match | 3769 function if the match is complete, or it drops through if the match |
3763 fails at this starting point in the input data. */ | 3770 fails at this starting point in the input data. */ |
3764 for (;;) | 3771 for (;;) |
3765 { | 3772 { |
3766 DEBUG_PRINT2 ("\n0x%x: ", p); | 3773 DEBUG_PRINT2 ("\n0x%x: ", p); |
3767 | 3774 |
3768 if (p == pend) | 3775 if (p == pend) |
3769 { /* End of pattern means we might have succeeded. */ | 3776 { /* End of pattern means we might have succeeded. */ |
3770 DEBUG_PRINT1 ("end of pattern ... "); | 3777 DEBUG_PRINT1 ("end of pattern ... "); |
3771 | 3778 |
3772 /* If we haven't matched the entire string, and we want the | 3779 /* If we haven't matched the entire string, and we want the |
3773 longest match, try backtracking. */ | 3780 longest match, try backtracking. */ |
3774 if (d != end_match_2) | 3781 if (d != end_match_2) |
3775 { | 3782 { |
3776 /* 1 if this match ends in the same string (string1 or string2) | 3783 /* 1 if this match ends in the same string (string1 or string2) |
3777 as the best previous match. */ | 3784 as the best previous match. */ |
3778 boolean same_str_p = (FIRST_STRING_P (match_end) | 3785 boolean same_str_p = (FIRST_STRING_P (match_end) |
3779 == MATCHING_IN_FIRST_STRING); | 3786 == MATCHING_IN_FIRST_STRING); |
3780 /* 1 if this match is the best seen so far. */ | 3787 /* 1 if this match is the best seen so far. */ |
3781 boolean best_match_p; | 3788 boolean best_match_p; |
3782 | 3789 |
3783 /* AIX compiler got confused when this was combined | 3790 /* AIX compiler got confused when this was combined |
3786 best_match_p = d > match_end; | 3793 best_match_p = d > match_end; |
3787 else | 3794 else |
3788 best_match_p = !MATCHING_IN_FIRST_STRING; | 3795 best_match_p = !MATCHING_IN_FIRST_STRING; |
3789 | 3796 |
3790 DEBUG_PRINT1 ("backtracking.\n"); | 3797 DEBUG_PRINT1 ("backtracking.\n"); |
3791 | 3798 |
3792 if (!FAIL_STACK_EMPTY ()) | 3799 if (!FAIL_STACK_EMPTY ()) |
3793 { /* More failure points to try. */ | 3800 { /* More failure points to try. */ |
3794 | 3801 |
3795 /* If exceeds best match so far, save it. */ | 3802 /* If exceeds best match so far, save it. */ |
3796 if (!best_regs_set || best_match_p) | 3803 if (!best_regs_set || best_match_p) |
3797 { | 3804 { |
3798 best_regs_set = true; | 3805 best_regs_set = true; |
3799 match_end = d; | 3806 match_end = d; |
3800 | 3807 |
3801 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); | 3808 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); |
3802 | 3809 |
3803 for (mcnt = 1; mcnt < num_regs; mcnt++) | 3810 for (mcnt = 1; mcnt < num_regs; mcnt++) |
3804 { | 3811 { |
3805 best_regstart[mcnt] = regstart[mcnt]; | 3812 best_regstart[mcnt] = regstart[mcnt]; |
3806 best_regend[mcnt] = regend[mcnt]; | 3813 best_regend[mcnt] = regend[mcnt]; |
3807 } | 3814 } |
3808 } | 3815 } |
3809 goto fail; | 3816 goto fail; |
3810 } | 3817 } |
3811 | 3818 |
3812 /* If no failure points, don't restore garbage. And if | 3819 /* If no failure points, don't restore garbage. And if |
3813 last match is real best match, don't restore second | 3820 last match is real best match, don't restore second |
3814 best one. */ | 3821 best one. */ |
3819 end_match_1' while the restored d is in string2. | 3826 end_match_1' while the restored d is in string2. |
3820 For example, the pattern `x.*y.*z' against the | 3827 For example, the pattern `x.*y.*z' against the |
3821 strings `x-' and `y-z-', if the two strings are | 3828 strings `x-' and `y-z-', if the two strings are |
3822 not consecutive in memory. */ | 3829 not consecutive in memory. */ |
3823 DEBUG_PRINT1 ("Restoring best registers.\n"); | 3830 DEBUG_PRINT1 ("Restoring best registers.\n"); |
3824 | 3831 |
3825 d = match_end; | 3832 d = match_end; |
3826 dend = ((d >= string1 && d <= end1) | 3833 dend = ((d >= string1 && d <= end1) |
3827 ? end_match_1 : end_match_2); | 3834 ? end_match_1 : end_match_2); |
3828 | 3835 |
3829 for (mcnt = 1; mcnt < num_regs; mcnt++) | 3836 for (mcnt = 1; mcnt < num_regs; mcnt++) |
3886 regs->start[0] = pos; | 3893 regs->start[0] = pos; |
3887 regs->end[0] = (MATCHING_IN_FIRST_STRING | 3894 regs->end[0] = (MATCHING_IN_FIRST_STRING |
3888 ? ((regoff_t) (d - string1)) | 3895 ? ((regoff_t) (d - string1)) |
3889 : ((regoff_t) (d - string2 + size1))); | 3896 : ((regoff_t) (d - string2 + size1))); |
3890 } | 3897 } |
3891 | 3898 |
3892 /* Go through the first `min (num_regs, regs->num_regs)' | 3899 /* Go through the first `min (num_regs, regs->num_regs)' |
3893 registers, since that is all we initialized. */ | 3900 registers, since that is all we initialized. */ |
3894 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) | 3901 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) |
3895 { | 3902 { |
3896 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) | 3903 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) |
3901 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); | 3908 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); |
3902 regs->end[mcnt] | 3909 regs->end[mcnt] |
3903 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); | 3910 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); |
3904 } | 3911 } |
3905 } | 3912 } |
3906 | 3913 |
3907 /* If the regs structure we return has more elements than | 3914 /* If the regs structure we return has more elements than |
3908 were in the pattern, set the extra elements to -1. If | 3915 were in the pattern, set the extra elements to -1. If |
3909 we (re)allocated the registers, this is the case, | 3916 we (re)allocated the registers, this is the case, |
3910 because we always allocate enough to have at least one | 3917 because we always allocate enough to have at least one |
3911 -1 at the end. */ | 3918 -1 at the end. */ |
3916 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", | 3923 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", |
3917 nfailure_points_pushed, nfailure_points_popped, | 3924 nfailure_points_pushed, nfailure_points_popped, |
3918 nfailure_points_pushed - nfailure_points_popped); | 3925 nfailure_points_pushed - nfailure_points_popped); |
3919 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); | 3926 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); |
3920 | 3927 |
3921 mcnt = d - pos - (MATCHING_IN_FIRST_STRING | 3928 mcnt = d - pos - (MATCHING_IN_FIRST_STRING |
3922 ? string1 | 3929 ? string1 |
3923 : string2 - size1); | 3930 : string2 - size1); |
3924 | 3931 |
3925 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); | 3932 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); |
3926 | 3933 |
3927 FREE_VARIABLES (); | 3934 FREE_VARIABLES (); |
4008 not = !not; | 4015 not = !not; |
4009 | 4016 |
4010 p += 1 + *p; | 4017 p += 1 + *p; |
4011 | 4018 |
4012 if (!not) goto fail; | 4019 if (!not) goto fail; |
4013 | 4020 |
4014 SET_REGS_MATCHED (); | 4021 SET_REGS_MATCHED (); |
4015 d++; | 4022 d++; |
4016 break; | 4023 break; |
4017 } | 4024 } |
4018 | 4025 |
4025 case start_memory: | 4032 case start_memory: |
4026 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); | 4033 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); |
4027 | 4034 |
4028 /* Find out if this group can match the empty string. */ | 4035 /* Find out if this group can match the empty string. */ |
4029 p1 = p; /* To send to group_match_null_string_p. */ | 4036 p1 = p; /* To send to group_match_null_string_p. */ |
4030 | 4037 |
4031 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) | 4038 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) |
4032 REG_MATCH_NULL_STRING_P (reg_info[*p]) | 4039 REG_MATCH_NULL_STRING_P (reg_info[*p]) |
4033 = group_match_null_string_p (&p1, pend, reg_info); | 4040 = group_match_null_string_p (&p1, pend, reg_info); |
4034 | 4041 |
4035 /* Save the position in the string where we were the last time | 4042 /* Save the position in the string where we were the last time |
4036 we were at this open-group operator in case the group is | 4043 we were at this open-group operator in case the group is |
4037 operated upon by a repetition operator, e.g., with `(a*)*b' | 4044 operated upon by a repetition operator, e.g., with `(a*)*b' |
4038 against `ab'; then we want to ignore where we are now in | 4045 against `ab'; then we want to ignore where we are now in |
4039 the string in case this attempt to match fails. */ | 4046 the string in case this attempt to match fails. */ |
4040 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | 4047 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) |
4041 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] | 4048 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] |
4042 : regstart[*p]; | 4049 : regstart[*p]; |
4043 DEBUG_PRINT2 (" old_regstart: %d\n", | 4050 DEBUG_PRINT2 (" old_regstart: %d\n", |
4044 POINTER_TO_OFFSET (old_regstart[*p])); | 4051 POINTER_TO_OFFSET (old_regstart[*p])); |
4045 | 4052 |
4046 regstart[*p] = d; | 4053 regstart[*p] = d; |
4047 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); | 4054 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); |
4048 | 4055 |
4049 IS_ACTIVE (reg_info[*p]) = 1; | 4056 IS_ACTIVE (reg_info[*p]) = 1; |
4050 MATCHED_SOMETHING (reg_info[*p]) = 0; | 4057 MATCHED_SOMETHING (reg_info[*p]) = 0; |
4051 | 4058 |
4052 /* Clear this whenever we change the register activity status. */ | 4059 /* Clear this whenever we change the register activity status. */ |
4053 set_regs_matched_done = 0; | 4060 set_regs_matched_done = 0; |
4054 | 4061 |
4055 /* This is the new highest active register. */ | 4062 /* This is the new highest active register. */ |
4056 highest_active_reg = *p; | 4063 highest_active_reg = *p; |
4057 | 4064 |
4058 /* If nothing was active before, this is the new lowest active | 4065 /* If nothing was active before, this is the new lowest active |
4059 register. */ | 4066 register. */ |
4060 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | 4067 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) |
4061 lowest_active_reg = *p; | 4068 lowest_active_reg = *p; |
4062 | 4069 |
4070 /* The stop_memory opcode represents the end of a group. Its | 4077 /* The stop_memory opcode represents the end of a group. Its |
4071 arguments are the same as start_memory's: the register | 4078 arguments are the same as start_memory's: the register |
4072 number, and the number of inner groups. */ | 4079 number, and the number of inner groups. */ |
4073 case stop_memory: | 4080 case stop_memory: |
4074 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); | 4081 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); |
4075 | 4082 |
4076 /* We need to save the string position the last time we were at | 4083 /* We need to save the string position the last time we were at |
4077 this close-group operator in case the group is operated | 4084 this close-group operator in case the group is operated |
4078 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' | 4085 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' |
4079 against `aba'; then we want to ignore where we are now in | 4086 against `aba'; then we want to ignore where we are now in |
4080 the string in case this attempt to match fails. */ | 4087 the string in case this attempt to match fails. */ |
4081 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | 4088 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) |
4082 ? REG_UNSET (regend[*p]) ? d : regend[*p] | 4089 ? REG_UNSET (regend[*p]) ? d : regend[*p] |
4083 : regend[*p]; | 4090 : regend[*p]; |
4084 DEBUG_PRINT2 (" old_regend: %d\n", | 4091 DEBUG_PRINT2 (" old_regend: %d\n", |
4085 POINTER_TO_OFFSET (old_regend[*p])); | 4092 POINTER_TO_OFFSET (old_regend[*p])); |
4086 | 4093 |
4087 regend[*p] = d; | 4094 regend[*p] = d; |
4088 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); | 4095 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); |
4089 | 4096 |
4106 (a(b)c(d(e)f)g). When group 3 ends, after the f), the | 4113 (a(b)c(d(e)f)g). When group 3 ends, after the f), the |
4107 new highest active register is 1. */ | 4114 new highest active register is 1. */ |
4108 unsigned char r = *p - 1; | 4115 unsigned char r = *p - 1; |
4109 while (r > 0 && !IS_ACTIVE (reg_info[r])) | 4116 while (r > 0 && !IS_ACTIVE (reg_info[r])) |
4110 r--; | 4117 r--; |
4111 | 4118 |
4112 /* If we end up at register zero, that means that we saved | 4119 /* If we end up at register zero, that means that we saved |
4113 the registers as the result of an `on_failure_jump', not | 4120 the registers as the result of an `on_failure_jump', not |
4114 a `start_memory', and we jumped past the innermost | 4121 a `start_memory', and we jumped past the innermost |
4115 `stop_memory'. For example, in ((.)*) we save | 4122 `stop_memory'. For example, in ((.)*) we save |
4116 registers 1 and 2 as a result of the *, but when we pop | 4123 registers 1 and 2 as a result of the *, but when we pop |
4122 highest_active_reg = NO_HIGHEST_ACTIVE_REG; | 4129 highest_active_reg = NO_HIGHEST_ACTIVE_REG; |
4123 } | 4130 } |
4124 else | 4131 else |
4125 highest_active_reg = r; | 4132 highest_active_reg = r; |
4126 } | 4133 } |
4127 | 4134 |
4128 /* If we just failed to match something this time around with a | 4135 /* If we just failed to match something this time around with a |
4129 group that's operated on by a repetition operator, try to | 4136 group that's operated on by a repetition operator, try to |
4130 force exit from the ``loop'', and restore the register | 4137 force exit from the ``loop'', and restore the register |
4131 information for this group that we had before trying this | 4138 information for this group that we had before trying this |
4132 last match. */ | 4139 last match. */ |
4133 if ((!MATCHED_SOMETHING (reg_info[*p]) | 4140 if ((!MATCHED_SOMETHING (reg_info[*p]) |
4134 || just_past_start_mem == p - 1) | 4141 || just_past_start_mem == p - 1) |
4135 && (p + 2) < pend) | 4142 && (p + 2) < pend) |
4136 { | 4143 { |
4137 boolean is_a_jump_n = false; | 4144 boolean is_a_jump_n = false; |
4138 | 4145 |
4139 p1 = p + 2; | 4146 p1 = p + 2; |
4140 mcnt = 0; | 4147 mcnt = 0; |
4141 switch ((re_opcode_t) *p1++) | 4148 switch ((re_opcode_t) *p1++) |
4142 { | 4149 { |
4143 case jump_n: | 4150 case jump_n: |
4148 case dummy_failure_jump: | 4155 case dummy_failure_jump: |
4149 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4156 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4150 if (is_a_jump_n) | 4157 if (is_a_jump_n) |
4151 p1 += 2; | 4158 p1 += 2; |
4152 break; | 4159 break; |
4153 | 4160 |
4154 default: | 4161 default: |
4155 /* do nothing */ ; | 4162 /* do nothing */ ; |
4156 } | 4163 } |
4157 p1 += mcnt; | 4164 p1 += mcnt; |
4158 | 4165 |
4159 /* If the next operation is a jump backwards in the pattern | 4166 /* If the next operation is a jump backwards in the pattern |
4160 to an on_failure_jump right before the start_memory | 4167 to an on_failure_jump right before the start_memory |
4161 corresponding to this stop_memory, exit from the loop | 4168 corresponding to this stop_memory, exit from the loop |
4162 by forcing a failure after pushing on the stack the | 4169 by forcing a failure after pushing on the stack the |
4163 on_failure_jump's jump in the pattern, and d. */ | 4170 on_failure_jump's jump in the pattern, and d. */ |
4167 /* If this group ever matched anything, then restore | 4174 /* If this group ever matched anything, then restore |
4168 what its registers were before trying this last | 4175 what its registers were before trying this last |
4169 failed match, e.g., with `(a*)*b' against `ab' for | 4176 failed match, e.g., with `(a*)*b' against `ab' for |
4170 regstart[1], and, e.g., with `((a*)*(b*)*)*' | 4177 regstart[1], and, e.g., with `((a*)*(b*)*)*' |
4171 against `aba' for regend[3]. | 4178 against `aba' for regend[3]. |
4172 | 4179 |
4173 Also restore the registers for inner groups for, | 4180 Also restore the registers for inner groups for, |
4174 e.g., `((a*)(b*))*' against `aba' (register 3 would | 4181 e.g., `((a*)(b*))*' against `aba' (register 3 would |
4175 otherwise get trashed). */ | 4182 otherwise get trashed). */ |
4176 | 4183 |
4177 if (EVER_MATCHED_SOMETHING (reg_info[*p])) | 4184 if (EVER_MATCHED_SOMETHING (reg_info[*p])) |
4178 { | 4185 { |
4179 unsigned r; | 4186 unsigned r; |
4180 | 4187 |
4181 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; | 4188 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; |
4182 | 4189 |
4183 /* Restore this and inner groups' (if any) registers. */ | 4190 /* Restore this and inner groups' (if any) registers. */ |
4184 for (r = *p; r < *p + *(p + 1); r++) | 4191 for (r = *p; r < *p + *(p + 1); r++) |
4185 { | 4192 { |
4186 regstart[r] = old_regstart[r]; | 4193 regstart[r] = old_regstart[r]; |
4187 | 4194 |
4188 /* xx why this test? */ | 4195 /* xx why this test? */ |
4189 if (old_regend[r] >= regstart[r]) | 4196 if (old_regend[r] >= regstart[r]) |
4190 regend[r] = old_regend[r]; | 4197 regend[r] = old_regend[r]; |
4191 } | 4198 } |
4192 } | 4199 } |
4193 p1++; | 4200 p1++; |
4194 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4201 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4195 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); | 4202 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); |
4196 | 4203 |
4197 goto fail; | 4204 goto fail; |
4198 } | 4205 } |
4199 } | 4206 } |
4200 | 4207 |
4201 /* Move past the register number and the inner group count. */ | 4208 /* Move past the register number and the inner group count. */ |
4202 p += 2; | 4209 p += 2; |
4203 break; | 4210 break; |
4204 | 4211 |
4205 | 4212 |
4212 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); | 4219 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); |
4213 | 4220 |
4214 /* Can't back reference a group which we've never matched. */ | 4221 /* Can't back reference a group which we've never matched. */ |
4215 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) | 4222 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) |
4216 goto fail; | 4223 goto fail; |
4217 | 4224 |
4218 /* Where in input to try to start matching. */ | 4225 /* Where in input to try to start matching. */ |
4219 d2 = regstart[regno]; | 4226 d2 = regstart[regno]; |
4220 | 4227 |
4221 /* Where to stop matching; if both the place to start and | 4228 /* Where to stop matching; if both the place to start and |
4222 the place to stop matching are in the same string, then | 4229 the place to stop matching are in the same string, then |
4223 set to the place to stop, otherwise, for now have to use | 4230 set to the place to stop, otherwise, for now have to use |
4224 the end of the first string. */ | 4231 the end of the first string. */ |
4225 | 4232 |
4226 dend2 = ((FIRST_STRING_P (regstart[regno]) | 4233 dend2 = ((FIRST_STRING_P (regstart[regno]) |
4227 == FIRST_STRING_P (regend[regno])) | 4234 == FIRST_STRING_P (regend[regno])) |
4228 ? regend[regno] : end_match_1); | 4235 ? regend[regno] : end_match_1); |
4229 for (;;) | 4236 for (;;) |
4230 { | 4237 { |
4231 /* If necessary, advance to next segment in register | 4238 /* If necessary, advance to next segment in register |
4245 /* If necessary, advance to next segment in data. */ | 4252 /* If necessary, advance to next segment in data. */ |
4246 PREFETCH (); | 4253 PREFETCH (); |
4247 | 4254 |
4248 /* How many characters left in this segment to match. */ | 4255 /* How many characters left in this segment to match. */ |
4249 mcnt = dend - d; | 4256 mcnt = dend - d; |
4250 | 4257 |
4251 /* Want how many consecutive characters we can match in | 4258 /* Want how many consecutive characters we can match in |
4252 one shot, so, if necessary, adjust the count. */ | 4259 one shot, so, if necessary, adjust the count. */ |
4253 if (mcnt > dend2 - d2) | 4260 if (mcnt > dend2 - d2) |
4254 mcnt = dend2 - d2; | 4261 mcnt = dend2 - d2; |
4255 | 4262 |
4256 /* Compare that many; failure if mismatch, else move | 4263 /* Compare that many; failure if mismatch, else move |
4257 past them. */ | 4264 past them. */ |
4258 if (translate | 4265 if (translate |
4259 ? bcmp_translate (d, d2, mcnt, translate) | 4266 ? bcmp_translate (d, d2, mcnt, translate) |
4260 : bcmp (d, d2, mcnt)) | 4267 : bcmp (d, d2, mcnt)) |
4261 goto fail; | 4268 goto fail; |
4262 d += mcnt, d2 += mcnt; | 4269 d += mcnt, d2 += mcnt; |
4263 | 4270 |
4264 /* Do this because we've matched some characters. */ | 4271 /* Do this because we've matched some characters. */ |
4271 /* begline matches the empty string at the beginning of the string | 4278 /* begline matches the empty string at the beginning of the string |
4272 (unless `not_bol' is set in `bufp'), and, if | 4279 (unless `not_bol' is set in `bufp'), and, if |
4273 `newline_anchor' is set, after newlines. */ | 4280 `newline_anchor' is set, after newlines. */ |
4274 case begline: | 4281 case begline: |
4275 DEBUG_PRINT1 ("EXECUTING begline.\n"); | 4282 DEBUG_PRINT1 ("EXECUTING begline.\n"); |
4276 | 4283 |
4277 if (AT_STRINGS_BEG (d)) | 4284 if (AT_STRINGS_BEG (d)) |
4278 { | 4285 { |
4279 if (!bufp->not_bol) break; | 4286 if (!bufp->not_bol) break; |
4280 } | 4287 } |
4281 else if (d[-1] == '\n' && bufp->newline_anchor) | 4288 else if (d[-1] == '\n' && bufp->newline_anchor) |
4292 | 4299 |
4293 if (AT_STRINGS_END (d)) | 4300 if (AT_STRINGS_END (d)) |
4294 { | 4301 { |
4295 if (!bufp->not_eol) break; | 4302 if (!bufp->not_eol) break; |
4296 } | 4303 } |
4297 | 4304 |
4298 /* We have to ``prefetch'' the next character. */ | 4305 /* We have to ``prefetch'' the next character. */ |
4299 else if ((d == end1 ? *string2 : *d) == '\n' | 4306 else if ((d == end1 ? *string2 : *d) == '\n' |
4300 && bufp->newline_anchor) | 4307 && bufp->newline_anchor) |
4301 { | 4308 { |
4302 break; | 4309 break; |
4326 string, instead of restoring it. To see why, consider | 4333 string, instead of restoring it. To see why, consider |
4327 matching `foo\nbar' against `.*\n'. The .* matches the foo; | 4334 matching `foo\nbar' against `.*\n'. The .* matches the foo; |
4328 then the . fails against the \n. But the next thing we want | 4335 then the . fails against the \n. But the next thing we want |
4329 to do is match the \n against the \n; if we restored the | 4336 to do is match the \n against the \n; if we restored the |
4330 string value, we would be back at the foo. | 4337 string value, we would be back at the foo. |
4331 | 4338 |
4332 Because this is used only in specific cases, we don't need to | 4339 Because this is used only in specific cases, we don't need to |
4333 check all the things that `on_failure_jump' does, to make | 4340 check all the things that `on_failure_jump' does, to make |
4334 sure the right things get saved on the stack. Hence we don't | 4341 sure the right things get saved on the stack. Hence we don't |
4335 share its code. The only reason to push anything on the | 4342 share its code. The only reason to push anything on the |
4336 stack at all is that otherwise we would have to change | 4343 stack at all is that otherwise we would have to change |
4337 `anychar's code to do something besides goto fail in this | 4344 `anychar's code to do something besides goto fail in this |
4338 case; that seems worse than this. */ | 4345 case; that seems worse than this. */ |
4339 case on_failure_keep_string_jump: | 4346 case on_failure_keep_string_jump: |
4340 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); | 4347 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); |
4341 | 4348 |
4342 EXTRACT_NUMBER_AND_INCR (mcnt, p); | 4349 EXTRACT_NUMBER_AND_INCR (mcnt, p); |
4343 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); | 4350 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); |
4344 | 4351 |
4345 PUSH_FAILURE_POINT (p + mcnt, NULL, -2); | 4352 PUSH_FAILURE_POINT (p + mcnt, NULL, -2); |
4346 break; | 4353 break; |
4347 | 4354 |
4348 | 4355 |
4349 /* Uses of on_failure_jump: | 4356 /* Uses of on_failure_jump: |
4350 | 4357 |
4351 Each alternative starts with an on_failure_jump that points | 4358 Each alternative starts with an on_failure_jump that points |
4352 to the beginning of the next alternative. Each alternative | 4359 to the beginning of the next alternative. Each alternative |
4353 except the last ends with a jump that in effect jumps past | 4360 except the last ends with a jump that in effect jumps past |
4354 the rest of the alternatives. (They really jump to the | 4361 the rest of the alternatives. (They really jump to the |
4355 ending jump of the following alternative, because tensioning | 4362 ending jump of the following alternative, because tensioning |
4411 pattern follows its end. If we can establish that there | 4418 pattern follows its end. If we can establish that there |
4412 is nothing that they would both match, i.e., that we | 4419 is nothing that they would both match, i.e., that we |
4413 would have to backtrack because of (as in, e.g., `a*a') | 4420 would have to backtrack because of (as in, e.g., `a*a') |
4414 then we can change to pop_failure_jump, because we'll | 4421 then we can change to pop_failure_jump, because we'll |
4415 never have to backtrack. | 4422 never have to backtrack. |
4416 | 4423 |
4417 This is not true in the case of alternatives: in | 4424 This is not true in the case of alternatives: in |
4418 `(a|ab)*' we do need to backtrack to the `ab' alternative | 4425 `(a|ab)*' we do need to backtrack to the `ab' alternative |
4419 (e.g., if the string was `ab'). But instead of trying to | 4426 (e.g., if the string was `ab'). But instead of trying to |
4420 detect that here, the alternative has put on a dummy | 4427 detect that here, the alternative has put on a dummy |
4421 failure point which is what we will end up popping. */ | 4428 failure point which is what we will end up popping. */ |
4437 break; | 4444 break; |
4438 } | 4445 } |
4439 | 4446 |
4440 p1 = p + mcnt; | 4447 p1 = p + mcnt; |
4441 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding | 4448 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding |
4442 to the `maybe_finalize_jump' of this case. Examine what | 4449 to the `maybe_finalize_jump' of this case. Examine what |
4443 follows. */ | 4450 follows. */ |
4444 | 4451 |
4445 /* If we're at the end of the pattern, we can change. */ | 4452 /* If we're at the end of the pattern, we can change. */ |
4446 if (p2 == pend) | 4453 if (p2 == pend) |
4447 { | 4454 { |
4463 { | 4470 { |
4464 p[-3] = (unsigned char) pop_failure_jump; | 4471 p[-3] = (unsigned char) pop_failure_jump; |
4465 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | 4472 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", |
4466 c, p1[5]); | 4473 c, p1[5]); |
4467 } | 4474 } |
4468 | 4475 |
4469 else if ((re_opcode_t) p1[3] == charset | 4476 else if ((re_opcode_t) p1[3] == charset |
4470 || (re_opcode_t) p1[3] == charset_not) | 4477 || (re_opcode_t) p1[3] == charset_not) |
4471 { | 4478 { |
4472 int not = (re_opcode_t) p1[3] == charset_not; | 4479 int not = (re_opcode_t) p1[3] == charset_not; |
4473 | 4480 |
4474 if (c < (unsigned char) (p1[4] * BYTEWIDTH) | 4481 if (c < (unsigned char) (p1[4] * BYTEWIDTH) |
4475 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | 4482 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) |
4476 not = !not; | 4483 not = !not; |
4477 | 4484 |
4478 /* `not' is equal to 1 if c would match, which means | 4485 /* `not' is equal to 1 if c would match, which means |
4498 { | 4505 { |
4499 p[-3] = (unsigned char) pop_failure_jump; | 4506 p[-3] = (unsigned char) pop_failure_jump; |
4500 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | 4507 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", |
4501 c, p1[5]); | 4508 c, p1[5]); |
4502 } | 4509 } |
4503 | 4510 |
4504 else if ((re_opcode_t) p1[3] == charset_not) | 4511 else if ((re_opcode_t) p1[3] == charset_not) |
4505 { | 4512 { |
4506 int idx; | 4513 int idx; |
4507 /* We win if the charset_not inside the loop | 4514 /* We win if the charset_not inside the loop |
4508 lists every character listed in the charset after. */ | 4515 lists every character listed in the charset after. */ |
4569 dummy_low_reg, dummy_high_reg, | 4576 dummy_low_reg, dummy_high_reg, |
4570 reg_dummy, reg_dummy, reg_info_dummy); | 4577 reg_dummy, reg_dummy, reg_info_dummy); |
4571 } | 4578 } |
4572 /* Note fall through. */ | 4579 /* Note fall through. */ |
4573 | 4580 |
4574 | 4581 |
4575 /* Unconditionally jump (without popping any failure points). */ | 4582 /* Unconditionally jump (without popping any failure points). */ |
4576 case jump: | 4583 case jump: |
4577 unconditional_jump: | 4584 unconditional_jump: |
4578 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ | 4585 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ |
4579 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); | 4586 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); |
4580 p += mcnt; /* Do the jump. */ | 4587 p += mcnt; /* Do the jump. */ |
4581 DEBUG_PRINT2 ("(to 0x%x).\n", p); | 4588 DEBUG_PRINT2 ("(to 0x%x).\n", p); |
4582 break; | 4589 break; |
4583 | 4590 |
4584 | 4591 |
4585 /* We need this opcode so we can detect where alternatives end | 4592 /* We need this opcode so we can detect where alternatives end |
4586 in `group_match_null_string_p' et al. */ | 4593 in `group_match_null_string_p' et al. */ |
4587 case jump_past_alt: | 4594 case jump_past_alt: |
4588 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); | 4595 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); |
4589 goto unconditional_jump; | 4596 goto unconditional_jump; |
4614 PUSH_FAILURE_POINT (0, 0, -2); | 4621 PUSH_FAILURE_POINT (0, 0, -2); |
4615 break; | 4622 break; |
4616 | 4623 |
4617 /* Have to succeed matching what follows at least n times. | 4624 /* Have to succeed matching what follows at least n times. |
4618 After that, handle like `on_failure_jump'. */ | 4625 After that, handle like `on_failure_jump'. */ |
4619 case succeed_n: | 4626 case succeed_n: |
4620 EXTRACT_NUMBER (mcnt, p + 2); | 4627 EXTRACT_NUMBER (mcnt, p + 2); |
4621 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); | 4628 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); |
4622 | 4629 |
4623 assert (mcnt >= 0); | 4630 assert (mcnt >= 0); |
4624 /* Originally, this is how many times we HAVE to succeed. */ | 4631 /* Originally, this is how many times we HAVE to succeed. */ |
4635 p[2] = (unsigned char) no_op; | 4642 p[2] = (unsigned char) no_op; |
4636 p[3] = (unsigned char) no_op; | 4643 p[3] = (unsigned char) no_op; |
4637 goto on_failure; | 4644 goto on_failure; |
4638 } | 4645 } |
4639 break; | 4646 break; |
4640 | 4647 |
4641 case jump_n: | 4648 case jump_n: |
4642 EXTRACT_NUMBER (mcnt, p + 2); | 4649 EXTRACT_NUMBER (mcnt, p + 2); |
4643 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); | 4650 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); |
4644 | 4651 |
4645 /* Originally, this is how many times we CAN jump. */ | 4652 /* Originally, this is how many times we CAN jump. */ |
4646 if (mcnt) | 4653 if (mcnt) |
4647 { | 4654 { |
4648 mcnt--; | 4655 mcnt--; |
4649 STORE_NUMBER (p + 2, mcnt); | 4656 STORE_NUMBER (p + 2, mcnt); |
4650 goto unconditional_jump; | 4657 goto unconditional_jump; |
4651 } | 4658 } |
4652 /* If we don't have to jump any more, skip over the rest of command. */ | 4659 /* If we don't have to jump any more, skip over the rest of command. */ |
4653 else | 4660 else |
4654 p += 4; | 4661 p += 4; |
4655 break; | 4662 break; |
4656 | 4663 |
4657 case set_number_at: | 4664 case set_number_at: |
4658 { | 4665 { |
4659 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); | 4666 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); |
4660 | 4667 |
4661 EXTRACT_NUMBER_AND_INCR (mcnt, p); | 4668 EXTRACT_NUMBER_AND_INCR (mcnt, p); |
4695 case before_dot: | 4702 case before_dot: |
4696 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); | 4703 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); |
4697 if (PTR_CHAR_POS ((unsigned char *) d) >= point) | 4704 if (PTR_CHAR_POS ((unsigned char *) d) >= point) |
4698 goto fail; | 4705 goto fail; |
4699 break; | 4706 break; |
4700 | 4707 |
4701 case at_dot: | 4708 case at_dot: |
4702 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); | 4709 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); |
4703 if (PTR_CHAR_POS ((unsigned char *) d) != point) | 4710 if (PTR_CHAR_POS ((unsigned char *) d) != point) |
4704 goto fail; | 4711 goto fail; |
4705 break; | 4712 break; |
4706 | 4713 |
4707 case after_dot: | 4714 case after_dot: |
4708 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); | 4715 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); |
4709 if (PTR_CHAR_POS ((unsigned char *) d) <= point) | 4716 if (PTR_CHAR_POS ((unsigned char *) d) <= point) |
4710 goto fail; | 4717 goto fail; |
4711 break; | 4718 break; |
4751 if (!WORDCHAR_P (d)) | 4758 if (!WORDCHAR_P (d)) |
4752 goto fail; | 4759 goto fail; |
4753 SET_REGS_MATCHED (); | 4760 SET_REGS_MATCHED (); |
4754 d++; | 4761 d++; |
4755 break; | 4762 break; |
4756 | 4763 |
4757 case notwordchar: | 4764 case notwordchar: |
4758 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); | 4765 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); |
4759 PREFETCH (); | 4766 PREFETCH (); |
4760 if (WORDCHAR_P (d)) | 4767 if (WORDCHAR_P (d)) |
4761 goto fail; | 4768 goto fail; |
4762 SET_REGS_MATCHED (); | 4769 SET_REGS_MATCHED (); |
4763 d++; | 4770 d++; |
4764 break; | 4771 break; |
4765 #endif /* not emacs */ | 4772 #endif /* not emacs */ |
4766 | 4773 |
4767 default: | 4774 default: |
4768 abort (); | 4775 abort (); |
4769 } | 4776 } |
4770 continue; /* Successfully executed one pattern command; keep going. */ | 4777 continue; /* Successfully executed one pattern command; keep going. */ |
4771 | 4778 |
4786 /* If we failed to the end of the pattern, don't examine *p. */ | 4793 /* If we failed to the end of the pattern, don't examine *p. */ |
4787 assert (p <= pend); | 4794 assert (p <= pend); |
4788 if (p < pend) | 4795 if (p < pend) |
4789 { | 4796 { |
4790 boolean is_a_jump_n = false; | 4797 boolean is_a_jump_n = false; |
4791 | 4798 |
4792 /* If failed to a backwards jump that's part of a repetition | 4799 /* If failed to a backwards jump that's part of a repetition |
4793 loop, need to pop this failure point and use the next one. */ | 4800 loop, need to pop this failure point and use the next one. */ |
4794 switch ((re_opcode_t) *p) | 4801 switch ((re_opcode_t) *p) |
4795 { | 4802 { |
4796 case jump_n: | 4803 case jump_n: |
4798 case maybe_pop_jump: | 4805 case maybe_pop_jump: |
4799 case pop_failure_jump: | 4806 case pop_failure_jump: |
4800 case jump: | 4807 case jump: |
4801 p1 = p + 1; | 4808 p1 = p + 1; |
4802 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4809 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4803 p1 += mcnt; | 4810 p1 += mcnt; |
4804 | 4811 |
4805 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) | 4812 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) |
4806 || (!is_a_jump_n | 4813 || (!is_a_jump_n |
4807 && (re_opcode_t) *p1 == on_failure_jump)) | 4814 && (re_opcode_t) *p1 == on_failure_jump)) |
4808 goto fail; | 4815 goto fail; |
4829 | 4836 |
4830 /* Subroutine definitions for re_match_2. */ | 4837 /* Subroutine definitions for re_match_2. */ |
4831 | 4838 |
4832 | 4839 |
4833 /* We are passed P pointing to a register number after a start_memory. | 4840 /* We are passed P pointing to a register number after a start_memory. |
4834 | 4841 |
4835 Return true if the pattern up to the corresponding stop_memory can | 4842 Return true if the pattern up to the corresponding stop_memory can |
4836 match the empty string, and false otherwise. | 4843 match the empty string, and false otherwise. |
4837 | 4844 |
4838 If we find the matching stop_memory, sets P to point to one past its number. | 4845 If we find the matching stop_memory, sets P to point to one past its number. |
4839 Otherwise, sets P to an undefined byte less than or equal to END. | 4846 Otherwise, sets P to an undefined byte less than or equal to END. |
4840 | 4847 |
4841 We don't handle duplicates properly (yet). */ | 4848 We don't handle duplicates properly (yet). */ |
4842 | 4849 |
4846 register_info_type *reg_info; | 4853 register_info_type *reg_info; |
4847 { | 4854 { |
4848 int mcnt; | 4855 int mcnt; |
4849 /* Point to after the args to the start_memory. */ | 4856 /* Point to after the args to the start_memory. */ |
4850 unsigned char *p1 = *p + 2; | 4857 unsigned char *p1 = *p + 2; |
4851 | 4858 |
4852 while (p1 < end) | 4859 while (p1 < end) |
4853 { | 4860 { |
4854 /* Skip over opcodes that can match nothing, and return true or | 4861 /* Skip over opcodes that can match nothing, and return true or |
4855 false, as appropriate, when we get to one that can't, or to the | 4862 false, as appropriate, when we get to one that can't, or to the |
4856 matching stop_memory. */ | 4863 matching stop_memory. */ |
4857 | 4864 |
4858 switch ((re_opcode_t) *p1) | 4865 switch ((re_opcode_t) *p1) |
4859 { | 4866 { |
4860 /* Could be either a loop or a series of alternatives. */ | 4867 /* Could be either a loop or a series of alternatives. */ |
4861 case on_failure_jump: | 4868 case on_failure_jump: |
4862 p1++; | 4869 p1++; |
4863 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4870 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4864 | 4871 |
4865 /* If the next operation is not a jump backwards in the | 4872 /* If the next operation is not a jump backwards in the |
4866 pattern. */ | 4873 pattern. */ |
4867 | 4874 |
4868 if (mcnt >= 0) | 4875 if (mcnt >= 0) |
4869 { | 4876 { |
4873 whereas the rest start with on_failure_jump and end | 4880 whereas the rest start with on_failure_jump and end |
4874 with a jump, e.g., here is the pattern for `a|b|c': | 4881 with a jump, e.g., here is the pattern for `a|b|c': |
4875 | 4882 |
4876 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 | 4883 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 |
4877 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 | 4884 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 |
4878 /exactn/1/c | 4885 /exactn/1/c |
4879 | 4886 |
4880 So, we have to first go through the first (n-1) | 4887 So, we have to first go through the first (n-1) |
4881 alternatives and then deal with the last one separately. */ | 4888 alternatives and then deal with the last one separately. */ |
4882 | 4889 |
4883 | 4890 |
4889 { | 4896 { |
4890 /* `mcnt' holds how many bytes long the alternative | 4897 /* `mcnt' holds how many bytes long the alternative |
4891 is, including the ending `jump_past_alt' and | 4898 is, including the ending `jump_past_alt' and |
4892 its number. */ | 4899 its number. */ |
4893 | 4900 |
4894 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, | 4901 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, |
4895 reg_info)) | 4902 reg_info)) |
4896 return false; | 4903 return false; |
4897 | 4904 |
4898 /* Move to right after this alternative, including the | 4905 /* Move to right after this alternative, including the |
4899 jump_past_alt. */ | 4906 jump_past_alt. */ |
4900 p1 += mcnt; | 4907 p1 += mcnt; |
4901 | 4908 |
4902 /* Break if it's the beginning of an n-th alternative | 4909 /* Break if it's the beginning of an n-th alternative |
4903 that doesn't begin with an on_failure_jump. */ | 4910 that doesn't begin with an on_failure_jump. */ |
4904 if ((re_opcode_t) *p1 != on_failure_jump) | 4911 if ((re_opcode_t) *p1 != on_failure_jump) |
4905 break; | 4912 break; |
4906 | 4913 |
4907 /* Still have to check that it's not an n-th | 4914 /* Still have to check that it's not an n-th |
4908 alternative that starts with an on_failure_jump. */ | 4915 alternative that starts with an on_failure_jump. */ |
4909 p1++; | 4916 p1++; |
4910 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4917 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4911 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) | 4918 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) |
4926 | 4933 |
4927 p1 += mcnt; /* Get past the n-th alternative. */ | 4934 p1 += mcnt; /* Get past the n-th alternative. */ |
4928 } /* if mcnt > 0 */ | 4935 } /* if mcnt > 0 */ |
4929 break; | 4936 break; |
4930 | 4937 |
4931 | 4938 |
4932 case stop_memory: | 4939 case stop_memory: |
4933 assert (p1[1] == **p); | 4940 assert (p1[1] == **p); |
4934 *p = p1 + 2; | 4941 *p = p1 + 2; |
4935 return true; | 4942 return true; |
4936 | 4943 |
4937 | 4944 |
4938 default: | 4945 default: |
4939 if (!common_op_match_null_string_p (&p1, end, reg_info)) | 4946 if (!common_op_match_null_string_p (&p1, end, reg_info)) |
4940 return false; | 4947 return false; |
4941 } | 4948 } |
4942 } /* while p1 < end */ | 4949 } /* while p1 < end */ |
4943 | 4950 |
4946 | 4953 |
4947 | 4954 |
4948 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: | 4955 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: |
4949 It expects P to be the first byte of a single alternative and END one | 4956 It expects P to be the first byte of a single alternative and END one |
4950 byte past the last. The alternative can contain groups. */ | 4957 byte past the last. The alternative can contain groups. */ |
4951 | 4958 |
4952 static boolean | 4959 static boolean |
4953 alt_match_null_string_p (p, end, reg_info) | 4960 alt_match_null_string_p (p, end, reg_info) |
4954 unsigned char *p, *end; | 4961 unsigned char *p, *end; |
4955 register_info_type *reg_info; | 4962 register_info_type *reg_info; |
4956 { | 4963 { |
4957 int mcnt; | 4964 int mcnt; |
4958 unsigned char *p1 = p; | 4965 unsigned char *p1 = p; |
4959 | 4966 |
4960 while (p1 < end) | 4967 while (p1 < end) |
4961 { | 4968 { |
4962 /* Skip over opcodes that can match nothing, and break when we get | 4969 /* Skip over opcodes that can match nothing, and break when we get |
4963 to one that can't. */ | 4970 to one that can't. */ |
4964 | 4971 |
4965 switch ((re_opcode_t) *p1) | 4972 switch ((re_opcode_t) *p1) |
4966 { | 4973 { |
4967 /* It's a loop. */ | 4974 /* It's a loop. */ |
4968 case on_failure_jump: | 4975 case on_failure_jump: |
4969 p1++; | 4976 p1++; |
4970 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4977 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4971 p1 += mcnt; | 4978 p1 += mcnt; |
4972 break; | 4979 break; |
4973 | 4980 |
4974 default: | 4981 default: |
4975 if (!common_op_match_null_string_p (&p1, end, reg_info)) | 4982 if (!common_op_match_null_string_p (&p1, end, reg_info)) |
4976 return false; | 4983 return false; |
4977 } | 4984 } |
4978 } /* while p1 < end */ | 4985 } /* while p1 < end */ |
4979 | 4986 |
4980 return true; | 4987 return true; |
4981 } /* alt_match_null_string_p */ | 4988 } /* alt_match_null_string_p */ |
4982 | 4989 |
4983 | 4990 |
4984 /* Deals with the ops common to group_match_null_string_p and | 4991 /* Deals with the ops common to group_match_null_string_p and |
4985 alt_match_null_string_p. | 4992 alt_match_null_string_p. |
4986 | 4993 |
4987 Sets P to one after the op and its arguments, if any. */ | 4994 Sets P to one after the op and its arguments, if any. */ |
4988 | 4995 |
4989 static boolean | 4996 static boolean |
4990 common_op_match_null_string_p (p, end, reg_info) | 4997 common_op_match_null_string_p (p, end, reg_info) |
4991 unsigned char **p, *end; | 4998 unsigned char **p, *end; |
5016 | 5023 |
5017 case start_memory: | 5024 case start_memory: |
5018 reg_no = *p1; | 5025 reg_no = *p1; |
5019 assert (reg_no > 0 && reg_no <= MAX_REGNUM); | 5026 assert (reg_no > 0 && reg_no <= MAX_REGNUM); |
5020 ret = group_match_null_string_p (&p1, end, reg_info); | 5027 ret = group_match_null_string_p (&p1, end, reg_info); |
5021 | 5028 |
5022 /* Have to set this here in case we're checking a group which | 5029 /* Have to set this here in case we're checking a group which |
5023 contains a group and a back reference to it. */ | 5030 contains a group and a back reference to it. */ |
5024 | 5031 |
5025 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) | 5032 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) |
5026 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; | 5033 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; |
5027 | 5034 |
5028 if (!ret) | 5035 if (!ret) |
5029 return false; | 5036 return false; |
5030 break; | 5037 break; |
5031 | 5038 |
5032 /* If this is an optimized succeed_n for zero times, make the jump. */ | 5039 /* If this is an optimized succeed_n for zero times, make the jump. */ |
5033 case jump: | 5040 case jump: |
5034 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5041 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5035 if (mcnt >= 0) | 5042 if (mcnt >= 0) |
5036 p1 += mcnt; | 5043 p1 += mcnt; |
5038 return false; | 5045 return false; |
5039 break; | 5046 break; |
5040 | 5047 |
5041 case succeed_n: | 5048 case succeed_n: |
5042 /* Get to the number of times to succeed. */ | 5049 /* Get to the number of times to succeed. */ |
5043 p1 += 2; | 5050 p1 += 2; |
5044 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5051 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5045 | 5052 |
5046 if (mcnt == 0) | 5053 if (mcnt == 0) |
5047 { | 5054 { |
5048 p1 -= 4; | 5055 p1 -= 4; |
5051 } | 5058 } |
5052 else | 5059 else |
5053 return false; | 5060 return false; |
5054 break; | 5061 break; |
5055 | 5062 |
5056 case duplicate: | 5063 case duplicate: |
5057 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) | 5064 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) |
5058 return false; | 5065 return false; |
5059 break; | 5066 break; |
5060 | 5067 |
5061 case set_number_at: | 5068 case set_number_at: |
5071 } /* common_op_match_null_string_p */ | 5078 } /* common_op_match_null_string_p */ |
5072 | 5079 |
5073 | 5080 |
5074 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN | 5081 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN |
5075 bytes; nonzero otherwise. */ | 5082 bytes; nonzero otherwise. */ |
5076 | 5083 |
5077 static int | 5084 static int |
5078 bcmp_translate (s1, s2, len, translate) | 5085 bcmp_translate (s1, s2, len, translate) |
5079 unsigned char *s1, *s2; | 5086 unsigned char *s1, *s2; |
5080 register int len; | 5087 register int len; |
5081 RE_TRANSLATE_TYPE translate; | 5088 RE_TRANSLATE_TYPE translate; |
5092 /* Entry points for GNU code. */ | 5099 /* Entry points for GNU code. */ |
5093 | 5100 |
5094 /* re_compile_pattern is the GNU regular expression compiler: it | 5101 /* re_compile_pattern is the GNU regular expression compiler: it |
5095 compiles PATTERN (of length SIZE) and puts the result in BUFP. | 5102 compiles PATTERN (of length SIZE) and puts the result in BUFP. |
5096 Returns 0 if the pattern was valid, otherwise an error string. | 5103 Returns 0 if the pattern was valid, otherwise an error string. |
5097 | 5104 |
5098 Assumes the `allocated' (and perhaps `buffer') and `translate' fields | 5105 Assumes the `allocated' (and perhaps `buffer') and `translate' fields |
5099 are set in BUFP on entry. | 5106 are set in BUFP on entry. |
5100 | 5107 |
5101 We call regex_compile to do the actual compilation. */ | 5108 We call regex_compile to do the actual compilation. */ |
5102 | 5109 |
5103 const char * | 5110 const char * |
5104 re_compile_pattern (pattern, length, bufp) | 5111 re_compile_pattern (pattern, length, bufp) |
5105 const char *pattern; | 5112 const char *pattern; |
5106 int length; | 5113 int length; |
5107 struct re_pattern_buffer *bufp; | 5114 struct re_pattern_buffer *bufp; |
5108 { | 5115 { |
5109 reg_errcode_t ret; | 5116 reg_errcode_t ret; |
5110 | 5117 |
5111 /* GNU code is written to assume at least RE_NREGS registers will be set | 5118 /* GNU code is written to assume at least RE_NREGS registers will be set |
5112 (and at least one extra will be -1). */ | 5119 (and at least one extra will be -1). */ |
5113 bufp->regs_allocated = REGS_UNALLOCATED; | 5120 bufp->regs_allocated = REGS_UNALLOCATED; |
5114 | 5121 |
5115 /* And GNU code determines whether or not to get register information | 5122 /* And GNU code determines whether or not to get register information |
5116 by passing null for the REGS argument to re_match, etc., not by | 5123 by passing null for the REGS argument to re_match, etc., not by |
5117 setting no_sub. */ | 5124 setting no_sub. */ |
5118 bufp->no_sub = 0; | 5125 bufp->no_sub = 0; |
5119 | 5126 |
5120 /* Match anchors at newline. */ | 5127 /* Match anchors at newline. */ |
5121 bufp->newline_anchor = 1; | 5128 bufp->newline_anchor = 1; |
5122 | 5129 |
5123 ret = regex_compile (pattern, length, re_syntax_options, bufp); | 5130 ret = regex_compile (pattern, length, re_syntax_options, bufp); |
5124 | 5131 |
5125 if (!ret) | 5132 if (!ret) |
5126 return NULL; | 5133 return NULL; |
5127 return gettext (re_error_msgid[(int) ret]); | 5134 return gettext (re_error_msgid[(int) ret]); |
5128 } | 5135 } |
5129 | 5136 |
5130 /* Entry points compatible with 4.2 BSD regex library. We don't define | 5137 /* Entry points compatible with 4.2 BSD regex library. We don't define |
5131 them unless specifically requested. */ | 5138 them unless specifically requested. */ |
5132 | 5139 |
5133 #ifdef _REGEX_RE_COMP | 5140 #ifdef _REGEX_RE_COMP |
5138 char * | 5145 char * |
5139 re_comp (s) | 5146 re_comp (s) |
5140 const char *s; | 5147 const char *s; |
5141 { | 5148 { |
5142 reg_errcode_t ret; | 5149 reg_errcode_t ret; |
5143 | 5150 |
5144 if (!s) | 5151 if (!s) |
5145 { | 5152 { |
5146 if (!re_comp_buf.buffer) | 5153 if (!re_comp_buf.buffer) |
5147 return gettext ("No previous regular expression"); | 5154 return gettext ("No previous regular expression"); |
5148 return 0; | 5155 return 0; |
5165 | 5172 |
5166 /* Match anchors at newlines. */ | 5173 /* Match anchors at newlines. */ |
5167 re_comp_buf.newline_anchor = 1; | 5174 re_comp_buf.newline_anchor = 1; |
5168 | 5175 |
5169 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); | 5176 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); |
5170 | 5177 |
5171 if (!ret) | 5178 if (!ret) |
5172 return NULL; | 5179 return NULL; |
5173 | 5180 |
5174 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ | 5181 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ |
5175 return (char *) gettext (re_error_msgid[(int) ret]); | 5182 return (char *) gettext (re_error_msgid[(int) ret]); |
5225 the return codes and their meanings.) */ | 5232 the return codes and their meanings.) */ |
5226 | 5233 |
5227 int | 5234 int |
5228 regcomp (preg, pattern, cflags) | 5235 regcomp (preg, pattern, cflags) |
5229 regex_t *preg; | 5236 regex_t *preg; |
5230 const char *pattern; | 5237 const char *pattern; |
5231 int cflags; | 5238 int cflags; |
5232 { | 5239 { |
5233 reg_errcode_t ret; | 5240 reg_errcode_t ret; |
5234 unsigned syntax | 5241 unsigned syntax |
5235 = (cflags & REG_EXTENDED) ? | 5242 = (cflags & REG_EXTENDED) ? |
5237 | 5244 |
5238 /* regex_compile will allocate the space for the compiled pattern. */ | 5245 /* regex_compile will allocate the space for the compiled pattern. */ |
5239 preg->buffer = 0; | 5246 preg->buffer = 0; |
5240 preg->allocated = 0; | 5247 preg->allocated = 0; |
5241 preg->used = 0; | 5248 preg->used = 0; |
5242 | 5249 |
5243 /* Don't bother to use a fastmap when searching. This simplifies the | 5250 /* Don't bother to use a fastmap when searching. This simplifies the |
5244 REG_NEWLINE case: if we used a fastmap, we'd have to put all the | 5251 REG_NEWLINE case: if we used a fastmap, we'd have to put all the |
5245 characters after newlines into the fastmap. This way, we just try | 5252 characters after newlines into the fastmap. This way, we just try |
5246 every character. */ | 5253 every character. */ |
5247 preg->fastmap = 0; | 5254 preg->fastmap = 0; |
5248 | 5255 |
5249 if (cflags & REG_ICASE) | 5256 if (cflags & REG_ICASE) |
5250 { | 5257 { |
5251 unsigned i; | 5258 unsigned i; |
5252 | 5259 |
5253 preg->translate | 5260 preg->translate |
5254 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE | 5261 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE |
5255 * sizeof (*(RE_TRANSLATE_TYPE)0)); | 5262 * sizeof (*(RE_TRANSLATE_TYPE)0)); |
5256 if (preg->translate == NULL) | 5263 if (preg->translate == NULL) |
5257 return (int) REG_ESPACE; | 5264 return (int) REG_ESPACE; |
5274 else | 5281 else |
5275 preg->newline_anchor = 0; | 5282 preg->newline_anchor = 0; |
5276 | 5283 |
5277 preg->no_sub = !!(cflags & REG_NOSUB); | 5284 preg->no_sub = !!(cflags & REG_NOSUB); |
5278 | 5285 |
5279 /* POSIX says a null character in the pattern terminates it, so we | 5286 /* POSIX says a null character in the pattern terminates it, so we |
5280 can use strlen here in compiling the pattern. */ | 5287 can use strlen here in compiling the pattern. */ |
5281 ret = regex_compile (pattern, strlen (pattern), syntax, preg); | 5288 ret = regex_compile (pattern, strlen (pattern), syntax, preg); |
5282 | 5289 |
5283 /* POSIX doesn't distinguish between an unmatched open-group and an | 5290 /* POSIX doesn't distinguish between an unmatched open-group and an |
5284 unmatched close-group: both are REG_EPAREN. */ | 5291 unmatched close-group: both are REG_EPAREN. */ |
5285 if (ret == REG_ERPAREN) ret = REG_EPAREN; | 5292 if (ret == REG_ERPAREN) ret = REG_EPAREN; |
5286 | 5293 |
5287 return (int) ret; | 5294 return (int) ret; |
5288 } | 5295 } |
5289 | 5296 |
5290 | 5297 |
5291 /* regexec searches for a given pattern, specified by PREG, in the | 5298 /* regexec searches for a given pattern, specified by PREG, in the |
5292 string STRING. | 5299 string STRING. |
5293 | 5300 |
5294 If NMATCH is zero or REG_NOSUB was set in the cflags argument to | 5301 If NMATCH is zero or REG_NOSUB was set in the cflags argument to |
5295 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at | 5302 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at |
5296 least NMATCH elements, and we set them to the offsets of the | 5303 least NMATCH elements, and we set them to the offsets of the |
5297 corresponding matched substrings. | 5304 corresponding matched substrings. |
5298 | 5305 |
5299 EFLAGS specifies `execution flags' which affect matching: if | 5306 EFLAGS specifies `execution flags' which affect matching: if |
5300 REG_NOTBOL is set, then ^ does not match at the beginning of the | 5307 REG_NOTBOL is set, then ^ does not match at the beginning of the |
5301 string; if REG_NOTEOL is set, then $ does not match at the end. | 5308 string; if REG_NOTEOL is set, then $ does not match at the end. |
5302 | 5309 |
5303 We return 0 if we find a match and REG_NOMATCH if not. */ | 5310 We return 0 if we find a match and REG_NOMATCH if not. */ |
5304 | 5311 |
5305 int | 5312 int |
5306 regexec (preg, string, nmatch, pmatch, eflags) | 5313 regexec (preg, string, nmatch, pmatch, eflags) |
5307 const regex_t *preg; | 5314 const regex_t *preg; |
5308 const char *string; | 5315 const char *string; |
5309 size_t nmatch; | 5316 size_t nmatch; |
5310 regmatch_t pmatch[]; | 5317 regmatch_t pmatch[]; |
5311 int eflags; | 5318 int eflags; |
5312 { | 5319 { |
5313 int ret; | 5320 int ret; |
5314 struct re_registers regs; | 5321 struct re_registers regs; |
5315 regex_t private_preg; | 5322 regex_t private_preg; |
5316 int len = strlen (string); | 5323 int len = strlen (string); |
5317 boolean want_reg_info = !preg->no_sub && nmatch > 0; | 5324 boolean want_reg_info = !preg->no_sub && nmatch > 0; |
5318 | 5325 |
5319 private_preg = *preg; | 5326 private_preg = *preg; |
5320 | 5327 |
5321 private_preg.not_bol = !!(eflags & REG_NOTBOL); | 5328 private_preg.not_bol = !!(eflags & REG_NOTBOL); |
5322 private_preg.not_eol = !!(eflags & REG_NOTEOL); | 5329 private_preg.not_eol = !!(eflags & REG_NOTEOL); |
5323 | 5330 |
5324 /* The user has told us exactly how many registers to return | 5331 /* The user has told us exactly how many registers to return |
5325 information about, via `nmatch'. We have to pass that on to the | 5332 information about, via `nmatch'. We have to pass that on to the |
5326 matching routines. */ | 5333 matching routines. */ |
5327 private_preg.regs_allocated = REGS_FIXED; | 5334 private_preg.regs_allocated = REGS_FIXED; |
5328 | 5335 |
5329 if (want_reg_info) | 5336 if (want_reg_info) |
5330 { | 5337 { |
5331 regs.num_regs = nmatch; | 5338 regs.num_regs = nmatch; |
5332 regs.start = TALLOC (nmatch, regoff_t); | 5339 regs.start = TALLOC (nmatch, regoff_t); |
5333 regs.end = TALLOC (nmatch, regoff_t); | 5340 regs.end = TALLOC (nmatch, regoff_t); |
5337 | 5344 |
5338 /* Perform the searching operation. */ | 5345 /* Perform the searching operation. */ |
5339 ret = re_search (&private_preg, string, len, | 5346 ret = re_search (&private_preg, string, len, |
5340 /* start: */ 0, /* range: */ len, | 5347 /* start: */ 0, /* range: */ len, |
5341 want_reg_info ? &regs : (struct re_registers *) 0); | 5348 want_reg_info ? &regs : (struct re_registers *) 0); |
5342 | 5349 |
5343 /* Copy the register information to the POSIX structure. */ | 5350 /* Copy the register information to the POSIX structure. */ |
5344 if (want_reg_info) | 5351 if (want_reg_info) |
5345 { | 5352 { |
5346 if (ret >= 0) | 5353 if (ret >= 0) |
5347 { | 5354 { |
5377 const char *msg; | 5384 const char *msg; |
5378 size_t msg_size; | 5385 size_t msg_size; |
5379 | 5386 |
5380 if (errcode < 0 | 5387 if (errcode < 0 |
5381 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) | 5388 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) |
5382 /* Only error codes returned by the rest of the code should be passed | 5389 /* Only error codes returned by the rest of the code should be passed |
5383 to this routine. If we are given anything else, or if other regex | 5390 to this routine. If we are given anything else, or if other regex |
5384 code generates an invalid error code, then the program has a bug. | 5391 code generates an invalid error code, then the program has a bug. |
5385 Dump core so we can fix it. */ | 5392 Dump core so we can fix it. */ |
5386 abort (); | 5393 abort (); |
5387 | 5394 |
5388 msg = gettext (re_error_msgid[errcode]); | 5395 msg = gettext (re_error_msgid[errcode]); |
5389 | 5396 |
5390 msg_size = strlen (msg) + 1; /* Includes the null. */ | 5397 msg_size = strlen (msg) + 1; /* Includes the null. */ |
5391 | 5398 |
5392 if (errbuf_size != 0) | 5399 if (errbuf_size != 0) |
5393 { | 5400 { |
5394 if (msg_size > errbuf_size) | 5401 if (msg_size > errbuf_size) |
5395 { | 5402 { |
5396 strncpy (errbuf, msg, errbuf_size - 1); | 5403 strncpy (errbuf, msg, errbuf_size - 1); |
5411 regex_t *preg; | 5418 regex_t *preg; |
5412 { | 5419 { |
5413 if (preg->buffer != NULL) | 5420 if (preg->buffer != NULL) |
5414 free (preg->buffer); | 5421 free (preg->buffer); |
5415 preg->buffer = NULL; | 5422 preg->buffer = NULL; |
5416 | 5423 |
5417 preg->allocated = 0; | 5424 preg->allocated = 0; |
5418 preg->used = 0; | 5425 preg->used = 0; |
5419 | 5426 |
5420 if (preg->fastmap != NULL) | 5427 if (preg->fastmap != NULL) |
5421 free (preg->fastmap); | 5428 free (preg->fastmap); |