diff src/regex.c @ 90054:f2ebccfa87d4

Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-74 Merge from emacs--cvs-trunk--0 Patches applied: * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-709 Update from CVS: src/indent.c (Fvertical_motion): Fix last change. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-710 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-715 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-716 Merge from gnus--rel--5.10 * miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-74 Update from CVS
author Miles Bader <miles@gnu.org>
date Wed, 08 Dec 2004 05:02:30 +0000
parents 0fe073a08cef 5f1c0193e984
children eac554634bfa
line wrap: on
line diff
--- a/src/regex.c	Mon Dec 06 12:38:25 2004 +0000
+++ b/src/regex.c	Wed Dec 08 05:02:30 2004 +0000
@@ -1275,7 +1275,7 @@
 
 reg_syntax_t
 re_set_syntax (syntax)
-    reg_syntax_t syntax;
+     reg_syntax_t syntax;
 {
   reg_syntax_t ret = re_syntax_options;
 
@@ -1283,6 +1283,17 @@
   return ret;
 }
 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
+
+/* Regexp to use to replace spaces, or NULL meaning don't.  */
+static re_char *whitespace_regexp;
+
+void
+re_set_whitespace_regexp (regexp)
+     re_char *regexp;
+{
+  whitespace_regexp = regexp;
+}
+WEAK_ALIAS (__re_set_syntax, re_set_syntax)
 
 /* This table gives an error message for each of the error codes listed
    in regex.h.  Obviously the order here has to be same as there.
@@ -1993,28 +2004,27 @@
 
 /* Get the next unsigned number in the uncompiled pattern.  */
 #define GET_UNSIGNED_NUMBER(num)					\
- do { if (p != pend)							\
-     {									\
-       PATFETCH (c);							\
-       if (c == ' ')							\
-	 FREE_STACK_RETURN (REG_BADBR);					\
-       while ('0' <= c && c <= '9')					\
-	 {								\
-           int prev;							\
-	   if (num < 0)							\
-	     num = 0;							\
-	   prev = num;							\
-	   num = num * 10 + c - '0';					\
-	   if (num / 10 != prev)					\
-	     FREE_STACK_RETURN (REG_BADBR);				\
-	   if (p == pend)						\
-	     break;							\
-	   PATFETCH (c);						\
-	 }								\
-       if (c == ' ')							\
-	 FREE_STACK_RETURN (REG_BADBR);					\
-       }								\
-    } while (0)
+  do {									\
+    if (p == pend)							\
+      FREE_STACK_RETURN (REG_EBRACE);					\
+    else								\
+      {									\
+	PATFETCH (c);							\
+	while ('0' <= c && c <= '9')					\
+	  {								\
+	    int prev;							\
+	    if (num < 0)						\
+	      num = 0;							\
+	    prev = num;							\
+	    num = num * 10 + c - '0';					\
+	    if (num / 10 != prev)					\
+	      FREE_STACK_RETURN (REG_BADBR);				\
+	    if (p == pend)						\
+	      FREE_STACK_RETURN (REG_EBRACE);				\
+	    PATFETCH (c);						\
+	  }								\
+      }									\
+  } while (0)
 
 #if ! WIDE_CHAR_SUPPORT
 
@@ -2495,6 +2505,15 @@
   /* If a target of matching can contain multibyte characters.  */
   const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
 
+  /* Nonzero if we have pushed down into a subpattern.  */
+  int in_subpattern = 0;
+
+  /* These hold the values of p, pattern, and pend from the main
+     pattern when we have pushed into a subpattern.  */
+  re_char *main_p;
+  re_char *main_pattern;
+  re_char *main_pend;
+
 #ifdef DEBUG
   debug++;
   DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -2557,12 +2576,61 @@
   begalt = b = bufp->buffer;
 
   /* Loop through the uncompiled pattern until we're at the end.  */
-  while (p != pend)
+  while (1)
     {
+      if (p == pend)
+	{
+	  /* If this is the end of an included regexp,
+	     pop back to the main regexp and try again.  */
+	  if (in_subpattern)
+	    {
+	      in_subpattern = 0;
+	      pattern = main_pattern;
+	      p = main_p;
+	      pend = main_pend;
+	      continue;
+	    }
+	  /* If this is the end of the main regexp, we are done.  */
+	  break;
+	}
+
       PATFETCH (c);
 
       switch (c)
 	{
+	case ' ':
+	  {
+	    re_char *p1 = p;
+
+	    /* If there's no special whitespace regexp, treat
+	       spaces normally.  And don't try to do this recursively.  */
+	    if (!whitespace_regexp || in_subpattern)
+	      goto normal_char;
+
+	    /* Peek past following spaces.  */
+	    while (p1 != pend)
+	      {
+		if (*p1 != ' ')
+		  break;
+		p1++;
+	      }
+	    /* If the spaces are followed by a repetition op,
+	       treat them normally.  */
+	    if (p1 != pend
+		&& (*p1 == '*' || *p1 == '+' || *p1 == '?'
+		    || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
+	      goto normal_char;
+
+	    /* Replace the spaces with the whitespace regexp.  */
+	    in_subpattern = 1;
+	    main_p = p1;
+	    main_pend = pend;
+	    main_pattern = pattern;
+	    p = pattern = whitespace_regexp;
+	    pend = p + strlen (p);
+	    break;
+	  }    
+
 	case '^':
 	  {
 	    if (   /* If at start of pattern, it's an operator.	 */
@@ -3231,9 +3299,6 @@
 
 		beg_interval = p;
 
-		if (p == pend)
-		  FREE_STACK_RETURN (REG_EBRACE);
-
 		GET_UNSIGNED_NUMBER (lower_bound);
 
 		if (c == ',')
@@ -3250,7 +3315,8 @@
 		  {
 		    if (c != '\\')
 		      FREE_STACK_RETURN (REG_BADBR);
-
+		    if (p == pend)
+		      FREE_STACK_RETURN (REG_EESCAPE);
 		    PATFETCH (c);
 		  }