diff src/regex.c @ 89943:4c90ffeb71c5

Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-15 Merge from emacs--cvs-trunk--0 Patches applied: * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-218 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-220 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-221 Restore deleted tagline in etc/TUTORIAL.ru * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-222 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-228 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-229 Remove TeX output files from the archive * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-230 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-247 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-248 src/lisp.h (CYCLE_CHECK): Macro moved from xfaces.c * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-249 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-256 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-258 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-263 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-264 Update from CVS: lispref/display.texi: emacs -> Emacs. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-265 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-274 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-275 Update from CVS: man/makefile.w32-in: Revert last change * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-276 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-295 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-296 Allow restarting an existing debugger session that's exited * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-297 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-299 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-300 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-327 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-328 Update from CVS: src/.gdbinit (xsymbol): Fix last change. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-329 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-344 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-345 Tweak source regexps so that building in place won't cause problems * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-346 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-351 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-352 Update from CVS: lisp/flymake.el: New file. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-353 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-361 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-362 Support " [...]" style defaults in minibuffer-electric-default-mode * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-363 (read-number): Use canonical format for default in prompt. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-364 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-367 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-368 Improve display-supports-face-attributes-p on non-ttys * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-369 Rewrite face-differs-from-default-p * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-370 Move `display-supports-face-attributes-p' entirely into C code * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-371 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-372 Simplify face-differs-from-default-p; don't consider :stipple. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-373 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-374 (tty_supports_face_attributes_p): Ensure attributes differ from default * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-375 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-376 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-377 (Fdisplay_supports_face_attributes_p): Work around bootstrapping problem * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-378 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-380 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-381 Face merging cleanups * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-382 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-384 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-385 src/xfaces.c (push_named_merge_point): Return 0 if a cycle is detected * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-386 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-395 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-396 Tweak arch tagging to make build/install-in-place less annoying * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-397 Work around vc-arch problems when building eshell * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-398 Tweak permissions * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-399 Tweak directory permissions * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-400 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-401 More build-in-place tweaking of arch tagging * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-402 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-403 Yet more build-in-place tweaking of arch tagging * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-404 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-409 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-410 Make sure image types are initialized for lookup too * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-411 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-416 Update from CVS
author Miles Bader <miles@gnu.org>
date Mon, 28 Jun 2004 07:56:49 +0000
parents 51677cb10ced 10b16f7f1a51
children 0fe073a08cef
line wrap: on
line diff
--- a/src/regex.c	Sat May 29 02:17:09 2004 +0000
+++ b/src/regex.c	Mon Jun 28 07:56:49 2004 +0000
@@ -2,7 +2,7 @@
    0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
    internationalization features.)
 
-   Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
+   Copyright (C) 1993,94,95,96,97,98,99,2000,04  Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -233,7 +233,7 @@
 /* Define the syntax stuff for \<, \>, etc.  */
 
 /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
-enum syntaxcode { Swhitespace = 0, Sword = 1 };
+enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 
 # ifdef SWITCH_ENUM_BUG
 #  define SWITCH_ENUM_CAST(x) ((int)(x))
@@ -423,7 +423,7 @@
      if (ISALNUM (c))
 	re_syntax_table[c] = Sword;
 
-   re_syntax_table['_'] = Sword;
+   re_syntax_table['_'] = Ssymbol;
 
    done = 1;
 }
@@ -680,6 +680,9 @@
   wordbound,	/* Succeeds if at a word boundary.  */
   notwordbound,	/* Succeeds if not at a word boundary.	*/
 
+  symbeg,       /* Succeeds if at symbol beginning.  */
+  symend,       /* Succeeds if at symbol end.  */
+
 	/* Matches any character whose syntax is specified.  Followed by
 	   a byte which contains a syntax code, e.g., Sword.  */
   syntaxspec,
@@ -1118,6 +1121,15 @@
 
 	case wordend:
 	  fprintf (stderr, "/wordend");
+	  break;
+
+	case symbeg:
+	  fprintf (stderr, "/symbeg");
+	  break;
+
+	case symend:
+	  fprintf (stderr, "/symend");
+	  break;
 
 	case syntaxspec:
 	  fprintf (stderr, "/syntaxspec");
@@ -2003,41 +2015,10 @@
        }								\
     } while (0)
 
-#if WIDE_CHAR_SUPPORT
-/* The GNU C library provides support for user-defined character classes
-   and the functions from ISO C amendement 1.  */
-# ifdef CHARCLASS_NAME_MAX
-#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
-# else
-/* This shouldn't happen but some implementation might still have this
-   problem.  Use a reasonable default value.  */
-#  define CHAR_CLASS_MAX_LENGTH 256
-# endif
-typedef wctype_t re_wctype_t;
-typedef wchar_t re_wchar_t;
-# define re_wctype wctype
-# define re_iswctype iswctype
-# define re_wctype_to_bit(cc) 0
-#else
-# define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
-# define btowc(c) c
-
-/* Character classes.  */
-typedef enum { RECC_ERROR = 0,
-	       RECC_ALNUM, RECC_ALPHA, RECC_WORD,
-	       RECC_GRAPH, RECC_PRINT,
-	       RECC_LOWER, RECC_UPPER,
-	       RECC_PUNCT, RECC_CNTRL,
-	       RECC_DIGIT, RECC_XDIGIT,
-	       RECC_BLANK, RECC_SPACE,
-	       RECC_MULTIBYTE, RECC_NONASCII,
-	       RECC_ASCII, RECC_UNIBYTE
-} re_wctype_t;
-
-typedef int re_wchar_t;
+#if ! WIDE_CHAR_SUPPORT
 
 /* Map a string to the char class it names (if any).  */
-static re_wctype_t
+re_wctype_t
 re_wctype (str)
      re_char *str;
 {
@@ -2063,7 +2044,7 @@
 }
 
 /* True iff CH is in the char class CC.  */
-static boolean
+boolean
 re_iswctype (ch, cc)
      int ch;
      re_wctype_t cc;
@@ -3464,6 +3445,19 @@
 	      BUF_PUSH (wordend);
 	      break;
 
+	    case '_':
+	      if (syntax & RE_NO_GNU_OPS)
+		goto normal_char;
+              laststart = b;
+              PATFETCH (c);
+              if (c == '<')
+                BUF_PUSH (symbeg);
+              else if (c == '>')
+                BUF_PUSH (symend);
+              else
+                FREE_STACK_RETURN (REG_BADPAT);
+              break;
+
 	    case 'b':
 	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
@@ -3980,6 +3974,8 @@
 	case notwordbound:
 	case wordbeg:
 	case wordend:
+	case symbeg:
+	case symend:
 	  continue;
 
 
@@ -4768,14 +4764,20 @@
       break;
 
     case wordend:
-    case notsyntaxspec:
+      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
+    case symend:
       return ((re_opcode_t) *p1 == syntaxspec
-	      && p1[1] == (op2 == wordend ? Sword : p2[1]));
+              && (p1[1] == Ssymbol || p1[1] == Sword));
+    case notsyntaxspec:
+      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
 
     case wordbeg:
-    case syntaxspec:
+      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
+    case symbeg:
       return ((re_opcode_t) *p1 == notsyntaxspec
-	      && p1[1] == (op2 == wordend ? Sword : p2[1]));
+              && (p1[1] == Ssymbol || p1[1] == Sword));
+    case syntaxspec:
+      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
 
     case wordbound:
       return (((re_opcode_t) *p1 == notsyntaxspec
@@ -5952,6 +5954,92 @@
 	    }
 	  break;
 
+	case symbeg:
+	  DEBUG_PRINT1 ("EXECUTING symbeg.\n");
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the end of string.	 */
+	  if (AT_STRINGS_END (d))
+	    goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      re_wchar_t c1, c2;
+	      int s1, s2;
+#ifdef emacs
+	      int offset = PTR_TO_OFFSET (d);
+	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
+	      PREFETCH ();
+	      c2 = RE_STRING_CHAR (d, dend - d);
+	      s2 = SYNTAX (c2);
+	
+	      /* Case 2: S2 is neither Sword nor Ssymbol. */
+	      if (s2 != Sword && s2 != Ssymbol)
+		goto fail;
+
+	      /* Case 3: D is not at the beginning of string ... */
+	      if (!AT_STRINGS_BEG (d))
+		{
+		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+#endif
+		  s1 = SYNTAX (c1);
+
+		  /* ... and S1 is Sword or Ssymbol.  */
+		  if (s1 == Sword || s1 == Ssymbol)
+		    goto fail;
+		}
+	    }
+	  break;
+
+	case symend:
+	  DEBUG_PRINT1 ("EXECUTING symend.\n");
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the beginning of string.  */
+	  if (AT_STRINGS_BEG (d))
+	    goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      re_wchar_t c1, c2;
+	      int s1, s2;
+#ifdef emacs
+	      int offset = PTR_TO_OFFSET (d) - 1;
+	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
+	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+	      s1 = SYNTAX (c1);
+
+	      /* Case 2: S1 is neither Ssymbol nor Sword.  */
+	      if (s1 != Sword && s1 != Ssymbol)
+		goto fail;
+
+	      /* Case 3: D is not at the end of string ... */
+	      if (!AT_STRINGS_END (d))
+		{
+		  PREFETCH_NOLIMIT ();
+		  c2 = RE_STRING_CHAR (d, dend - d);
+#ifdef emacs
+		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
+		  s2 = SYNTAX (c2);
+
+		  /* ... and S2 is Sword or Ssymbol.  */
+		  if (s2 == Sword || s2 == Ssymbol)
+                    goto fail;
+		}
+	    }
+	  break;
+
 	case syntaxspec:
 	case notsyntaxspec:
 	  not = (re_opcode_t) *(p - 1) == notsyntaxspec;