changeset 56088:a7a676c680d4

Include regex.h. (skip_chars): New arg HANDLE_ISO_CLASSES. Callers changed. If requested, make a list of classes, then check the scanned chars for membership in them. (in_classes): New function. (Fskip_chars_forward): Doc fix.
author Richard M. Stallman <rms@gnu.org>
date Sun, 13 Jun 2004 22:25:34 +0000
parents 3d12da599e18
children 36a475d543b8
files src/syntax.c
diffstat 1 files changed, 119 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/syntax.c	Sun Jun 13 22:20:55 2004 +0000
+++ b/src/syntax.c	Sun Jun 13 22:25:34 2004 +0000
@@ -26,6 +26,7 @@
 #include "buffer.h"
 #include "charset.h"
 #include "keymap.h"
+#include "regex.h"
 
 /* Make syntax table lookup grant data in gl_state.  */
 #define SYNTAX_ENTRY_VIA_PROPERTY
@@ -97,11 +98,12 @@
 static int find_defun_start P_ ((int, int));
 static int back_comment P_ ((int, int, int, int, int, int *, int *));
 static int char_quoted P_ ((int, int));
-static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
+static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int));
 static Lisp_Object scan_lists P_ ((int, int, int, int));
 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
 				    int, int, int, int,
 				    int, Lisp_Object, int));
+static int in_classes P_ ((int, Lisp_Object));
 
 
 struct gl_state_s gl_state;		/* Global state of syntax parser.  */
@@ -1321,13 +1323,13 @@
  (but not as the end of a range; quoting is never needed there).
 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
 With arg "^a-zA-Z", skips nonletters stopping before first letter.
-Returns the distance traveled, either zero or positive.
-Note that char classes, e.g. `[:alpha:]', are not currently supported;
-they will be treated as literals.  */)
+Char classes, e.g. `[:alpha:]', are supported.
+
+Returns the distance traveled, either zero or positive.  */)
      (string, lim)
      Lisp_Object string, lim;
 {
-  return skip_chars (1, 0, string, lim);
+  return skip_chars (1, 0, string, lim, 1);
 }
 
 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
@@ -1337,7 +1339,7 @@
      (string, lim)
      Lisp_Object string, lim;
 {
-  return skip_chars (0, 0, string, lim);
+  return skip_chars (0, 0, string, lim, 1);
 }
 
 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
@@ -1349,7 +1351,7 @@
      (syntax, lim)
      Lisp_Object syntax, lim;
 {
-  return skip_chars (1, 1, syntax, lim);
+  return skip_chars (1, 1, syntax, lim, 0);
 }
 
 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
@@ -1361,13 +1363,14 @@
      (syntax, lim)
      Lisp_Object syntax, lim;
 {
-  return skip_chars (0, 1, syntax, lim);
+  return skip_chars (0, 1, syntax, lim, 0);
 }
 
 static Lisp_Object
-skip_chars (forwardp, syntaxp, string, lim)
+skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
      int forwardp, syntaxp;
      Lisp_Object string, lim;
+     int handle_iso_classes;
 {
   register unsigned int c;
   unsigned char fastmap[0400];
@@ -1383,12 +1386,14 @@
   int size_byte;
   const unsigned char *str;
   int len;
+  Lisp_Object iso_classes;
 
   CHECK_STRING (string);
   char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
   string_multibyte = STRING_MULTIBYTE (string);
   str = SDATA (string);
   size_byte = SBYTES (string);
+  iso_classes = Qnil;
 
   /* Adjust the multibyteness of the string to that of the buffer.  */
   if (multibyte != string_multibyte)
@@ -1444,6 +1449,45 @@
 	fastmap[syntax_spec_code[c & 0377]] = 1;
       else
 	{
+	  if (handle_iso_classes && c == '['
+	      && i_byte < size_byte
+	      && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
+	    {
+	      const unsigned char *class_beg = str + i_byte + 1;
+	      const unsigned char *class_end = class_beg;
+	      const unsigned char *class_limit = str + size_byte;
+	      /* Leave room for the null.	 */
+	      unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
+	      re_wctype_t cc;
+
+	      if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
+		class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
+
+	      while (class_end != class_limit
+		     && ! (*class_end >= 0200
+			   || *class_end <= 040
+			   || (*class_end == ':'
+			       && class_end[1] == ']')))
+		class_end++;
+
+	      if (class_end == class_limit
+		  || *class_end >= 0200
+		  || *class_end <= 040)
+		error ("Invalid ISO C character class");
+
+	      bcopy (class_beg, class_name, class_end - class_beg);
+	      class_name[class_end - class_beg] = 0;
+
+	      cc = re_wctype (class_name);
+	      if (cc == 0)
+		error ("Invalid ISO C character class");
+
+	      iso_classes = Fcons (make_number (cc), iso_classes);
+
+	      i_byte = class_end + 2 - str;
+	      continue;
+	    }
+
 	  if (c == '\\')
 	    {
 	      if (i_byte == size_byte)
@@ -1637,6 +1681,15 @@
 		      stop = endp;
 		    }
 		  c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+
+		  if (! NILP (iso_classes) && in_classes (c, iso_classes))
+		    {
+		      if (negate)
+			break;
+		      else
+			goto fwd_ok;
+		    }
+
 		  if (SINGLE_BYTE_CHAR_P (c))
 		    {
 		      if (!fastmap[c])
@@ -1659,6 +1712,7 @@
 		      if (!(negate ^ (i < n_char_ranges)))
 			break;
 		    }
+		fwd_ok:
 		  p += nbytes, pos++, pos_byte += nbytes;
 		}
 	    else
@@ -1671,8 +1725,19 @@
 		      p = GAP_END_ADDR;
 		      stop = endp;
 		    }
+
+		  if (!NILP (iso_classes) && in_classes (*p, iso_classes))
+		    {
+		      if (negate)
+			break;
+		      else
+			goto fwd_ok;
+		    }
+
 		  if (!fastmap[*p])
 		    break;
+
+		fwd_unibyte_ok:
 		  p++, pos++;
 		}
 	  }
@@ -1698,6 +1763,15 @@
 		    p = prev_p - 1, c = *p, nbytes = 1;
 		  else
 		    c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+
+		  if (! NILP (iso_classes) && in_classes (c, iso_classes))
+		    {
+		      if (negate)
+			break;
+		      else
+			goto back_ok;
+		    }
+
 		  if (SINGLE_BYTE_CHAR_P (c))
 		    {
 		      if (!fastmap[c])
@@ -1712,6 +1786,7 @@
 		      if (!(negate ^ (i < n_char_ranges)))
 			break;
 		    }
+		back_ok:
 		  pos--, pos_byte -= nbytes;
 		}
 	    else
@@ -1724,8 +1799,19 @@
 		      p = GPT_ADDR;
 		      stop = endp;
 		    }
+
+		  if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
+		    {
+		      if (negate)
+			break;
+		      else
+			goto fwd_ok;
+		    }
+
 		  if (!fastmap[p[-1]])
 		    break;
+
+		back_unibyte_ok:
 		  p--, pos--;
 		}
 	  }
@@ -1748,6 +1834,30 @@
     return make_number (PT - start_point);
   }
 }
+
+/* Return 1 if character C belongs to one of the ISO classes
+   in the list ISO_CLASSES.  Each class is represented by an
+   integer which is its type according to re_wctype.  */
+
+static int
+in_classes (c, iso_classes)
+     int c;
+     Lisp_Object iso_classes;
+{
+  int fits_class = 0;
+
+  while (! NILP (iso_classes))
+    {
+      Lisp_Object elt;
+      elt = XCAR (iso_classes);
+      iso_classes = XCDR (iso_classes);
+
+      if (re_iswctype (c, XFASTINT (elt)))
+	fits_class = 1;
+    }
+
+  return fits_class;
+}
 
 /* Jump over a comment, assuming we are at the beginning of one.
    FROM is the current position.