changeset 32891:56a4ce418f35

More `unsigned char' -> `re_char' changes. Also change several `int' into `re_wchar_t'. (PATTERN_STACK_EMPTY, PUSH_PATTERN_OP, POP_PATTERN_OP): Remove. (PUSH_FAILURE_POINTER): Don't cast any more. (POP_FAILURE_REG_OR_COUNT): Remove the cast that strips `const'. We want GCC to complain, since this piece of code makes re_match non-reentrant, which *should* be fixed. (GET_BUFFER_SPACE): Use size_t rather than unsigned long. (EXTEND_BUFFER): Use RETALLOC. (SET_LIST_BIT): Don't cast. (re_wchar_t): New type. (re_iswctype, re_wctype_to_bit): Make it crystal clear to GCC that those two functions will always properly return. (IMMEDIATE_QUIT_CHECK): Cast to void. (analyse_first): Use recursion rather than an explicit stack. (re_compile_fastmap): Can't fail anymore. (re_search_2): Don't check re_compile_fastmap for failure. (PUSH_NUMBER): Renamed from PUSH_FAILURE_COUNT. Now also sets the new value (passed in a new argument). (re_match_2_internal): Use it. Also, use a new var `reg' of type size_t when looping through regs rather than reuse the inappropriate `mcnt'.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Thu, 26 Oct 2000 00:45:01 +0000
parents ba002124ad94
children 25182c4fd302
files src/regex.c
diffstat 1 files changed, 165 insertions(+), 195 deletions(-) [+]
line wrap: on
line diff
--- a/src/regex.c	Wed Oct 25 23:36:06 2000 +0000
+++ b/src/regex.c	Thu Oct 26 00:45:01 2000 +0000
@@ -22,10 +22,9 @@
 /* TODO:
    - structure the opcode space into opcode+flag.
    - merge with glibc's regex.[ch].
-   - replace succeed_n + jump_n with a combined operation so that the counter
-     can simply be decremented when popping the failure_point without having
-     to stack up failure_count entries.
- */
+   - replace (succeed_n + jump_n + set_number_at) with something that doesn't
+     need to modify the compiled regexp.
+*/
 
 /* AIX requires this to be the first thing in the file. */
 #if defined _AIX && !defined REGEX_MALLOC
@@ -553,7 +552,7 @@
 	   is followed by a range table:
 	       2 bytes of flags for character sets (low 8 bits, high 8 bits)
 		   See RANGE_TABLE_WORK_BITS below.
-	       2 bytes, the number of pairs that follow
+	       2 bytes, the number of pairs that follow (up to 32767)
 	       pairs, each 2 multibyte characters,
 		   each multibyte character represented as 3 bytes.  */
   charset,
@@ -700,7 +699,7 @@
 static void
 extract_number (dest, source)
     int *dest;
-    unsigned char *source;
+    re_char *source;
 {
   int temp = SIGN_EXTEND_CHAR (*(source + 1));
   *dest = *source & 0377;
@@ -729,7 +728,7 @@
 static void
 extract_number_and_incr (destination, source)
     int *destination;
-    unsigned char **source;
+    re_char **source;
 {
   extract_number (destination, *source);
   *source += 2;
@@ -803,9 +802,9 @@
 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
   do									\
     {									\
-      int range_start, range_end;					\
-      unsigned char *p;							\
-      unsigned char *range_table_end					\
+      re_wchar_t range_start, range_end;				\
+      re_char *p;							\
+      re_char *range_table_end						\
 	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
 									\
       for (p = (range_table); p < range_table_end; p += 2 * 3)		\
@@ -829,8 +828,8 @@
     {									\
       /* Number of ranges in range table. */				\
       int count;							\
-      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
-									\
+      re_char *range_table = CHARSET_RANGE_TABLE (charset);		\
+      									\
       EXTRACT_NUMBER_AND_INCR (count, range_table);			\
       CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
     }									\
@@ -899,12 +898,12 @@
 
 void
 print_partial_compiled_pattern (start, end)
-    unsigned char *start;
-    unsigned char *end;
+    re_char *start;
+    re_char *end;
 {
   int mcnt, mcnt2;
-  unsigned char *p = start;
-  unsigned char *pend = end;
+  re_char *p = start;
+  re_char *pend = end;
 
   if (start == NULL)
     {
@@ -1142,7 +1141,7 @@
 print_compiled_pattern (bufp)
     struct re_pattern_buffer *bufp;
 {
-  unsigned char *buffer = bufp->buffer;
+  re_char *buffer = bufp->buffer;
 
   print_partial_compiled_pattern (buffer, buffer + bufp->used);
   printf ("%ld bytes used/%ld bytes allocated.\n",
@@ -1326,7 +1325,7 @@
 
 union fail_stack_elt
 {
-  const unsigned char *pointer;
+  re_char *pointer;
   /* This should be the biggest `int' that's no bigger than a pointer.  */
   long integer;
 };
@@ -1341,7 +1340,6 @@
   size_t frame;	/* Offset of the cur constructed frame.  */
 } fail_stack_type;
 
-#define PATTERN_STACK_EMPTY()     (fail_stack.avail == 0)
 #define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
 
@@ -1413,22 +1411,11 @@
 	 1)))
 
 
-/* Push pointer POINTER on FAIL_STACK.
-   Return 1 if was able to do so and 0 if ran out of memory allocating
-   space to do so.  */
-#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
-  ((FAIL_STACK_FULL ()							\
-    && !GROW_FAIL_STACK (FAIL_STACK))					\
-   ? 0									\
-   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
-      1))
-#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
-
 /* Push a pointer value onto the failure stack.
    Assumes the variable `fail_stack'.  Probably should only
    be called from within `PUSH_FAILURE_POINT'.  */
 #define PUSH_FAILURE_POINTER(item)					\
-  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+  fail_stack.stack[fail_stack.avail++].pointer = (item)
 
 /* This pushes an integer-valued item onto the failure stack.
    Assumes the variable `fail_stack'.  Probably should only
@@ -1478,16 +1465,19 @@
   PUSH_FAILURE_INT (num);						\
 } while (0)
 
-#define PUSH_FAILURE_COUNT(ptr)						\
+/* Change the counter's value to VAL, but make sure that it will
+   be reset when backtracking.  */
+#define PUSH_NUMBER(ptr,val)						\
 do {									\
   char *destination;							\
   int c;								\
   ENSURE_FAIL_STACK(3);							\
   EXTRACT_NUMBER (c, ptr);						\
-  DEBUG_PRINT3 ("    Push counter %p = %d\n", ptr, c);			\
+  DEBUG_PRINT4 ("    Push number %p = %d -> %d\n", ptr, c, val);	\
   PUSH_FAILURE_INT (c);							\
   PUSH_FAILURE_POINTER (ptr);						\
   PUSH_FAILURE_INT (-1);						\
+  STORE_NUMBER (ptr, val);						\
 } while (0)
 
 /* Pop a saved register off the stack.  */
@@ -1497,7 +1487,9 @@
   if (reg == -1)							\
     {									\
       /* It's a counter.  */						\
-      unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER ();	\
+      /* Here, we discard `const', which makes re_match non-reentrant.	\
+         Gcc gives a warning for it, which is good.  */			\
+      unsigned char *ptr = POP_FAILURE_POINTER ();			\
       reg = POP_FAILURE_INT ();						\
       STORE_NUMBER (ptr, reg);						\
       DEBUG_PRINT3 ("     Pop counter %p = %d\n", ptr, reg);		\
@@ -1603,14 +1595,14 @@
   while (fail_stack.frame < fail_stack.avail)				\
     POP_FAILURE_REG_OR_COUNT ();					\
 									\
-  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
+  pat = POP_FAILURE_POINTER ();				\
   DEBUG_PRINT2 ("  Popping pattern %p: ", pat);				\
   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
 									\
   /* If the saved string location is NULL, it came from an		\
      on_failure_keep_string_jump opcode, and we want to throw away the	\
      saved NULL, thus retaining our current position in the string.  */	\
-  str = (re_char *) POP_FAILURE_POINTER ();				\
+  str = POP_FAILURE_POINTER ();						\
   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   DEBUG_PRINT1 ("'\n");							\
@@ -1641,20 +1633,18 @@
 				  int arg, unsigned char *end));
 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
 				  int arg1, int arg2, unsigned char *end));
-static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern,
-					   const unsigned char *p,
+static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
+					   re_char *p,
 					   reg_syntax_t syntax));
-static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p,
-					   const unsigned char *pend,
+static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
+					   re_char *pend,
 					   reg_syntax_t syntax));
-static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p));
-static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
+static re_char *skip_one_char _RE_ARGS ((re_char *p));
+static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
 				    char *fastmap, const int multibyte));
 
 /* Fetch the next character in the uncompiled pattern---translating it
-   if necessary.  Also cast from a signed character in the constant
-   string passed to us by the user to an unsigned char that we can use
-   as an array index (in, e.g., `translate').  */
+   if necessary.  */
 #define PATFETCH(c)							\
   do {									\
     PATFETCH_RAW (c);							\
@@ -1689,7 +1679,7 @@
 
 /* Make sure we have at least N more bytes of space in buffer.  */
 #define GET_BUFFER_SPACE(n)						\
-    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
+    while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated)		\
       EXTEND_BUFFER ()
 
 /* Make sure we have one more byte of buffer space and then add C to it.  */
@@ -1778,13 +1768,13 @@
 #endif
 #define EXTEND_BUFFER()							\
   do {									\
-    unsigned char *old_buffer = bufp->buffer;				\
+    re_char *old_buffer = bufp->buffer;					\
     if (bufp->allocated == MAX_BUF_SIZE)				\
       return REG_ESIZE;							\
     bufp->allocated <<= 1;						\
     if (bufp->allocated > MAX_BUF_SIZE)					\
       bufp->allocated = MAX_BUF_SIZE;					\
-    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+    RETALLOC (bufp->buffer, bufp->allocated, unsigned char);		\
     if (bufp->buffer == NULL)						\
       return REG_ESPACE;						\
     /* If the buffer moved, move all the pointers into it.  */		\
@@ -1907,9 +1897,7 @@
 
 
 /* Set the bit for character C in a list.  */
-#define SET_LIST_BIT(c)							\
-  (b[((unsigned char) (c)) / BYTEWIDTH]					\
-   |= 1 << (((unsigned char) c) % BYTEWIDTH))
+#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
 
 
 /* Get the next unsigned number in the uncompiled pattern.  */
@@ -1940,6 +1928,7 @@
 #  define CHAR_CLASS_MAX_LENGTH 256
 # endif
 typedef wctype_t re_wctype_t;
+typedef wchar_t re_wchar_t;
 # define re_wctype wctype
 # define re_iswctype iswctype
 # define re_wctype_to_bit(cc) 0
@@ -1947,7 +1936,7 @@
 # define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
 # define btowc(c) c
 
-/* Character classes' indices.  */
+/* Character classes.  */
 typedef enum { RECC_ERROR = 0,
 	       RECC_ALNUM, RECC_ALPHA, RECC_WORD,
 	       RECC_GRAPH, RECC_PRINT,
@@ -1959,10 +1948,12 @@
 	       RECC_ASCII, RECC_UNIBYTE
 } re_wctype_t;
 
+typedef int re_wchar_t;
+
 /* Map a string to the char class it names (if any).  */
 static re_wctype_t
 re_wctype (string)
-     unsigned char *string;
+     re_char *string;
 {
   if      (STREQ (string, "alnum"))	return RECC_ALNUM;
   else if (STREQ (string, "alpha"))	return RECC_ALPHA;
@@ -1990,27 +1981,30 @@
      int ch;
      re_wctype_t cc;
 {
+  boolean ret = false;
+  /* Each case ends in `break'; otherwise all fall through to RECC_ERROR.  */
   switch (cc)
     {
-    case RECC_ALNUM: return ISALNUM (ch);
-    case RECC_ALPHA: return ISALPHA (ch);
-    case RECC_BLANK: return ISBLANK (ch);
-    case RECC_CNTRL: return ISCNTRL (ch);
-    case RECC_DIGIT: return ISDIGIT (ch);
-    case RECC_GRAPH: return ISGRAPH (ch);
-    case RECC_LOWER: return ISLOWER (ch);
-    case RECC_PRINT: return ISPRINT (ch);
-    case RECC_PUNCT: return ISPUNCT (ch);
-    case RECC_SPACE: return ISSPACE (ch);
-    case RECC_UPPER: return ISUPPER (ch);
-    case RECC_XDIGIT: return ISXDIGIT (ch);
-    case RECC_ASCII: return IS_REAL_ASCII (ch);
-    case RECC_NONASCII: return !IS_REAL_ASCII (ch);
-    case RECC_UNIBYTE: return ISUNIBYTE (ch);
-    case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
-    case RECC_WORD: return ISWORD (ch);
-    case RECC_ERROR: return false;
+    case RECC_ALNUM: ret = ISALNUM (ch); break;
+    case RECC_ALPHA: ret = ISALPHA (ch); break;
+    case RECC_BLANK: ret = ISBLANK (ch); break;
+    case RECC_CNTRL: ret = ISCNTRL (ch); break;
+    case RECC_DIGIT: ret = ISDIGIT (ch); break;
+    case RECC_GRAPH: ret = ISGRAPH (ch); break;
+    case RECC_LOWER: ret = ISLOWER (ch); break;
+    case RECC_PRINT: ret = ISPRINT (ch); break;
+    case RECC_PUNCT: ret = ISPUNCT (ch); break;
+    case RECC_SPACE: ret = ISSPACE (ch); break;
+    case RECC_UPPER: ret = ISUPPER (ch); break;
+    case RECC_XDIGIT: ret = ISXDIGIT (ch); break;
+    case RECC_ASCII: ret = IS_REAL_ASCII (ch); break;
+    case RECC_NONASCII: ret = !IS_REAL_ASCII (ch); break;
+    case RECC_UNIBYTE: ret = ISUNIBYTE (ch); break;
+    case RECC_MULTIBYTE: ret = !ISUNIBYTE (ch); break;
+    case RECC_WORD: ret = ISWORD (ch); break;
+    case RECC_ERROR: ret = false; break;
     }
+  return ret;
 }
 
 /* Return a bit-pattern to use in the range-table bits to match multibyte
@@ -2019,18 +2013,21 @@
 re_wctype_to_bit (cc)
      re_wctype_t cc;
 {
+  int ret = 0;
+  /* Each case ends in `break'; otherwise all fall through to `ret = 0'.  */
   switch (cc)
     {
     case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
-    case RECC_MULTIBYTE: return BIT_MULTIBYTE;
-    case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
-    case RECC_LOWER: return BIT_LOWER;
-    case RECC_UPPER: return BIT_UPPER;
-    case RECC_PUNCT: return BIT_PUNCT;
-    case RECC_SPACE: return BIT_SPACE;
+    case RECC_MULTIBYTE: ret = BIT_MULTIBYTE; break;
+    case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: ret = BIT_WORD; break;
+    case RECC_LOWER: ret = BIT_LOWER; break;
+    case RECC_UPPER: ret = BIT_UPPER; break;
+    case RECC_PUNCT: ret = BIT_PUNCT; break;
+    case RECC_SPACE: ret = BIT_SPACE; break;
     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
-    case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
+    case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: ret = 0; break;
     }
+  return ret;
 }
 #endif
 
@@ -2042,7 +2039,7 @@
       if (immediate_quit) QUIT;			\
     } while (0)
 #else
-# define IMMEDIATE_QUIT_CHECK    (0)
+# define IMMEDIATE_QUIT_CHECK    ((void)0)
 #endif
 
 #ifndef MATCH_MAY_ALLOCATE
@@ -2129,10 +2126,8 @@
      reg_syntax_t syntax;
      struct re_pattern_buffer *bufp;
 {
-  /* We fetch characters from PATTERN here.  Even though PATTERN is
-     `char *' (i.e., signed), we declare these variables as unsigned, so
-     they can be reliably used as array indices.  */
-  register unsigned int c, c1;
+  /* We fetch characters from PATTERN here.  */
+  register re_wchar_t c, c1;
 
   /* A random temporary spot in PATTERN.  */
   re_char *p1;
@@ -2359,6 +2354,7 @@
 		    boolean simple = skip_one_char (laststart) == b;
 		    unsigned int startoffset = 0;
 		    re_opcode_t ofj =
+		      /* Check if the loop can match the empty string.  */
 		      (simple || !analyse_first (laststart, b, NULL, 0)) ?
 		      on_failure_jump : on_failure_jump_loop;
 		    assert (skip_one_char (laststart) <= b);
@@ -2629,7 +2625,7 @@
 		if (SINGLE_BYTE_CHAR_P (c))
 		  /* ... into bitmap.  */
 		  {
-		    unsigned this_char;
+		    re_wchar_t this_char;
 		    int range_start = c, range_end = c1;
 
 		    /* If the start is after the end, the range is empty.  */
@@ -3365,10 +3361,10 @@
 
 static boolean
 at_begline_loc_p (pattern, p, syntax)
-    const unsigned char *pattern, *p;
+    re_char *pattern, *p;
     reg_syntax_t syntax;
 {
-  const unsigned char *prev = p - 2;
+  re_char *prev = p - 2;
   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
 
   return
@@ -3389,12 +3385,12 @@
 
 static boolean
 at_endline_loc_p (p, pend, syntax)
-    const unsigned char *p, *pend;
+    re_char *p, *pend;
     reg_syntax_t syntax;
 {
-  const unsigned char *next = p;
+  re_char *next = p;
   boolean next_backslash = *next == '\\';
-  const unsigned char *next_next = p + 1 < pend ? p + 1 : 0;
+  re_char *next_next = p + 1 < pend ? p + 1 : 0;
 
   return
        /* Before a subexpression?  */
@@ -3433,36 +3429,16 @@
 
    Return 1  if p..pend might match the empty string.
    Return 0  if p..pend matches at least one char.
-   Return -1 if p..pend matches at least one char, but fastmap was not
-      updated accurately.
-   Return -2 if an error occurred.  */
+   Return -1 if fastmap was not updated accurately.  */
 
 static int
 analyse_first (p, pend, fastmap, multibyte)
-     unsigned char *p, *pend;
+     re_char *p, *pend;
      char *fastmap;
      const int multibyte;
 {
   int j, k;
   boolean not;
-#ifdef MATCH_MAY_ALLOCATE
-  fail_stack_type fail_stack;
-#endif
-#ifndef REGEX_MALLOC
-  char *destination;
-#endif
-
-#if defined REL_ALLOC && defined REGEX_MALLOC
-  /* This holds the pointer to the failure stack, when
-     it is allocated relocatably.  */
-  fail_stack_elt_t *failure_stack_ptr;
-#endif
-
-  /* Assume that each path through the pattern can be null until
-     proven otherwise.  We set this false at the bottom of switch
-     statement, to which we get only if a particular path doesn't
-     match the empty string.  */
-  boolean path_can_be_null = true;
 
   /* If all elements for base leading-codes in fastmap is set, this
      flag is set true.	*/
@@ -3470,8 +3446,6 @@
 
   assert (p);
 
-  INIT_FAIL_STACK ();
-
   /* The loop below works as follows:
      - It has a working-list kept in the PATTERN_STACK and which basically
        starts by only containing a pointer to the first operation.
@@ -3487,7 +3461,7 @@
      so that `p' is monotonically increasing.  More to the point, we
      never set `p' (or push) anything `<= p1'.  */
 
-  while (1)
+  while (p < pend)
     {
       /* `p1' is used as a marker of how far back a `on_failure_jump'
 	 can go without being ignored.  It is normally equal to `p'
@@ -3497,29 +3471,12 @@
 	    3..9: <body>
 	    10: on_failure_jump 3
 	 as used for the *? operator.  */
-      unsigned char *p1 = p;
-
-      if (p >= pend)
-	{
-	  if (path_can_be_null)
-	    return (RESET_FAIL_STACK (), 1);
-
-	  /* We have reached the (effective) end of pattern.  */
-	  if (PATTERN_STACK_EMPTY ())
-	    return (RESET_FAIL_STACK (), 0);
-
-	  p = (unsigned char*) POP_PATTERN_OP ();
-	  path_can_be_null = true;
-	  continue;
-	}
-
-      /* We should never be about to go beyond the end of the pattern.	*/
-      assert (p < pend);
+      re_char *p1 = p;
 
       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
 	{
 	case succeed:
-	  p = pend;
+	  return 1;
 	  continue;
 
 	case duplicate:
@@ -3551,7 +3508,7 @@
 	  /* We could put all the chars except for \n (and maybe \0)
 	     but we don't bother since it is generally not worth it.  */
 	  if (!fastmap) break;
-	  return (RESET_FAIL_STACK (), -1);
+	  return -1;
 
 
 	case charset_not:
@@ -3626,7 +3583,7 @@
 #else  /* emacs */
 	  /* This match depends on text properties.  These end with
 	     aborting optimizations.  */
-	  return (RESET_FAIL_STACK (), -1);
+	  return -1;
 
 	case categoryspec:
 	case notcategoryspec:
@@ -3693,8 +3650,14 @@
 	  EXTRACT_NUMBER_AND_INCR (j, p);
 	  if (p + j <= p1)
 	    ; /* Backward jump to be ignored.  */
-	  else if (!PUSH_PATTERN_OP (p + j, fail_stack))
-	    return (RESET_FAIL_STACK (), -2);
+	  else
+	    { /* We have to look down both arms.
+		 We first go down the "straight" path so as to minimize
+		 stack usage when going through alternatives.  */
+	      int r = analyse_first (p, pend, fastmap, multibyte);
+	      if (r) return r;
+	      p += j;
+	    }
 	  continue;
 
 
@@ -3734,15 +3697,13 @@
 
       /* Getting here means we have found the possible starting
 	 characters for one path of the pattern -- and that the empty
-	 string does not match.	 We need not follow this path further.
-	 Instead, look at the next alternative (remembered on the
-	 stack), or quit if no more.  The test at the top of the loop
-	 does these things.  */
-      path_can_be_null = false;
-      p = pend;
+	 string does not match.	 We need not follow this path further.  */
+      return 0;
     } /* while p */
 
-  return (RESET_FAIL_STACK (), 0);
+  /* We reached the end without matching anything.  */
+  return 1;
+
 } /* analyse_first */
 
 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@@ -3777,8 +3738,6 @@
   analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
 			    fastmap, RE_MULTIBYTE_P (bufp));
   bufp->can_be_null = (analysis != 0);
-  if (analysis < -1)
-    return analysis;
   return 0;
 } /* re_compile_fastmap */
 
@@ -3921,8 +3880,7 @@
 
   /* Update the fastmap now if not correct already.  */
   if (fastmap && !bufp->fastmap_accurate)
-    if (re_compile_fastmap (bufp) == -2)
-      return -2;
+    re_compile_fastmap (bufp);
 
   /* See whether the pattern is anchored.  */
   anchored_start = (bufp->buffer[0] == begline);
@@ -3958,7 +3916,7 @@
       if (fastmap && startpos < total_size && !bufp->can_be_null)
 	{
 	  register re_char *d;
-	  register unsigned int buf_ch;
+	  register re_wchar_t buf_ch;
 
 	  d = POS_ADDR_VSTRING (startpos);
 
@@ -4191,9 +4149,9 @@
 
 /* If the operation is a match against one or more chars,
    return a pointer to the next operation, else return NULL.  */
-static unsigned char *
+static re_char *
 skip_one_char (p)
-     unsigned char *p;
+     re_char *p;
 {
   switch (SWITCH_ENUM_CAST (*p++))
     {
@@ -4303,7 +4261,7 @@
     case endline:
     case exactn:
       {
-	register unsigned int c
+	register re_wchar_t c
 	  = (re_opcode_t) *p2 == endline ? '\n'
 	  : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
 
@@ -4525,8 +4483,8 @@
 {
   /* General temporaries.  */
   int mcnt;
+  size_t reg;
   boolean not;
-  unsigned char *p1;
 
   /* Just past the end of the corresponding string.  */
   re_char *end1, *end2;
@@ -4545,8 +4503,8 @@
   re_char *dfail;
 
   /* Where we are in the pattern, and the end of the pattern.  */
-  unsigned char *p = bufp->buffer;
-  register unsigned char *pend = p + bufp->used;
+  re_char *p = bufp->buffer;
+  re_char *pend = p + bufp->used;
 
   /* We use this to map every character in the string.	*/
   RE_TRANSLATE_TYPE translate = bufp->translate;
@@ -4655,8 +4613,8 @@
   /* Initialize subexpression text positions to -1 to mark ones that no
      start_memory/stop_memory has been seen for. Also initialize the
      register information struct.  */
-  for (mcnt = 1; mcnt < num_regs; mcnt++)
-    regstart[mcnt] = regend[mcnt] = NULL;
+  for (reg = 1; reg < num_regs; reg++)
+    regstart[reg] = regend[reg] = NULL;
 
   /* We move `string1' into `string2' if the latter's empty -- but not if
      `string1' is null.	 */
@@ -4758,10 +4716,10 @@
 
 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
 
-		      for (mcnt = 1; mcnt < num_regs; mcnt++)
+		      for (reg = 1; reg < num_regs; reg++)
 			{
-			  best_regstart[mcnt] = regstart[mcnt];
-			  best_regend[mcnt] = regend[mcnt];
+			  best_regstart[reg] = regstart[reg];
+			  best_regend[reg] = regend[reg];
 			}
 		    }
 		  goto fail;
@@ -4784,10 +4742,10 @@
 		  dend = ((d >= string1 && d <= end1)
 			   ? end_match_1 : end_match_2);
 
-		  for (mcnt = 1; mcnt < num_regs; mcnt++)
+		  for (reg = 1; reg < num_regs; reg++)
 		    {
-		      regstart[mcnt] = best_regstart[mcnt];
-		      regend[mcnt] = best_regend[mcnt];
+		      regstart[reg] = best_regstart[reg];
+		      regend[reg] = best_regend[reg];
 		    }
 		}
 	    } /* d != end_match_2 */
@@ -4847,16 +4805,16 @@
 
 	      /* Go through the first `min (num_regs, regs->num_regs)'
 		 registers, since that is all we initialized.  */
-	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+	      for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
 		{
-		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
-		    regs->start[mcnt] = regs->end[mcnt] = -1;
+		  if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
+		    regs->start[reg] = regs->end[reg] = -1;
 		  else
 		    {
-		      regs->start[mcnt]
-			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
-		      regs->end[mcnt]
-			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+		      regs->start[reg]
+			= (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
+		      regs->end[reg]
+			= (regoff_t) POINTER_TO_OFFSET (regend[reg]);
 		    }
 		}
 
@@ -4865,8 +4823,8 @@
 		 we (re)allocated the registers, this is the case,
 		 because we always allocate enough to have at least one
 		 -1 at the end.	 */
-	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
-		regs->start[mcnt] = regs->end[mcnt] = -1;
+	      for (reg = num_regs; reg < regs->num_regs; reg++)
+		regs->start[reg] = regs->end[reg] = -1;
 	    } /* regs && !bufp->no_sub */
 
 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
@@ -4964,7 +4922,7 @@
 	case anychar:
 	  {
 	    int buf_charlen;
-	    unsigned int buf_ch;
+	    re_wchar_t buf_ch;
 
 	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
 
@@ -4993,7 +4951,7 @@
 
 	    /* Start of actual range_table, or end of bitmap if there is no
 	       range table.  */
-	    unsigned char *range_table;
+	    re_char *range_table;
 
 	    /* Nonzero if there is a range table.  */
 	    int range_table_exists;
@@ -5317,8 +5275,10 @@
 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
 			mcnt, p + mcnt);
 	  {
-	    unsigned char *p1 = p; /* Next operation.  */
+	    re_char *p1 = p; /* Next operation.  */
+	    /* Please don't add casts to try and shut up GCC.  */
 	    unsigned char *p2 = p + mcnt; /* Destination of the jump.  */
+	    unsigned char *p3 = p - 3; /* Location of the opcode.  */
 
 	    p -= 3;		/* Reset so that we will re-execute the
 				   instruction once it's been changed. */
@@ -5334,14 +5294,14 @@
 	      {
 		/* Use a fast `on_failure_keep_string_jump' loop.  */
 		DEBUG_PRINT1 ("  smart exclusive => fast loop.\n");
-		*p = (unsigned char) on_failure_keep_string_jump;
+		*p3 = (unsigned char) on_failure_keep_string_jump;
 		STORE_NUMBER (p2 - 2, mcnt + 3);
 	      }
 	    else
 	      {
 		/* Default to a safe `on_failure_jump' loop.  */
 		DEBUG_PRINT1 ("  smart default => slow loop.\n");
-		*p = (unsigned char) on_failure_jump;
+		*p3 = (unsigned char) on_failure_jump;
 	      }
 	    DEBUG_STATEMENT (debug -= 2);
 	  }
@@ -5361,17 +5321,18 @@
 	/* Have to succeed matching what follows at least n times.
 	   After that, handle like `on_failure_jump'.  */
 	case succeed_n:
+	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
 	  EXTRACT_NUMBER (mcnt, p + 2);
 	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
 
 	  /* Originally, mcnt is how many times we HAVE to succeed.  */
 	  if (mcnt != 0)
 	    {
+	      /* Please don't add a cast to try and shut up GCC.  */
+	      unsigned char *p2 = p + 2; /* Location of the counter.  */
 	      mcnt--;
-	      p += 2;
-	      PUSH_FAILURE_COUNT (p);
-	      DEBUG_PRINT3 ("	Setting %p to %d.\n", p, mcnt);
-	      STORE_NUMBER_AND_INCR (p, mcnt);
+	      p += 4;
+	      PUSH_NUMBER (p2, mcnt);
 	    }
 	  else
 	    /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
@@ -5379,15 +5340,17 @@
 	  break;
 
 	case jump_n:
+	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
 	  EXTRACT_NUMBER (mcnt, p + 2);
 	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
 
 	  /* Originally, this is how many times we CAN jump.  */
 	  if (mcnt != 0)
 	    {
+	      /* Please don't add a cast to try and shut up GCC.  */
+	      unsigned char *p2 = p + 2; /* Location of the counter.  */
 	      mcnt--;
-	      PUSH_FAILURE_COUNT (p + 2);
-	      STORE_NUMBER (p + 2, mcnt);
+	      PUSH_NUMBER (p2, mcnt);
 	      goto unconditional_jump;
 	    }
 	  /* If don't have to jump any more, skip over the rest of command.  */
@@ -5397,14 +5360,16 @@
 
 	case set_number_at:
 	  {
+	    unsigned char *p2;	/* Location of the counter.  */
 	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
 
 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	    p1 = p + mcnt;
+	    /* Please don't add a cast to try and shut up GCC.  */
+	    p2 = p + mcnt;
+	    /* Signedness doesn't matter since we only copy MCNT's bits.  */
 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	    DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
-	    PUSH_FAILURE_COUNT (p1);
-	    STORE_NUMBER (p1, mcnt);
+	    DEBUG_PRINT3 ("  Setting %p to %d.\n", p2, mcnt);
+	    PUSH_NUMBER (p2, mcnt);
 	    break;
 	  }
 
@@ -5422,7 +5387,8 @@
 	    {
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
-	      int c1, c2, s1, s2;
+	      re_wchar_t c1, c2;
+	      int s1, s2;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d - 1);
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5461,7 +5427,8 @@
 	    {
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
-	      int c1, c2, s1, s2;
+	      re_wchar_t c1, c2;
+	      int s1, s2;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d);
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5504,7 +5471,8 @@
 	    {
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
-	      int c1, c2, s1, s2;
+	      re_wchar_t c1, c2;
+	      int s1, s2;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d) - 1;
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5549,7 +5517,8 @@
 	  }
 #endif
 	  {
-	    int c, len;
+	    int len;
+	    re_wchar_t c;
 
 	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
 
@@ -5585,7 +5554,9 @@
 	  DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
 	  PREFETCH ();
 	  {
-	    int c, len;
+	    int len;
+	    re_wchar_t c;
+
 	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
 
 	    if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
@@ -5607,8 +5578,7 @@
       IMMEDIATE_QUIT_CHECK;
       if (!FAIL_STACK_EMPTY ())
 	{
-	  re_char *str;
-	  unsigned char *pat;
+	  re_char *str, *pat;
 	  /* A restart point is known.  Restore to that state.  */
 	  DEBUG_PRINT1 ("\nFAIL:\n");
 	  POP_FAILURE_POINT (str, pat);
@@ -5678,7 +5648,7 @@
   while (p1 < p1_end && p2 < p2_end)
     {
       int p1_charlen, p2_charlen;
-      int p1_ch, p2_ch;
+      re_wchar_t p1_ch, p2_ch;
 
       p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
       p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);