diff src/regex.c @ 83542:2d56e13fd23d

Merged from emacs@sv.gnu.org Patches applied: * emacs@sv.gnu.org/emacs--devo--0--patch-413 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-414 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-415 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-416 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-417 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-418 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-419 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-420 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-421 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-422 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-423 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-424 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-425 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-426 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-427 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-428 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-429 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-430 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-431 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-432 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-433 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-434 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-435 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-436 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-437 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-438 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-439 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-440 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-441 lisp/url/url-methods.el: Fix format error when http_proxy is empty string * emacs@sv.gnu.org/emacs--devo--0--patch-442 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-443 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-444 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-445 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-446 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-447 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-448 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-449 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-450 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-451 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-452 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-453 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-454 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-455 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-456 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-457 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-458 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-459 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-460 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-461 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-462 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-463 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-464 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-465 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-466 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-467 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-468 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-469 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-470 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-471 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-472 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-473 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-128 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-129 Merge from emacs--devo--0 * emacs@sv.gnu.org/gnus--rel--5.10--patch-130 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-131 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-132 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-133 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-134 Merge from emacs--devo--0 * emacs@sv.gnu.org/gnus--rel--5.10--patch-135 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-136 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-137 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-138 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-139 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-140 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-141 Merge from emacs--devo--0 * emacs@sv.gnu.org/gnus--rel--5.10--patch-142 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-143 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-144 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-145 Merge from emacs--devo--0 * emacs@sv.gnu.org/gnus--rel--5.10--patch-146 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-147 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-148 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-149 Update from CVS git-archimport-id: lorentey@elte.hu--2004/emacs--multi-tty--0--patch-582
author Karoly Lorentey <lorentey@elte.hu>
date Sat, 14 Oct 2006 17:36:28 +0000
parents 866effff65a4 3501fcb2e68c
children c71725faff1a
line wrap: on
line diff
--- a/src/regex.c	Sat Oct 14 16:56:21 2006 +0000
+++ b/src/regex.c	Sat Oct 14 17:36:28 2006 +0000
@@ -2530,6 +2530,7 @@
   bufp->syntax = syntax;
   bufp->fastmap_accurate = 0;
   bufp->not_bol = bufp->not_eol = 0;
+  bufp->used_syntax = 0;
 
   /* Set `used' to zero, so that if we return an error, the pattern
      printer (for debugging) will think there's no pattern.  We reset it
@@ -2942,6 +2943,14 @@
 			      SET_LIST_BIT (translated);
 			  }
 
+			/* In most cases the matching rule for char classes
+			   only uses the syntax table for multibyte chars,
+			   so that the content of the syntax-table it is not
+			   hardcoded in the range_table.  SPACE and WORD are
+			   the two exceptions.  */
+			if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
+			  bufp->used_syntax = 1;
+
 			/* Repeat the loop. */
 			continue;
 		      }
@@ -3877,11 +3886,13 @@
 	  if (fastmap)
 	    {
 	      int c = RE_STRING_CHAR (p + 1, pend - p);
-
+	      /* When fast-scanning, the fastmap can be indexed either with
+		 a char (smaller than 256) or with the first byte of
+		 a char's byte sequence.  So we have to conservatively add
+		 both to the table.  */
 	      if (SINGLE_BYTE_CHAR_P (c))
 		fastmap[c] = 1;
-	      else
-		fastmap[p[1]] = 1;
+	      fastmap[p[1]] = 1;
 	    }
 	  break;
 
@@ -3899,6 +3910,10 @@
 	     So any that are not listed in the charset
 	     are possible matches, even in multibyte buffers.  */
 	  if (!fastmap) break;
+	  /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
+	     because it will automatically be set when needed by virtue of
+	     being larger than the highest char of its charset (0xbf) but
+	     smaller than (1<<BYTEWIDTH).  */
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
 	       j < (1 << BYTEWIDTH); j++)
 	    fastmap[j] = 1;
@@ -3909,7 +3924,13 @@
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
 	       j >= 0; j--)
 	    if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
-	      fastmap[j] = 1;
+	      {
+		fastmap[j] = 1;
+#ifdef emacs
+		if (j >= 0x80 && j < 0xa0)
+		  fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
+#endif
+	      }
 
 	  if ((not && multibyte)
 	      /* Any character set can possibly contain a character
@@ -4352,11 +4373,33 @@
 		    }
 		}
 	      else
-		while (range > lim && !fastmap[*d])
+		do
 		  {
-		    d++;
-		    range--;
-		  }
+		    re_char *d_start = d;
+		    while (range > lim && !fastmap[*d])
+		      {
+			d++;
+			range--;
+		      }
+#ifdef emacs
+		    if (multibyte && range > lim)
+		      {
+			/* Check that we are at the beginning of a char.  */
+			int at_boundary;
+			AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
+			if (at_boundary)
+			  break;
+			else
+			  { /* We have matched an internal byte of a char
+			       rather than the leading byte, so it's a false
+			       positive: we should keep scanning.  */
+			    d++; range--;
+			  }
+		      }
+		    else
+#endif
+		      break;
+		  } while (1);
 
 	      startpos += irange - range;
 	    }
@@ -6197,6 +6240,10 @@
 {
   reg_errcode_t ret;
 
+#ifdef emacs
+  gl_state.current_syntax_table = current_buffer->syntax_table;
+#endif
+
   /* GNU code is written to assume at least RE_NREGS registers will be set
      (and at least one extra will be -1).  */
   bufp->regs_allocated = REGS_UNALLOCATED;