diff src/syntax.c @ 89483:2f877ed80fa6

*** empty log message ***
author Kenichi Handa <handa@m17n.org>
date Mon, 08 Sep 2003 12:53:41 +0000
parents 375f2633d815 9664825d832f
children 96081c0be5ca
line wrap: on
line diff
--- a/src/syntax.c	Mon Sep 08 11:56:09 2003 +0000
+++ b/src/syntax.c	Mon Sep 08 12:53:41 2003 +0000
@@ -24,7 +24,7 @@
 #include "lisp.h"
 #include "commands.h"
 #include "buffer.h"
-#include "charset.h"
+#include "character.h"
 #include "keymap.h"
 
 /* Make syntax table lookup grant data in gl_state.  */
@@ -97,7 +97,8 @@
 static int find_defun_start P_ ((int, int));
 static int back_comment P_ ((int, int, int, int, int, int *, int *));
 static int char_quoted P_ ((int, int));
-static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
+static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object));
+static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
 static Lisp_Object scan_lists P_ ((int, int, int, int));
 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
 				    int, int, int, int,
@@ -293,7 +294,7 @@
   while (bytepos >= beg)
     {
       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
-      code = SYNTAX (FETCH_CHAR (bytepos));
+      code = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (bytepos));
       if (! (code == Scharquote || code == Sescape))
 	break;
 
@@ -382,10 +383,10 @@
     {
       /* Open-paren at start of line means we may have found our
 	 defun-start.  */
-      if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
+      if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
 	{
 	  SETUP_SYNTAX_TABLE (PT + 1, -1);	/* Try again... */
-	  if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
+	  if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
 	    break;
 	  /* Now fallback to the default value.  */
 	  gl_state.current_syntax_table = current_buffer->syntax_table;
@@ -505,7 +506,7 @@
       UPDATE_SYNTAX_TABLE_BACKWARD (from);
 
       prev_syntax = syntax;
-      c = FETCH_CHAR (from_byte);
+      c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
       syntax = SYNTAX_WITH_FLAGS (c);
       code = SYNTAX (c);
 
@@ -534,7 +535,7 @@
 	  int next = from, next_byte = from_byte, next_c, next_syntax;
 	  DEC_BOTH (next, next_byte);
 	  UPDATE_SYNTAX_TABLE_BACKWARD (next);
-	  next_c = FETCH_CHAR (next_byte);
+	  next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
 	  next_syntax = SYNTAX_WITH_FLAGS (next_c);
 	  if (((com2start || comnested)
 	       && SYNTAX_FLAGS_COMEND_SECOND (syntax)
@@ -838,29 +839,6 @@
 static Lisp_Object Vsyntax_code_object;
 
 
-/* Look up the value for CHARACTER in syntax table TABLE's parent
-   and its parents.  SYNTAX_ENTRY calls this, when TABLE itself has nil
-   for CHARACTER.  It's actually used only when not compiled with GCC.  */
-
-Lisp_Object
-syntax_parent_lookup (table, character)
-     Lisp_Object table;
-     int character;
-{
-  Lisp_Object value;
-
-  while (1)
-    {
-      table = XCHAR_TABLE (table)->parent;
-      if (NILP (table))
-	return Qnil;
-
-      value = XCHAR_TABLE (table)->contents[character];
-      if (!NILP (value))
-	return value;
-    }
-}
-
 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
        doc: /* Return the syntax code of CHARACTER, described by a character.
 For example, if CHARACTER is a word constituent,
@@ -979,6 +957,8 @@
        doc: /* Set syntax for character CHAR according to string NEWENTRY.
 The syntax is changed only for table SYNTAX_TABLE, which defaults to
  the current buffer's syntax table.
+CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
+in the range MIN and MAX are changed.
 The first character of NEWENTRY should be one of the following:
   Space or -  whitespace syntax.    w   word constituent.
   _           symbol constituent.   .   punctuation.
@@ -1015,14 +995,24 @@
      (c, newentry, syntax_table)
      Lisp_Object c, newentry, syntax_table;
 {
-  CHECK_NUMBER (c);
+  if (CONSP (c))
+    {
+      CHECK_CHARACTER_CAR (c);
+      CHECK_CHARACTER_CDR (c);
+    }
+  else
+    CHECK_CHARACTER (c);
 
   if (NILP (syntax_table))
     syntax_table = current_buffer->syntax_table;
   else
     check_syntax_table (syntax_table);
 
-  SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
+  newentry = Fstring_to_syntax (newentry);
+  if (CONSP (c))
+    SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
+  else
+    SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
   return Qnil;
 }
 
@@ -1176,6 +1166,10 @@
 
 int parse_sexp_ignore_comments;
 
+/* Char-table of functions that find the next or previous word
+   boundary.  */
+Lisp_Object Vfind_word_boundary_function_table;
+
 /* Return the position across COUNT words from FROM.
    If that many words cannot be found before the end of the buffer, return 0.
    COUNT negative means scan backward and stop at word beginning.  */
@@ -1189,6 +1183,7 @@
   register int from_byte = CHAR_TO_BYTE (from);
   register enum syntaxcode code;
   int ch0, ch1;
+  Lisp_Object func, script, pos;
 
   immediate_quit = 1;
   QUIT;
@@ -1205,7 +1200,7 @@
 	      return 0;
 	    }
 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
-	  ch0 = FETCH_CHAR (from_byte);
+	  ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	  code = SYNTAX (ch0);
 	  INC_BOTH (from, from_byte);
 	  if (words_include_escapes
@@ -1216,18 +1211,33 @@
 	}
       /* Now CH0 is a character which begins a word and FROM is the
          position of the next character.  */
-      while (1)
+      func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
+      if (! NILP (Ffboundp (func)))
+	{
+	  pos = call2 (func, make_number (from - 1), make_number (end));
+	  if (INTEGERP (pos) && XINT (pos) > from)
+	    {
+	      from = XINT (pos);
+	      from_byte = CHAR_TO_BYTE (from);
+	    }
+	}
+      else
 	{
-	  if (from == end) break;
-	  UPDATE_SYNTAX_TABLE_FORWARD (from);
-	  ch1 = FETCH_CHAR (from_byte);
-	  code = SYNTAX (ch1);
-	  if (!(words_include_escapes
-		&& (code == Sescape || code == Scharquote)))
-	    if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
-	      break;
-	  INC_BOTH (from, from_byte);
-	  ch0 = ch1;
+	  script = CHAR_TABLE_REF (Vchar_script_table, ch0);
+	  while (1)
+	    {
+	      if (from == end) break;
+	      UPDATE_SYNTAX_TABLE_FORWARD (from);
+	      ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
+	      code = SYNTAX (ch1);
+	      if ((code != Sword
+		   && (! words_include_escapes
+		       || (code != Sescape && code != Scharquote)))
+		  || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script))
+		break;
+	      INC_BOTH (from, from_byte);
+	      ch0 = ch1;
+	    }
 	}
       count--;
     }
@@ -1242,7 +1252,7 @@
 	    }
 	  DEC_BOTH (from, from_byte);
 	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
-	  ch1 = FETCH_CHAR (from_byte);
+	  ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	  code = SYNTAX (ch1);
 	  if (words_include_escapes
 	      && (code == Sescape || code == Scharquote))
@@ -1252,22 +1262,37 @@
 	}
       /* Now CH1 is a character which ends a word and FROM is the
          position of it.  */
-      while (1)
+      func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
+      if (! NILP (Ffboundp (func)))
+ 	{
+	  pos = call2 (func, make_number (from), make_number (beg));
+	  if (INTEGERP (pos) && XINT (pos) < from)
+	    {
+	      from = XINT (pos);
+	      from_byte = CHAR_TO_BYTE (from);
+	    }
+	}
+      else
 	{
-	  int temp_byte;
-
-	  if (from == beg)
-	    break;
-	  temp_byte = dec_bytepos (from_byte);
-	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
-	  ch0 = FETCH_CHAR (temp_byte);
-	  code = SYNTAX (ch0);
-	  if (!(words_include_escapes
-		&& (code == Sescape || code == Scharquote)))
-	    if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
-	      break;
-	  DEC_BOTH (from, from_byte);
-	  ch1 = ch0;
+	  script = CHAR_TABLE_REF (Vchar_script_table, ch1);
+	  while (1)
+	    {
+	      int temp_byte;
+
+	      if (from == beg)
+		break;
+	      temp_byte = dec_bytepos (from_byte);
+	      UPDATE_SYNTAX_TABLE_BACKWARD (from);
+	      ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte);
+	      code = SYNTAX (ch0);
+	      if ((code != Sword
+		   && (! words_include_escapes
+		       || (code != Sescape && code != Scharquote)))
+		  || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
+		break;
+	      DEC_BOTH (from, from_byte);
+	      ch1 = ch0;
+	    }
 	}
       count++;
     }
@@ -1316,7 +1341,7 @@
      (string, lim)
      Lisp_Object string, lim;
 {
-  return skip_chars (1, 0, string, lim);
+  return skip_chars (1, string, lim);
 }
 
 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
@@ -1326,7 +1351,7 @@
      (string, lim)
      Lisp_Object string, lim;
 {
-  return skip_chars (0, 0, string, lim);
+  return skip_chars (0, string, lim);
 }
 
 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
@@ -1338,7 +1363,7 @@
      (syntax, lim)
      Lisp_Object syntax, lim;
 {
-  return skip_chars (1, 1, syntax, lim);
+  return skip_syntaxes (1, syntax, lim);
 }
 
 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
@@ -1350,54 +1375,32 @@
      (syntax, lim)
      Lisp_Object syntax, lim;
 {
-  return skip_chars (0, 1, syntax, lim);
+  return skip_syntaxes (0, syntax, lim);
 }
 
 static Lisp_Object
-skip_chars (forwardp, syntaxp, string, lim)
-     int forwardp, syntaxp;
+skip_chars (forwardp, string, lim)
+     int forwardp;
      Lisp_Object string, lim;
 {
   register unsigned int c;
   unsigned char fastmap[0400];
-  /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
-     of which codes don't fit in FASTMAP.  In that case, set the
-     ranges of characters in CHAR_RANGES.  */
+  /* Store the ranges of non-ASCII characters.  */
   int *char_ranges;
   int n_char_ranges = 0;
   int negate = 0;
   register int i, i_byte;
-  int multibyte = !NILP (current_buffer->enable_multibyte_characters);
+  /* Set to 1 if the current buffer is multibyte and the region
+     contains non-ASCII chars.  */
+  int multibyte;
+  /* Set to 1 if STRING is multibyte and it contains non-ASCII
+     chars.  */
   int string_multibyte;
   int size_byte;
   const unsigned char *str;
   int len;
 
   CHECK_STRING (string);
-  char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
-  string_multibyte = STRING_MULTIBYTE (string);
-  str = SDATA (string);
-  size_byte = SBYTES (string);
-
-  /* Adjust the multibyteness of the string to that of the buffer.  */
-  if (multibyte != string_multibyte)
-    {
-      int nbytes;
-
-      if (multibyte)
-	nbytes = count_size_as_multibyte (SDATA (string),
-					  SCHARS (string));
-      else
-	nbytes = SCHARS (string);
-      if (nbytes != size_byte)
-	{
-	  unsigned char *tmp = (unsigned char *) alloca (nbytes);
-	  copy_text (SDATA (string), tmp, size_byte,
-		     string_multibyte, multibyte);
-	  size_byte = nbytes;
-	  str = tmp;
-	}
-    }
 
   if (NILP (lim))
     XSETINT (lim, forwardp ? ZV : BEGV);
@@ -1410,10 +1413,18 @@
   if (XINT (lim) < BEGV)
     XSETFASTINT (lim, BEGV);
 
+  multibyte = (!NILP (current_buffer->enable_multibyte_characters)
+	       && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
+  string_multibyte = SBYTES (string) > SCHARS (string);
+
   bzero (fastmap, sizeof fastmap);
+  if (multibyte)
+    char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
+
+  str = SDATA (string);
+  size_byte = SBYTES (string);
 
   i_byte = 0;
-
   if (i_byte < size_byte
       && SREF (string, 0) == '^')
     {
@@ -1421,25 +1432,26 @@
     }
 
   /* Find the characters specified and set their elements of fastmap.
-     If syntaxp, each character counts as itself.
-     Otherwise, handle backslashes and ranges specially.  */
-
-  while (i_byte < size_byte)
+     Handle backslashes and ranges specially.
+
+     If STRING contains non-ASCII characters, setup char_ranges for
+     them and use fastmap only for their leading codes.  */
+
+  if (! string_multibyte)
     {
-      c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
-      i_byte += len;
-
-      if (syntaxp)
-	fastmap[syntax_spec_code[c & 0377]] = 1;
-      else
+      int string_has_eight_bit = 0;
+
+      /* At first setup fastmap.  */
+      while (i_byte < size_byte)
 	{
+	  c = str[i_byte++];
+
 	  if (c == '\\')
 	    {
 	      if (i_byte == size_byte)
 		break;
 
-	      c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
-	      i_byte += len;
+	      c = str[i_byte++];
 	    }
 	  if (i_byte < size_byte
 	      && str[i_byte] == '-')
@@ -1453,49 +1465,346 @@
 		break;
 
 	      /* Get the end of the range.  */
-	      c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
-	      i_byte += len;
-
-	      if (SINGLE_BYTE_CHAR_P (c))
+	      c2 = str[i_byte++];
+	      if (c2 == '\\'
+		  && i_byte < size_byte)
+		c2 = str[i_byte++];
+
+	      if (c <= c2)
 		{
-		  if (! SINGLE_BYTE_CHAR_P (c2))
-		    {
-		      /* Handle a range starting with a character of
-			 less than 256, and ending with a character of
-			 not less than 256.  Split that into two
-			 ranges, the low one ending at 0377, and the
-			 high one starting at the smallest character
-			 in the charset of C2 and ending at C2.  */
-		      int charset = CHAR_CHARSET (c2);
-		      int c1 = MAKE_CHAR (charset, 0, 0);
-
-		      char_ranges[n_char_ranges++] = c1;
-		      char_ranges[n_char_ranges++] = c2;
-		      c2 = 0377;
-		    }
 		  while (c <= c2)
-		    {
-		      fastmap[c] = 1;
-		      c++;
-		    }
-		}
-	      else if (c <= c2)	/* Both C and C2 are multibyte char.  */
-		{
-		  char_ranges[n_char_ranges++] = c;
-		  char_ranges[n_char_ranges++] = c2;
+		    fastmap[c++] = 1;
+		  if (! ASCII_CHAR_P (c2))
+		    string_has_eight_bit = 1;
 		}
 	    }
 	  else
 	    {
-	      if (SINGLE_BYTE_CHAR_P (c))
+	      fastmap[c] = 1;
+	      if (! ASCII_CHAR_P (c))
+		string_has_eight_bit = 1;
+	    }
+	}
+
+      /* If the current range is multibyte and STRING contains
+	 eight-bit chars, arrange fastmap and setup char_ranges for
+	 the corresponding multibyte chars.  */
+      if (multibyte && string_has_eight_bit)
+	{
+	  unsigned char fastmap2[0400];
+	  int range_start_byte, range_start_char;
+
+	  bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
+	  bzero (fastmap + 0200, 0200);
+	  /* We are sure that this loop stops.  */
+	  for (i = 0200; ! fastmap2[i]; i++);
+	  c = unibyte_char_to_multibyte (i);
+	  fastmap[CHAR_LEADING_CODE (c)] = 1;
+	  range_start_byte = i;
+	  range_start_char = c;
+	  for (i = 129; i < 0400; i++)
+	    {
+	      c = unibyte_char_to_multibyte (i);
+	      fastmap[CHAR_LEADING_CODE (c)] = 1;
+	      if (i - range_start_byte != c - range_start_char)
+		{
+		  char_ranges[n_char_ranges++] = range_start_char;
+		  char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+						  + range_start_char);
+		  range_start_byte = i;
+		  range_start_char = c;
+		}
+	    }
+	  char_ranges[n_char_ranges++] = range_start_char;
+	  char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
+					  + range_start_char);
+	}
+    }
+  else
+    {
+      while (i_byte < size_byte)
+	{
+	  unsigned char leading_code;
+
+	  leading_code = str[i_byte];
+	  c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+	  i_byte += len;
+
+	  if (c == '\\')
+	    {
+	      if (i_byte == size_byte)
+		break;
+
+	      leading_code = str[i_byte];
+	      c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
+	      i_byte += len;
+	    }
+	  if (i_byte < size_byte
+	      && str[i_byte] == '-')
+	    {
+	      unsigned int c2;
+	      unsigned char leading_code2;
+
+	      /* Skip over the dash.  */
+	      i_byte++;
+
+	      if (i_byte == size_byte)
+		break;
+
+	      /* Get the end of the range.  */
+	      leading_code2 = str[i_byte];
+	      c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+	      i_byte += len;
+
+	      if (c2 == '\\'
+		  && i_byte < size_byte)
+		{
+		  leading_code2 = str[i_byte];
+		  c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
+		  i_byte += len;
+		}
+
+	      if (ASCII_CHAR_P (c))
+		{
+		  while (c <= c2 && c < 0x80)
+		    fastmap[c++] = 1;
+		  leading_code = CHAR_LEADING_CODE (c);
+		}
+	      if (! ASCII_CHAR_P (c))
+		{
+		  while (leading_code <= leading_code2)
+		    fastmap[leading_code++] = 1;
+		  if (c <= c2)
+		    {
+		      char_ranges[n_char_ranges++] = c;
+		      char_ranges[n_char_ranges++] = c2;
+		    }
+		}
+	    }
+	  else
+	    {
+	      if (ASCII_CHAR_P (c))
 		fastmap[c] = 1;
 	      else
 		{
+		  fastmap[leading_code] = 1;
 		  char_ranges[n_char_ranges++] = c;
 		  char_ranges[n_char_ranges++] = c;
 		}
 	    }
 	}
+
+      /* If the current range is unibyte and STRING contains non-ASCII
+	 chars, arrange fastmap for the corresponding unibyte
+	 chars.  */
+
+      if (! multibyte && n_char_ranges > 0)
+	{
+	  bzero (fastmap + 0200, 0200);
+	  for (i = 0; i < n_char_ranges; i += 2)
+	    {
+	      int c1 = char_ranges[i];
+	      int c2 = char_ranges[i + 1];
+
+	      for (; c1 <= c2; c1++)
+		fastmap[CHAR_TO_BYTE8 (c1)] = 1;
+	    }
+	}
+    }
+
+  /* If ^ was the first character, complement the fastmap.  */
+  if (negate)
+    {
+      if (! multibyte)
+	for (i = 0; i < sizeof fastmap; i++)
+	  fastmap[i] ^= 1;
+      else
+	{
+	  for (i = 0; i < 0200; i++)
+	    fastmap[i] ^= 1;
+	  /* All non-ASCII chars possibly match.  */
+	  for (; i < sizeof fastmap; i++)
+	    fastmap[i] = 1;
+	}
+    }
+
+  {
+    int start_point = PT;
+    int pos = PT;
+    int pos_byte = PT_BYTE;
+    unsigned char *p = PT_ADDR, *endp, *stop;
+
+    if (forwardp)
+      {
+ 	endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
+ 	stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
+      }
+    else
+      {
+ 	endp = CHAR_POS_ADDR (XINT (lim));
+ 	stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
+      }
+
+    immediate_quit = 1;
+    if (forwardp)
+      {
+	if (multibyte)
+	  while (1)
+	    {
+	      int nbytes;
+
+	      if (p >= stop)
+		{
+		  if (p >= endp)
+		    break;
+		  p = GAP_END_ADDR;
+		  stop = endp;
+		}
+	      if (! fastmap[*p])
+		break;
+	      c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+	      if (! ASCII_CHAR_P (c))
+		{
+		  /* As we are looking at a multibyte character, we
+		     must look up the character in the table
+		     CHAR_RANGES.  If there's no data in the table,
+		     that character is not what we want to skip.  */
+
+		  /* The following code do the right thing even if
+		     n_char_ranges is zero (i.e. no data in
+		     CHAR_RANGES).  */
+		  for (i = 0; i < n_char_ranges; i += 2)
+		    if (c >= char_ranges[i] && c <= char_ranges[i + 1])
+		      break;
+		  if (!(negate ^ (i < n_char_ranges)))
+		    break;
+		}
+	      p += nbytes, pos++, pos_byte += nbytes;
+	    }
+	else
+	  while (1)
+	    {
+	      if (p >= stop)
+		{
+		  if (p >= endp)
+		    break;
+		  p = GAP_END_ADDR;
+		  stop = endp;
+		}
+	      if (!fastmap[*p])
+		break;
+	      p++, pos++, pos_byte++;
+	    }
+      }
+    else
+      {
+	if (multibyte)
+	  while (1)
+	    {
+	      unsigned char *prev_p;
+
+	      if (p <= stop)
+		{
+		  if (p <= endp)
+		    break;
+		  p = GPT_ADDR;
+		  stop = endp;
+		}
+	      prev_p = p;
+	      while (--p >= stop && ! CHAR_HEAD_P (*p));
+	      if (! fastmap[*p])
+		break;
+	      c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+	      if (! ASCII_CHAR_P (c))
+		{
+		  /* See the comment in the previous similar code.  */
+		  for (i = 0; i < n_char_ranges; i += 2)
+		    if (c >= char_ranges[i] && c <= char_ranges[i + 1])
+		      break;
+		  if (!(negate ^ (i < n_char_ranges)))
+		    break;
+		}
+	      pos--, pos_byte -= prev_p - p;
+	    }
+	else
+	  while (1)
+	    {
+	      if (p <= stop)
+		{
+		  if (p <= endp)
+		    break;
+		  p = GPT_ADDR;
+		  stop = endp;
+		}
+	      if (!fastmap[p[-1]])
+		break;
+	      p--, pos--, pos_byte--;
+	    }
+      }
+
+    SET_PT_BOTH (pos, pos_byte);
+    immediate_quit = 0;
+
+    return make_number (PT - start_point);
+  }
+}
+
+
+static Lisp_Object
+skip_syntaxes (forwardp, string, lim)
+     int forwardp;
+     Lisp_Object string, lim;
+{
+  register unsigned int c;
+  unsigned char fastmap[0400];
+  int negate = 0;
+  register int i, i_byte;
+  int multibyte;
+  int size_byte;
+  unsigned char *str;
+
+  CHECK_STRING (string);
+
+  if (NILP (lim))
+    XSETINT (lim, forwardp ? ZV : BEGV);
+  else
+    CHECK_NUMBER_COERCE_MARKER (lim);
+
+  /* In any case, don't allow scan outside bounds of buffer.  */
+  if (XINT (lim) > ZV)
+    XSETFASTINT (lim, ZV);
+  if (XINT (lim) < BEGV)
+    XSETFASTINT (lim, BEGV);
+
+  if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
+    return Qnil;
+
+  multibyte = (!NILP (current_buffer->enable_multibyte_characters)
+	       && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
+
+  bzero (fastmap, sizeof fastmap);
+
+  if (SBYTES (string) > SCHARS (string))
+    /* As this is very rare case (syntax spec is ASCII only), don't
+       consider efficiency.  */
+    string = string_make_unibyte (string);
+
+  str = SDATA (string);
+  size_byte = SBYTES (string);
+
+  i_byte = 0;
+  if (i_byte < size_byte
+      && SREF (string, 0) == '^')
+    {
+      negate = 1; i_byte++;
+    }
+
+  /* Find the syntaxes specified and set their elements of fastmap.  */
+
+  while (i_byte < size_byte)
+    {
+      c = str[i_byte++];
+      fastmap[syntax_spec_code[c]] = 1;
     }
 
   /* If ^ was the first character, complement the fastmap.  */
@@ -1512,224 +1821,99 @@
     if (forwardp)
       {
 	endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
-	stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp; 
+	stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
       }
     else
       {
 	endp = CHAR_POS_ADDR (XINT (lim));
-	stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp; 
+	stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
       }
 
     immediate_quit = 1;
-    if (syntaxp)
+    SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
+    if (forwardp)
       {
-        SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
-	if (forwardp)
+	if (multibyte)
 	  {
-	    if (multibyte)
-	      while (1)
-		{
-		  int nbytes;
-
-		  if (p >= stop)
-		    {
-		      if (p >= endp)
-			break;
-		      p = GAP_END_ADDR;
-		      stop = endp;
-		    }
-		  c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
-		  if (! fastmap[(int) SYNTAX (c)])
-		    break;
-		  p += nbytes, pos++, pos_byte += nbytes;
-		  UPDATE_SYNTAX_TABLE_FORWARD (pos);
-		}
-	    else
-	      while (1)
-		{
-		  if (p >= stop)
-		    {
-		      if (p >= endp)
-			break;
-		      p = GAP_END_ADDR;
-		      stop = endp;
-		    }
-		  if (! fastmap[(int) SYNTAX (*p)])
-		    break;
-		  p++, pos++;
-		  UPDATE_SYNTAX_TABLE_FORWARD (pos);
-		}
+	    while (1)
+	      {
+		int nbytes;
+
+		if (p >= stop)
+		  {
+		    if (p >= endp)
+		      break;
+		    p = GAP_END_ADDR;
+		    stop = endp;
+		  }
+		c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+		if (! fastmap[(int) SYNTAX (c)])
+		  break;
+		p += nbytes, pos++, pos_byte += nbytes;
+		UPDATE_SYNTAX_TABLE_FORWARD (pos);
+	      }
 	  }
 	else
 	  {
-	    if (multibyte)
-	      while (1)
-		{
-		  unsigned char *prev_p;
-		  int nbytes;
-
-		  if (p <= stop)
-		    {
-		      if (p <= endp)
-			break;
-		      p = GPT_ADDR;
-		      stop = endp;
-		    }
-		  prev_p = p;
-		  while (--p >= stop && ! CHAR_HEAD_P (*p));
-		  PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
-		  if (prev_p - p > nbytes)
-		    p = prev_p - 1, c = *p, nbytes = 1;
-		  else
-		    c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
-		  pos--, pos_byte -= nbytes;
-		  UPDATE_SYNTAX_TABLE_BACKWARD (pos);
-		  if (! fastmap[(int) SYNTAX (c)])
-		    {
-		      pos++;
-		      pos_byte += nbytes;
+	    while (1)
+	      {
+		if (p >= stop)
+		  {
+		    if (p >= endp)
 		      break;
-		    }
-		}
-	    else
-	      while (1)
-		{
-		  if (p <= stop)
-		    {
-		      if (p <= endp)
-			break;
-		      p = GPT_ADDR;
-		      stop = endp;
-		    }
-		  if (! fastmap[(int) SYNTAX (p[-1])])
-		    break;
-		  p--, pos--;
-		  UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
-		}
+		    p = GAP_END_ADDR;
+		    stop = endp;
+		  }
+		if (! fastmap[(int) SYNTAX (*p)])
+		  break;
+		p++, pos++, pos_byte++;
+		UPDATE_SYNTAX_TABLE_FORWARD (pos);
+	      }
 	  }
       }
     else
       {
-	if (forwardp)
+	if (multibyte)
 	  {
-	    if (multibyte)
-	      while (1)
-		{
-		  int nbytes;
-
-		  if (p >= stop)
-		    {
-		      if (p >= endp)
-			break;
-		      p = GAP_END_ADDR;
-		      stop = endp;
-		    }
-		  c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
-		  if (SINGLE_BYTE_CHAR_P (c))
-		    {
-		      if (!fastmap[c])
-			break;
-		    }
-		  else
-		    {
-		      /* If we are looking at a multibyte character,
-			 we must look up the character in the table
-			 CHAR_RANGES.  If there's no data in the
-			 table, that character is not what we want to
-			 skip.  */
-
-		      /* The following code do the right thing even if
-			 n_char_ranges is zero (i.e. no data in
-			 CHAR_RANGES).  */
-		      for (i = 0; i < n_char_ranges; i += 2)
-			if (c >= char_ranges[i] && c <= char_ranges[i + 1])
-			  break;
-		      if (!(negate ^ (i < n_char_ranges)))
-			break;
-		    }
-		  p += nbytes, pos++, pos_byte += nbytes;
-		}
-	    else
-	      while (1)
-		{
-		  if (p >= stop)
-		    {
-		      if (p >= endp)
-			break;
-		      p = GAP_END_ADDR;
-		      stop = endp;
-		    }
-		  if (!fastmap[*p])
-		    break;
-		  p++, pos++;
-		}
+	    while (1)
+	      {
+		unsigned char *prev_p;
+
+		if (p <= stop)
+		  {
+		    if (p <= endp)
+		      break;
+		    p = GPT_ADDR;
+		    stop = endp;
+		  }
+		prev_p = p;
+		while (--p >= stop && ! CHAR_HEAD_P (*p));
+		c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+		if (! fastmap[(int) SYNTAX (c)])
+		  break;
+		pos--, pos_byte -= prev_p - p;
+		UPDATE_SYNTAX_TABLE_BACKWARD (pos);
+	      }
 	  }
 	else
 	  {
-	    if (multibyte)
-	      while (1)
-		{
-		  unsigned char *prev_p;
-		  int nbytes;
-
-		  if (p <= stop)
-		    {
-		      if (p <= endp)
-			break;
-		      p = GPT_ADDR;
-		      stop = endp;
-		    }
-		  prev_p = p;
-		  while (--p >= stop && ! CHAR_HEAD_P (*p));
-		  PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
-		  if (prev_p - p > nbytes)
-		    p = prev_p - 1, c = *p, nbytes = 1;
-		  else
-		    c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
-		  if (SINGLE_BYTE_CHAR_P (c))
-		    {
-		      if (!fastmap[c])
-			break;
-		    }
-		  else
-		    {
-		      /* See the comment in the previous similar code.  */
-		      for (i = 0; i < n_char_ranges; i += 2)
-			if (c >= char_ranges[i] && c <= char_ranges[i + 1])
-			  break;
-		      if (!(negate ^ (i < n_char_ranges)))
-			break;
-		    }
-		  pos--, pos_byte -= nbytes;
-		}
-	    else
-	      while (1)
-		{
-		  if (p <= stop)
-		    {
-		      if (p <= endp)
-			break;
-		      p = GPT_ADDR;
-		      stop = endp;
-		    }
-		  if (!fastmap[p[-1]])
-		    break;
-		  p--, pos--;
-		}
+	    while (1)
+	      {
+		if (p <= stop)
+		  {
+		    if (p <= endp)
+		      break;
+		    p = GPT_ADDR;
+		    stop = endp;
+		  }
+		if (! fastmap[(int) SYNTAX (p[-1])])
+		  break;
+		p--, pos--, pos_byte--;
+		UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
+	      }
 	  }
       }
 
-#if 0 /* Not needed now that a position in mid-character
-	 cannot be specified in Lisp.  */
-    if (multibyte
-	/* INC_POS or DEC_POS might have moved POS over LIM.  */
-	&& (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
-      pos = XINT (lim);
-#endif
-
-    if (! multibyte)
-      pos_byte = pos;
-
     SET_PT_BOTH (pos, pos_byte);
     immediate_quit = 0;
 
@@ -1788,7 +1972,7 @@
 	  *bytepos_ptr = from_byte;
 	  return 0;
 	}
-      c = FETCH_CHAR (from_byte);
+      c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
       syntax = SYNTAX_WITH_FLAGS (c);
       code = syntax & 0xff;
       if (code == Sendcomment
@@ -1818,7 +2002,7 @@
     forw_incomment:
       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
 	  && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
-	  && (c1 = FETCH_CHAR (from_byte),
+	  && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
 	      SYNTAX_COMEND_SECOND (c1))
 	  && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
 	       SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
@@ -1837,7 +2021,7 @@
       if (nesting > 0
 	  && from < stop
 	  && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
-	  && (c1 = FETCH_CHAR (from_byte),
+	  && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
 	      SYNTAX_COMMENT_STYLE (c1) == style
 	      && SYNTAX_COMSTART_SECOND (c1))
 	  && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
@@ -1901,7 +2085,7 @@
 	      immediate_quit = 0;
 	      return Qnil;
 	    }
-	  c = FETCH_CHAR (from_byte);
+	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	  code = SYNTAX (c);
 	  comstart_first = SYNTAX_COMSTART_FIRST (c);
 	  comnested = SYNTAX_COMMENT_NESTED (c);
@@ -1909,7 +2093,7 @@
 	  INC_BOTH (from, from_byte);
 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
 	  if (from < stop && comstart_first
-	      && (c1 = FETCH_CHAR (from_byte),
+	      && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
 		  SYNTAX_COMSTART_SECOND (c1)))
 	    {
 	      /* We have encountered a comment start sequence and we
@@ -1967,7 +2151,7 @@
 	  DEC_BOTH (from, from_byte);
 	  /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
 	  quoted = char_quoted (from, from_byte);
-	  c = FETCH_CHAR (from_byte);
+	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	  code = SYNTAX (c);
 	  comstyle = 0;
 	  comnested = SYNTAX_COMMENT_NESTED (c);
@@ -1984,7 +2168,7 @@
 	      code = Sendcomment;
 	      /* Calling char_quoted, above, set up global syntax position
 		 at the new value of FROM.  */
-	      c1 = FETCH_CHAR (from_byte);
+	      c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	      comstyle = SYNTAX_COMMENT_STYLE (c1);
 	      comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
 	    }
@@ -2000,7 +2184,7 @@
 		  if (from == stop)
 		    break;
 		  UPDATE_SYNTAX_TABLE_BACKWARD (from);
-		  c = FETCH_CHAR (from_byte);
+		  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 		  if (SYNTAX (c) == Scomment_fence
 		      && !char_quoted (from, from_byte))
 		    {
@@ -2061,11 +2245,11 @@
   return Qt;
 }
 
-/* Return syntax code of character C if C is a single byte character
+/* Return syntax code of character C if C is an ASCII character
    or `multibyte_symbol_p' is zero.  Otherwise, return Ssymbol.  */
 
-#define SYNTAX_WITH_MULTIBYTE_CHECK(c)			\
-  ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p)	\
+#define SYNTAX_WITH_MULTIBYTE_CHECK(c)		\
+  ((ASCII_CHAR_P (c) || !multibyte_symbol_p)	\
    ? SYNTAX (c) : Ssymbol)
 
 static Lisp_Object
@@ -2108,7 +2292,7 @@
 	{
 	  int comstart_first, prefix;
 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
-	  c = FETCH_CHAR (from_byte);
+	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	  code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
 	  comstart_first = SYNTAX_COMSTART_FIRST (c);
 	  comnested = SYNTAX_COMMENT_NESTED (c);
@@ -2119,7 +2303,7 @@
 	  INC_BOTH (from, from_byte);
 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
 	  if (from < stop && comstart_first
-	      && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
+	      && SYNTAX_COMSTART_SECOND (FETCH_CHAR_AS_MULTIBYTE (from_byte))
 	      && parse_sexp_ignore_comments)
 	    {
 	      /* we have encountered a comment start sequence and we
@@ -2128,7 +2312,7 @@
 		 only a comment end of the same style actually ends
 		 the comment section */
 	      code = Scomment;
-	      c1 = FETCH_CHAR (from_byte);
+	      c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	      comstyle = SYNTAX_COMMENT_STYLE (c1);
 	      comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
 	      INC_BOTH (from, from_byte);
@@ -2154,7 +2338,7 @@
 		  UPDATE_SYNTAX_TABLE_FORWARD (from);
 
 		  /* Some compilers can't handle this inside the switch.  */
-		  c = FETCH_CHAR (from_byte);
+		  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 		  temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
 		  switch (temp)
 		    {
@@ -2197,7 +2381,7 @@
 	    case Smath:
 	      if (!sexpflag)
 		break;
-	      if (from != stop && c == FETCH_CHAR (from_byte))
+	      if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
 		{
 		  INC_BOTH (from, from_byte);
 		}
@@ -2225,12 +2409,12 @@
 	    case Sstring:
 	    case Sstring_fence:
 	      temp_pos = dec_bytepos (from_byte);
-	      stringterm = FETCH_CHAR (temp_pos);
+	      stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
 	      while (1)
 		{
 		  if (from >= stop) goto lose;
 		  UPDATE_SYNTAX_TABLE_FORWARD (from);
-		  c = FETCH_CHAR (from_byte);
+		  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 		  if (code == Sstring
 		      ? (c == stringterm
 			 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
@@ -2273,7 +2457,7 @@
 	{
 	  DEC_BOTH (from, from_byte);
 	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
-	  c = FETCH_CHAR (from_byte);
+	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	  code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
 	  if (depth == min_depth)
 	    last_good = from;
@@ -2291,7 +2475,7 @@
 	      DEC_BOTH (from, from_byte);
 	      UPDATE_SYNTAX_TABLE_BACKWARD (from);
 	      code = Sendcomment;
-	      c1 = FETCH_CHAR (from_byte);
+	      c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	      comstyle = SYNTAX_COMMENT_STYLE (c1);
 	      comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
 	    }
@@ -2324,7 +2508,7 @@
 		  else
 		    temp_pos--;
 		  UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
-		  c1 = FETCH_CHAR (temp_pos);
+		  c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
 		  temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
 		  /* Don't allow comment-end to be quoted.  */
 		  if (temp_code == Sendcomment)
@@ -2336,7 +2520,7 @@
 		      temp_pos = dec_bytepos (temp_pos);
 		      UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
 		    }
-		  c1 = FETCH_CHAR (temp_pos);
+		  c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
 		  temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
 		  if (! (quoted || temp_code == Sword
 			 || temp_code == Ssymbol
@@ -2351,7 +2535,7 @@
 		break;
 	      temp_pos = dec_bytepos (from_byte);
 	      UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
-	      if (from != stop && c == FETCH_CHAR (temp_pos))
+	      if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
 		DEC_BOTH (from, from_byte);
 	      if (mathexit)
 		{
@@ -2397,7 +2581,7 @@
 		  if (from == stop) goto lose;
 		  UPDATE_SYNTAX_TABLE_BACKWARD (from);
 		  if (!char_quoted (from, from_byte)
-		      && (c = FETCH_CHAR (from_byte),
+		      && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
 			  SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
 		    break;
 		}
@@ -2405,7 +2589,7 @@
 	      break;
 
 	    case Sstring:
-	      stringterm = FETCH_CHAR (from_byte);
+	      stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 	      while (1)
 		{
 		  if (from == stop) goto lose;
@@ -2416,7 +2600,7 @@
 		    temp_pos--;
 		  UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
 		  if (!char_quoted (from - 1, temp_pos)
-		      && stringterm == (c = FETCH_CHAR (temp_pos))
+		      && stringterm == (c = FETCH_CHAR_AS_MULTIBYTE (temp_pos))
 		      && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
 		    break;
 		  DEC_BOTH (from, from_byte);
@@ -2524,7 +2708,7 @@
 
   while (!char_quoted (pos, pos_byte)
 	 /* Previous statement updates syntax table.  */
-	 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
+	 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
 	     || SYNTAX_PREFIX (c)))
     {
       opoint = pos;
@@ -2552,7 +2736,8 @@
 		    stopbefore, oldstate, commentstop)
      struct lisp_parse_state *stateptr;
      register int from;
-     int end, targetdepth, stopbefore, from_byte;
+     int from_byte;
+     int end, targetdepth, stopbefore;
      Lisp_Object oldstate;
      int commentstop;
 {
@@ -2590,7 +2775,7 @@
 do { prev_from = from;				\
      prev_from_byte = from_byte; 		\
      prev_from_syntax				\
-       = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
+       = SYNTAX_WITH_FLAGS (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte)); \
      INC_BOTH (from, from_byte);		\
      if (from < end)				\
        UPDATE_SYNTAX_TABLE_FORWARD (from);	\
@@ -2706,7 +2891,7 @@
 	}
      else if (from < end)
 	if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
-	  if (c1 = FETCH_CHAR (from_byte),
+	  if (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
 	      SYNTAX_COMSTART_SECOND (c1))
 	    /* Duplicate code to avoid a complex if-expression
 	       which causes trouble for the SGI compiler.  */
@@ -2744,7 +2929,7 @@
 	  while (from < end)
 	    {
 	      /* Some compilers can't handle this inside the switch.  */
-	      temp = SYNTAX (FETCH_CHAR (from_byte));
+	      temp = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (from_byte));
 	      switch (temp)
 		{
 		case Scharquote:
@@ -2817,7 +3002,7 @@
 	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
 	  curlevel->last = prev_from;
 	  state.instring = (code == Sstring
-			    ? (FETCH_CHAR (prev_from_byte))
+			    ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
 			    : ST_STRING_STYLE);
 	  if (boundary_stop) goto done;
 	startinstring:
@@ -2829,7 +3014,7 @@
 		int c;
 
 		if (from >= end) goto done;
-		c = FETCH_CHAR (from_byte);
+		c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 		/* Some compilers can't handle this inside the switch.  */
 		temp = SYNTAX (c);
 
@@ -3041,8 +3226,7 @@
 
   /* All multibyte characters have syntax `word' by default.  */
   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
-  for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
-    XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
+  char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
 }
 
 void
@@ -3082,6 +3266,25 @@
 	       doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun.  */);
   open_paren_in_column_0_is_defun_start = 1;
 
+
+  DEFVAR_LISP ("find-word-boundary-function-table",
+	       &Vfind_word_boundary_function_table,
+	       doc: /*
+Char table of functions to search for the word boundary.
+Each function is called with two arguments; POS and LIMIT.
+POS and LIMIT are character positions in the current buffer.
+
+If POS is less than LIMIT, POS is at the first character of a word,
+and the return value of a function is a position after the last
+character of that word.
+
+If POS is not less than LIMIT, POS is at the last character of a word,
+and the return value of a function is a position at the first
+character of that word.
+
+In both cases, LIMIT bounds the search. */);
+  Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
+
   defsubr (&Ssyntax_table_p);
   defsubr (&Ssyntax_table);
   defsubr (&Sstandard_syntax_table);