changeset 23876:8d2f38338c81

(search_buffer): Don't use Boyer-Moore to search for an invalid multibyte code. In the unibyte case, there is no need to check whether there are translations in more than one charset; just set charset_base to 0.
author Kenichi Handa <handa@m17n.org>
date Tue, 15 Dec 1998 04:35:38 +0000
parents 4b12480a9cd7
children 2d62a1611751
files src/search.c
diffstat 1 files changed, 13 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/src/search.c	Tue Dec 15 04:35:38 1998 +0000
+++ b/src/search.c	Tue Dec 15 04:35:38 1998 +0000
@@ -1142,7 +1142,7 @@
       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
       unsigned char *base_pat = XSTRING (string)->data;
       int charset_base = -1;
-      int simple = 1;
+      int boyer_moore_ok = 1;
 
       /* MULTIBYTE says whether the text to be searched is multibyte.
 	 We must convert PATTERN to match that, or we will not really
@@ -1204,6 +1204,12 @@
 		}
 
 	      c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
+
+	      /* If we are searching for something strange,
+		 an invalid multibyte code, don't use boyer-moore.  */
+	      if (! ASCII_BYTE_P (c))
+		boyer_moore_ok = 0;
+
 	      /* Translate the character, if requested.  */
 	      TRANSLATE (translated, trt, c);
 	      /* If translation changed the byte-length, go back
@@ -1229,8 +1235,8 @@
 		  else if (charset_base != charset_base_code)
 		    /* If two different rows appear, needing translation,
 		       then we cannot use boyer_moore search.  */
-		    simple = 0;
-		    /* ??? Handa: this must do simple = 0
+		    boyer_moore_ok = 0;
+		    /* ??? Handa: this must do boyer_moore_ok = 0
 		       if c is a composite character.  */
 		}
 
@@ -1243,9 +1249,11 @@
 	}
       else
 	{
+	  /* Unibyte buffer.  */
+	  charset_base = 0;
 	  while (--len >= 0)
 	    {
-	      int c, translated, inverse;
+	      int c, translated;
 
 	      /* If we got here and the RE flag is set, it's because we're
 		 dealing with a regexp known to be trivial, so the backslash
@@ -1257,22 +1265,6 @@
 		}
 	      c = *base_pat++;
 	      TRANSLATE (translated, trt, c);
-	      TRANSLATE (inverse, inverse_trt, c);
-
-	      /* Did this char actually get translated?
-		 Would any other char get translated into it?  */
-	      if (translated != c || inverse != c)
-		{
-		  /* Keep track of which character set row
-		     contains the characters that need translation.  */
-		  int charset_base_code = c & ~0xff;
-		  if (charset_base == -1)
-		    charset_base = charset_base_code;
-		  else if (charset_base != charset_base_code)
-		    /* If two different rows appear, needing translation,
-		       then we cannot use boyer_moore search.  */
-		    simple = 0;
-		}
 	      *pat++ = translated;
 	    }
 	}
@@ -1281,7 +1273,7 @@
       len = raw_pattern_size;
       pat = base_pat = patbuf;
 
-      if (simple)
+      if (boyer_moore_ok)
 	return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
 			    pos, pos_byte, lim, lim_byte,
 			    charset_base);