changeset 21117:a88d2c555a06

(simple_search): Don't count a character until it matches! Call set_search_regs differently in a forward search. (boyer_moore): Fix up the code that translates the pattern and loops through equivalent characters.
author Richard M. Stallman <rms@gnu.org>
date Mon, 09 Mar 1998 00:25:30 +0000
parents ccf251830c66
children 2aa0dc203036
files src/search.c
diffstat 1 files changed, 62 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/src/search.c	Sun Mar 08 23:40:28 1998 +0000
+++ b/src/search.c	Mon Mar 09 00:25:30 1998 +0000
@@ -1304,6 +1304,7 @@
      int lim, lim_byte;
 {
   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+  int forward = n > 0;
 
   if (lim > pos && multibyte)
     while (n > 0)
@@ -1322,22 +1323,23 @@
 	    while (this_len > 0)
 	      {
 		int charlen, buf_charlen;
-		int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
-		int buf_ch;
+		int pat_ch, buf_ch;
+
+		pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
+		buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
+						 ZV_BYTE - this_pos_byte,
+						 buf_charlen);
+		TRANSLATE (buf_ch, trt, buf_ch);
+
+		if (buf_ch != pat_ch)
+		  break;
 
 		this_len_byte -= charlen;
 		this_len--;
 		p += charlen;
 
-		buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
-						 ZV_BYTE - this_pos_byte,
-						 buf_charlen);
 		this_pos_byte += buf_charlen;
 		this_pos++;
-		TRANSLATE (buf_ch, trt, buf_ch);
-
-		if (buf_ch != pat_ch)
-		  break;
 	      }
 
 	    if (this_len == 0)
@@ -1369,12 +1371,13 @@
 	      {
 		int pat_ch = *p++;
 		int buf_ch = FETCH_BYTE (this_pos);
-		this_len--;
-		this_pos++;
 		TRANSLATE (buf_ch, trt, buf_ch);
 
 		if (buf_ch != pat_ch)
 		  break;
+
+		this_len--;
+		this_pos++;
 	      }
 
 	    if (this_len == 0)
@@ -1407,22 +1410,22 @@
 	    while (this_len > 0)
 	      {
 		int charlen, buf_charlen;
-		int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
-		int buf_ch;
+		int pat_ch, buf_ch;
+
+		pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
+		buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
+						 ZV_BYTE - this_pos_byte,
+						 buf_charlen);
+		TRANSLATE (buf_ch, trt, buf_ch);
+
+		if (buf_ch != pat_ch)
+		  break;
 
 		this_len_byte -= charlen;
 		this_len--;
 		p += charlen;
-
-		buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
-						 ZV_BYTE - this_pos_byte,
-						 buf_charlen);
 		this_pos_byte += buf_charlen;
 		this_pos++;
-		TRANSLATE (buf_ch, trt, buf_ch);
-
-		if (buf_ch != pat_ch)
-		  break;
 	      }
 
 	    if (this_len == 0)
@@ -1454,12 +1457,12 @@
 	      {
 		int pat_ch = *p++;
 		int buf_ch = FETCH_BYTE (this_pos);
-		this_len--;
-		this_pos++;
 		TRANSLATE (buf_ch, trt, buf_ch);
 
 		if (buf_ch != pat_ch)
 		  break;
+		this_len--;
+		this_pos++;
 	      }
 
 	    if (this_len == 0)
@@ -1477,7 +1480,10 @@
  stop:
   if (n == 0)
     {
-      set_search_regs (multibyte ? pos_byte : pos, len_byte);
+      if (forward)
+	set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
+      else
+	set_search_regs (multibyte ? pos_byte : pos, len_byte);
 
       return pos;
     }
@@ -1605,9 +1611,9 @@
 	      while (! CHAR_HEAD_P (*charstart))
 		charstart--;
 	      untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
-	      TRANSLATE (ch, trt, untranslated);
-	      if (charset_base == (ch & ~0xff))
+	      if (charset_base == (untranslated & ~0xff))
 		{
+		  TRANSLATE (ch, trt, untranslated);
 		  if (! CHAR_HEAD_P (*ptr))
 		    {
 		      translate_prev_byte = ptr[-1];
@@ -1616,7 +1622,10 @@
 		    }
 		}
 	      else
-		this_translated = 0;
+		{
+		  this_translated = 0;
+		  ch = *ptr;
+		}
 	    }
 	  else if (!multibyte)
 	    TRANSLATE (ch, trt, *ptr);
@@ -1626,23 +1635,38 @@
 	      this_translated = 0;
 	    }
 
-	  k = j = (unsigned char) ch;
+	  if (ch > 0400)
+	    j = ((unsigned char) ch) | 0200;
+	  else
+	    j = (unsigned char) ch;
+
 	  if (i == infinity)
 	    stride_for_teases = BM_tab[j];
+
 	  BM_tab[j] = dirlen - i;
 	  /* A translation table is accompanied by its inverse -- see */
 	  /* comment following downcase_table for details */ 
 	  if (this_translated)
-	    while (1)
-	      {
-		TRANSLATE (ch, inverse_trt, ch);
-		/* For all the characters that map into K,
-		   set up simple_translate to map them into K.  */
-		simple_translate[(unsigned char) ch] = k;
-		if ((unsigned char) ch == k)
-		  break;
-		BM_tab[(unsigned char) ch] = dirlen - i;
-	      }
+	    {
+	      int starting_ch = ch;
+	      int starting_j = j;
+	      while (1)
+		{
+		  TRANSLATE (ch, inverse_trt, ch);
+		  if (ch > 0400)
+		    j = ((unsigned char) ch) | 0200;
+		  else
+		    j = (unsigned char) ch;
+
+		  /* For all the characters that map into CH,
+		     set up simple_translate to map the last byte
+		     into STARTING_J.  */
+		  simple_translate[j] = starting_j;
+		  if (ch == starting_ch)
+		    break;
+		  BM_tab[j] = dirlen - i;
+		}
+	    }
 	}
       else
 	{