Mercurial > emacs

diff src/search.c @ 603:470f556a9453
Initial revision
author: Jim Blandy <jimb@redhat.com>
date: Wed, 01 Apr 1992 10:45:51 +0000
children: 5f36058e31f9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/search.c	Wed Apr 01 10:45:51 1992 +0000
@@ -0,0 +1,1294 @@
+/* String search routines for GNU Emacs.
+   Copyright (C) 1985, 1986, 1987, 1992 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+
+#include "config.h"
+#include "lisp.h"
+#include "syntax.h"
+#include "buffer.h"
+#include "commands.h"
+#include <sys/types.h>
+#include "regex.h"
+
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/* We compile regexps into this buffer and then use it for searching. */
+
+struct re_pattern_buffer searchbuf;
+
+char search_fastmap[0400];
+
+/* Last regexp we compiled */
+
+Lisp_Object last_regexp;
+
+/* Every call to re_match, etc., must pass &search_regs as the regs argument
+ unless you can show it is unnecessary (i.e., if re_match is certainly going
+ to be called again before region-around-match can be called).  */
+
+static struct re_registers search_regs;
+
+/* Nonzero if search_regs are indices in a string; 0 if in a buffer.  */
+
+static int search_regs_from_string;
+
+/* error condition signalled when regexp compile_pattern fails */
+
+Lisp_Object Qinvalid_regexp;
+
+static void
+matcher_overflow ()
+{
+  error ("Stack overflow in regexp matcher");
+}
+
+#ifdef __STDC__
+#define CONST const
+#else
+#define CONST
+#endif
+
+/* Compile a regexp and signal a Lisp error if anything goes wrong.  */
+
+compile_pattern (pattern, bufp, translate)
+     Lisp_Object pattern;
+     struct re_pattern_buffer *bufp;
+     char *translate;
+{
+  CONST char *val;
+  Lisp_Object dummy;
+
+  if (EQ (pattern, last_regexp)
+      && translate == bufp->translate)
+    return;
+  last_regexp = Qnil;
+  bufp->translate = translate;
+  val = re_compile_pattern ((char *) XSTRING (pattern)->data,
+			    XSTRING (pattern)->size,
+			    bufp);
+  if (val)
+    {
+      dummy = build_string (val);
+      while (1)
+	Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
+    }
+  last_regexp = pattern;
+  return;
+}
+
+/* Error condition used for failing searches */
+Lisp_Object Qsearch_failed;
+
+Lisp_Object
+signal_failure (arg)
+     Lisp_Object arg;
+{
+  Fsignal (Qsearch_failed, Fcons (arg, Qnil));
+  return Qnil;
+}
+
+DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
+  "Return t if text after point matches regular expression PAT.")
+  (string)
+     Lisp_Object string;
+{
+  Lisp_Object val;
+  unsigned char *p1, *p2;
+  int s1, s2;
+  register int i;
+
+  CHECK_STRING (string, 0);
+  compile_pattern (string, &searchbuf,
+		   !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
+
+  immediate_quit = 1;
+  QUIT;			/* Do a pending quit right away, to avoid paradoxical behavior */
+
+  /* Get pointers and sizes of the two strings
+     that make up the visible portion of the buffer. */
+
+  p1 = BEGV_ADDR;
+  s1 = GPT - BEGV;
+  p2 = GAP_END_ADDR;
+  s2 = ZV - GPT;
+  if (s1 < 0)
+    {
+      p2 = p1;
+      s2 = ZV - BEGV;
+      s1 = 0;
+    }
+  if (s2 < 0)
+    {
+      s1 = ZV - BEGV;
+      s2 = 0;
+    }
+  
+  i = re_match_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
+		  point - BEGV, &search_regs,
+		  ZV - BEGV);
+  if (i == -2)
+    matcher_overflow ();
+
+  val = (0 <= i ? Qt : Qnil);
+  for (i = 0; i < RE_NREGS; i++)
+    if (search_regs.start[i] >= 0)
+      {
+	search_regs.start[i] += BEGV;
+	search_regs.end[i] += BEGV;
+      }
+  search_regs_from_string = 0;
+  immediate_quit = 0;
+  return val;
+}
+
+DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
+  "Return index of start of first match for REGEXP in STRING, or nil.\n\
+If third arg START is non-nil, start search at that index in STRING.\n\
+For index of first char beyond the match, do (match-end 0).\n\
+`match-end' and `match-beginning' also give indices of substrings\n\
+matched by parenthesis constructs in the pattern.")
+  (regexp, string, start)
+     Lisp_Object regexp, string, start;
+{
+  int val;
+  int s;
+
+  CHECK_STRING (regexp, 0);
+  CHECK_STRING (string, 1);
+
+  if (NILP (start))
+    s = 0;
+  else
+    {
+      int len = XSTRING (string)->size;
+
+      CHECK_NUMBER (start, 2);
+      s = XINT (start);
+      if (s < 0 && -s <= len)
+	s = len - s;
+      else if (0 > s || s > len)
+	args_out_of_range (string, start);
+    }
+
+  compile_pattern (regexp, &searchbuf,
+		   !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
+  immediate_quit = 1;
+  val = re_search (&searchbuf, (char *) XSTRING (string)->data,
+		   XSTRING (string)->size, s, XSTRING (string)->size - s,
+		   &search_regs);
+  immediate_quit = 0;
+  search_regs_from_string = 1;
+  if (val == -2)
+    matcher_overflow ();
+  if (val < 0) return Qnil;
+  return make_number (val);
+}
+
+scan_buffer (target, pos, cnt, shortage)
+     int *shortage, pos;
+     register int cnt, target;
+{
+  int lim = ((cnt > 0) ? ZV - 1 : BEGV);
+  int direction = ((cnt > 0) ? 1 : -1);
+  register int lim0;
+  unsigned char *base;
+  register unsigned char *cursor, *limit;
+
+  if (shortage != 0)
+    *shortage = 0;
+
+  immediate_quit = 1;
+
+  if (cnt > 0)
+    while (pos != lim + 1)
+      {
+	lim0 =  BUFFER_CEILING_OF (pos);
+	lim0 = min (lim, lim0);
+	limit = &FETCH_CHAR (lim0) + 1;
+	base = (cursor = &FETCH_CHAR (pos));
+	while (1)
+	  {
+	    while (*cursor != target && ++cursor != limit)
+	      ;
+	    if (cursor != limit)
+	      {
+		if (--cnt == 0)
+		  {
+		    immediate_quit = 0;
+		    return (pos + cursor - base + 1);
+		  }
+		else
+		  if (++cursor == limit)
+		    break;
+	      }
+	    else
+	      break;
+	  }
+	pos += cursor - base;
+      }
+  else
+    {
+      pos--;			/* first character we scan */
+      while (pos > lim - 1)
+	{			/* we WILL scan under pos */
+	  lim0 =  BUFFER_FLOOR_OF (pos);
+	  lim0 = max (lim, lim0);
+	  limit = &FETCH_CHAR (lim0) - 1;
+	  base = (cursor = &FETCH_CHAR (pos));
+	  cursor++;
+	  while (1)
+	    {
+	      while (--cursor != limit && *cursor != target)
+		;
+	      if (cursor != limit)
+		{
+		  if (++cnt == 0)
+		    {
+		      immediate_quit = 0;
+		      return (pos + cursor - base + 1);
+		    }
+		}
+	      else
+		break;
+	    }
+	  pos += cursor - base;
+	}
+    }
+  immediate_quit = 0;
+  if (shortage != 0)
+    *shortage = cnt * direction;
+  return (pos + ((direction == 1 ? 0 : 1)));
+}
+
+int
+find_next_newline (from, cnt)
+     register int from, cnt;
+{
+  return (scan_buffer ('\n', from, cnt, (int *) 0));
+}
+
+DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
+  "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
+CHARS is like the inside of a `[...]' in a regular expression\n\
+except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
+Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
+With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
+  (string, lim)
+     Lisp_Object string, lim;
+{
+  skip_chars (1, string, lim);
+  return Qnil;
+}
+
+DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
+  "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
+See `skip-chars-forward' for details.")
+  (string, lim)
+     Lisp_Object string, lim;
+{
+  skip_chars (0, string, lim);
+  return Qnil;
+}
+
+skip_chars (forwardp, string, lim)
+     int forwardp;
+     Lisp_Object string, lim;
+{
+  register unsigned char *p, *pend;
+  register unsigned char c;
+  unsigned char fastmap[0400];
+  int negate = 0;
+  register int i;
+
+  CHECK_STRING (string, 0);
+
+  if (NILP (lim))
+    XSET (lim, Lisp_Int, forwardp ? ZV : BEGV);
+  else
+    CHECK_NUMBER_COERCE_MARKER (lim, 1);
+
+#if 0				/* This breaks some things... jla. */
+  /* In any case, don't allow scan outside bounds of buffer.  */
+  if (XFASTINT (lim) > ZV)
+    XFASTINT (lim) = ZV;
+  if (XFASTINT (lim) < BEGV)
+    XFASTINT (lim) = BEGV;
+#endif
+
+  p = XSTRING (string)->data;
+  pend = p + XSTRING (string)->size;
+  bzero (fastmap, sizeof fastmap);
+
+  if (p != pend && *p == '^')
+    {
+      negate = 1; p++;
+    }
+
+  /* Find the characters specified and set their elements of fastmap.  */
+
+  while (p != pend)
+    {
+      c = *p++;
+      if (c == '\\')
+        {
+	  if (p == pend) break;
+	  c = *p++;
+	}
+      if (p != pend && *p == '-')
+	{
+	  p++;
+	  if (p == pend) break;
+	  while (c <= *p)
+	    {
+	      fastmap[c] = 1;
+	      c++;
+	    }
+	  p++;
+	}
+      else
+	fastmap[c] = 1;
+    }
+
+  /* If ^ was the first character, complement the fastmap. */
+
+  if (negate)
+    for (i = 0; i < sizeof fastmap; i++)
+      fastmap[i] ^= 1;
+
+  immediate_quit = 1;
+  if (forwardp)
+    {
+      while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
+	SET_PT (point + 1);
+    }
+  else
+    {
+      while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
+	SET_PT (point - 1);
+    }
+  immediate_quit = 0;
+}
+
+/* Subroutines of Lisp buffer search functions. */
+
+static Lisp_Object
+search_command (string, bound, noerror, count, direction, RE)
+     Lisp_Object string, bound, noerror, count;
+     int direction;
+     int RE;
+{
+  register int np;
+  int lim;
+  int n = direction;
+
+  if (!NILP (count))
+    {
+      CHECK_NUMBER (count, 3);
+      n *= XINT (count);
+    }
+
+  CHECK_STRING (string, 0);
+  if (NILP (bound))
+    lim = n > 0 ? ZV : BEGV;
+  else
+    {
+      CHECK_NUMBER_COERCE_MARKER (bound, 1);
+      lim = XINT (bound);
+      if (n > 0 ? lim < point : lim > point)
+	error ("Invalid search bound (wrong side of point)");
+      if (lim > ZV)
+	lim = ZV;
+      if (lim < BEGV)
+	lim = BEGV;
+    }
+
+  np = search_buffer (string, point, lim, n, RE,
+		      (!NILP (current_buffer->case_fold_search)
+		       ? XSTRING (current_buffer->case_canon_table)->data : 0),
+		      (!NILP (current_buffer->case_fold_search)
+		       ? XSTRING (current_buffer->case_eqv_table)->data : 0));
+  if (np <= 0)
+    {
+      if (NILP (noerror))
+	return signal_failure (string);
+      if (!EQ (noerror, Qt))
+	{
+	  if (lim < BEGV || lim > ZV)
+	    abort ();
+	  SET_PT (lim);
+	}
+      return Qnil;
+    }
+
+  if (np < BEGV || np > ZV)
+    abort ();
+
+  SET_PT (np);
+
+  return make_number (np);
+}
+
+/* search for the n'th occurrence of STRING in the current buffer,
+   starting at position POS and stopping at position LIM,
+   treating PAT as a literal string if RE is false or as
+   a regular expression if RE is true.
+
+   If N is positive, searching is forward and LIM must be greater than POS.
+   If N is negative, searching is backward and LIM must be less than POS.
+
+   Returns -x if only N-x occurrences found (x > 0),
+   or else the position at the beginning of the Nth occurrence
+   (if searching backward) or the end (if searching forward).  */
+
+search_buffer (string, pos, lim, n, RE, trt, inverse_trt)
+     Lisp_Object string;
+     int pos;
+     int lim;
+     int n;
+     int RE;
+     register unsigned char *trt;
+     register unsigned char *inverse_trt;
+{
+  int len = XSTRING (string)->size;
+  unsigned char *base_pat = XSTRING (string)->data;
+  register int *BM_tab;
+  int *BM_tab_base;
+  register int direction = ((n > 0) ? 1 : -1);
+  register int dirlen;
+  int infinity, limit, k, stride_for_teases;
+  register unsigned char *pat, *cursor, *p_limit;  
+  register int i, j;
+  unsigned char *p1, *p2;
+  int s1, s2;
+
+  /* Null string is found at starting position.  */
+  if (!len)
+    return pos;
+
+  if (RE)
+    compile_pattern (string, &searchbuf, (char *) trt);
+  
+  if (RE			/* Here we detect whether the */
+				/* generality of an RE search is */
+				/* really needed. */
+      /* first item is "exact match" */
+      && *(searchbuf.buffer) == RE_EXACTN_VALUE
+      && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
+    {
+      RE = 0;			/* can do straight (non RE) search */
+      pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
+				/* trt already applied */
+      len = searchbuf.used - 2;
+    }
+  else if (!RE)
+    {
+      pat = (unsigned char *) alloca (len);
+
+      for (i = len; i--;)		/* Copy the pattern; apply trt */
+	*pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
+      pat -= len; base_pat = pat;
+    }
+
+  if (RE)
+    {
+      immediate_quit = 1;	/* Quit immediately if user types ^G,
+				   because letting this function finish
+				   can take too long. */
+      QUIT;			/* Do a pending quit right away,
+				   to avoid paradoxical behavior */
+      /* Get pointers and sizes of the two strings
+	 that make up the visible portion of the buffer. */
+
+      p1 = BEGV_ADDR;
+      s1 = GPT - BEGV;
+      p2 = GAP_END_ADDR;
+      s2 = ZV - GPT;
+      if (s1 < 0)
+	{
+	  p2 = p1;
+	  s2 = ZV - BEGV;
+	  s1 = 0;
+	}
+      if (s2 < 0)
+	{
+	  s1 = ZV - BEGV;
+	  s2 = 0;
+	}
+      while (n < 0)
+	{
+	  int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
+				 pos - BEGV, lim - pos, &search_regs,
+				 /* Don't allow match past current point */
+				 pos - BEGV);
+	  if (val == -2)
+	    matcher_overflow ();
+	  if (val >= 0)
+	    {
+	      j = BEGV;
+	      for (i = 0; i < RE_NREGS; i++)
+		if (search_regs.start[i] >= 0)
+		  {
+		    search_regs.start[i] += j;
+		    search_regs.end[i] += j;
+		  }
+	      search_regs_from_string = 0;
+	      /* Set pos to the new position. */
+	      pos = search_regs.start[0];
+	    }
+	  else
+	    {
+	      immediate_quit = 0;
+	      return (n);
+	    }
+	  n++;
+	}
+      while (n > 0)
+	{
+	  int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
+				 pos - BEGV, lim - pos, &search_regs,
+				 lim - BEGV);
+	  if (val == -2)
+	    matcher_overflow ();
+	  if (val >= 0)
+	    {
+	      j = BEGV;
+	      for (i = 0; i < RE_NREGS; i++)
+		if (search_regs.start[i] >= 0)
+		  {
+		    search_regs.start[i] += j;
+		    search_regs.end[i] += j;
+		  }
+	      search_regs_from_string = 0;
+	      pos = search_regs.end[0];
+	    }
+	  else
+	    {
+	      immediate_quit = 0;
+	      return (0 - n);
+	    }
+	  n--;
+	}
+      immediate_quit = 0;
+      return (pos);
+    }
+  else				/* non-RE case */
+    {
+#ifdef C_ALLOCA
+      int BM_tab_space[0400];
+      BM_tab = &BM_tab_space[0];
+#else
+      BM_tab = (int *) alloca (0400 * sizeof (int));
+#endif
+      /* The general approach is that we are going to maintain that we know */
+      /* the first (closest to the present position, in whatever direction */
+      /* we're searching) character that could possibly be the last */
+      /* (furthest from present position) character of a valid match.  We */
+      /* advance the state of our knowledge by looking at that character */
+      /* and seeing whether it indeed matches the last character of the */
+      /* pattern.  If it does, we take a closer look.  If it does not, we */
+      /* move our pointer (to putative last characters) as far as is */
+      /* logically possible.  This amount of movement, which I call a */
+      /* stride, will be the length of the pattern if the actual character */
+      /* appears nowhere in the pattern, otherwise it will be the distance */
+      /* from the last occurrence of that character to the end of the */
+      /* pattern. */
+      /* As a coding trick, an enormous stride is coded into the table for */
+      /* characters that match the last character.  This allows use of only */
+      /* a single test, a test for having gone past the end of the */
+      /* permissible match region, to test for both possible matches (when */
+      /* the stride goes past the end immediately) and failure to */
+      /* match (where you get nudged past the end one stride at a time). */ 
+
+      /* Here we make a "mickey mouse" BM table.  The stride of the search */
+      /* is determined only by the last character of the putative match. */
+      /* If that character does not match, we will stride the proper */
+      /* distance to propose a match that superimposes it on the last */
+      /* instance of a character that matches it (per trt), or misses */
+      /* it entirely if there is none. */  
+
+      dirlen = len * direction;
+      infinity = dirlen - (lim + pos + len + len) * direction;
+      if (direction < 0)
+	pat = (base_pat += len - 1);
+      BM_tab_base = BM_tab;
+      BM_tab += 0400;
+      j = dirlen;		/* to get it in a register */
+      /* A character that does not appear in the pattern induces a */
+      /* stride equal to the pattern length. */
+      while (BM_tab_base != BM_tab)
+	{
+	  *--BM_tab = j;
+	  *--BM_tab = j;
+	  *--BM_tab = j;
+	  *--BM_tab = j;
+	}
+      i = 0;
+      while (i != infinity)
+	{
+	  j = pat[i]; i += direction;
+	  if (i == dirlen) i = infinity;
+	  if ((int) trt)
+	    {
+	      k = (j = trt[j]);
+	      if (i == infinity)
+		stride_for_teases = BM_tab[j];
+	      BM_tab[j] = dirlen - i;
+	      /* A translation table is accompanied by its inverse -- see */
+	      /* comment following downcase_table for details */ 
+	      while ((j = inverse_trt[j]) != k)
+		BM_tab[j] = dirlen - i;
+	    }
+	  else
+	    {
+	      if (i == infinity)
+		stride_for_teases = BM_tab[j];
+	      BM_tab[j] = dirlen - i;
+	    }
+	  /* stride_for_teases tells how much to stride if we get a */
+	  /* match on the far character but are subsequently */
+	  /* disappointed, by recording what the stride would have been */
+	  /* for that character if the last character had been */
+	  /* different. */
+	}
+      infinity = dirlen - infinity;
+      pos += dirlen - ((direction > 0) ? direction : 0);
+      /* loop invariant - pos points at where last char (first char if reverse)
+	 of pattern would align in a possible match.  */
+      while (n != 0)
+	{
+	  if ((lim - pos - (direction > 0)) * direction < 0)
+	    return (n * (0 - direction));
+	  /* First we do the part we can by pointers (maybe nothing) */
+	  QUIT;
+	  pat = base_pat;
+	  limit = pos - dirlen + direction;
+	  limit = ((direction > 0)
+		   ? BUFFER_CEILING_OF (limit)
+		   : BUFFER_FLOOR_OF (limit));
+	  /* LIMIT is now the last (not beyond-last!) value
+	     POS can take on without hitting edge of buffer or the gap.  */
+	  limit = ((direction > 0)
+		   ? min (lim - 1, min (limit, pos + 20000))
+		   : max (lim, max (limit, pos - 20000)));
+	  if ((limit - pos) * direction > 20)
+	    {
+	      p_limit = &FETCH_CHAR (limit);
+	      p2 = (cursor = &FETCH_CHAR (pos));
+	      /* In this loop, pos + cursor - p2 is the surrogate for pos */
+	      while (1)		/* use one cursor setting as long as i can */
+		{
+		  if (direction > 0) /* worth duplicating */
+		    {
+		      /* Use signed comparison if appropriate
+			 to make cursor+infinity sure to be > p_limit.
+			 Assuming that the buffer lies in a range of addresses
+			 that are all "positive" (as ints) or all "negative",
+			 either kind of comparison will work as long
+			 as we don't step by infinity.  So pick the kind
+			 that works when we do step by infinity.  */
+		      if ((int) (p_limit + infinity) > (int) p_limit)
+			while ((int) cursor <= (int) p_limit)
+			  cursor += BM_tab[*cursor];
+		      else
+			while ((unsigned int) cursor <= (unsigned int) p_limit)
+			  cursor += BM_tab[*cursor];
+		    }
+		  else
+		    {
+		      if ((int) (p_limit + infinity) < (int) p_limit)
+			while ((int) cursor >= (int) p_limit)
+			  cursor += BM_tab[*cursor];
+		      else
+			while ((unsigned int) cursor >= (unsigned int) p_limit)
+			  cursor += BM_tab[*cursor];
+		    }
+/* If you are here, cursor is beyond the end of the searched region. */
+ /* This can happen if you match on the far character of the pattern, */
+ /* because the "stride" of that character is infinity, a number able */
+ /* to throw you well beyond the end of the search.  It can also */
+ /* happen if you fail to match within the permitted region and would */
+ /* otherwise try a character beyond that region */
+		  if ((cursor - p_limit) * direction <= len)
+		    break;	/* a small overrun is genuine */
+		  cursor -= infinity; /* large overrun = hit */
+		  i = dirlen - direction;
+		  if ((int) trt)
+		    {
+		      while ((i -= direction) + direction != 0)
+			if (pat[i] != trt[*(cursor -= direction)])
+			  break;
+		    }
+		  else
+		    {
+		      while ((i -= direction) + direction != 0)
+			if (pat[i] != *(cursor -= direction))
+			  break;
+		    }
+		  cursor += dirlen - i - direction;	/* fix cursor */
+		  if (i + direction == 0)
+		    {
+		      cursor -= direction;
+		      search_regs.start[0]
+			= pos + cursor - p2 + ((direction > 0)
+					       ? 1 - len : 0);
+		      search_regs.end[0] = len + search_regs.start[0];
+		      search_regs_from_string = 0;
+		      if ((n -= direction) != 0)
+			cursor += dirlen; /* to resume search */
+		      else
+			return ((direction > 0)
+				? search_regs.end[0] : search_regs.start[0]);
+		    }
+		  else
+		    cursor += stride_for_teases; /* <sigh> we lose -  */
+		}
+	      pos += cursor - p2;
+	    }
+	  else
+	    /* Now we'll pick up a clump that has to be done the hard */
+	    /* way because it covers a discontinuity */
+	    {
+	      limit = ((direction > 0)
+		       ? BUFFER_CEILING_OF (pos - dirlen + 1)
+		       : BUFFER_FLOOR_OF (pos - dirlen - 1));
+	      limit = ((direction > 0)
+		       ? min (limit + len, lim - 1)
+		       : max (limit - len, lim));
+	      /* LIMIT is now the last value POS can have
+		 and still be valid for a possible match.  */
+	      while (1)
+		{
+		  /* This loop can be coded for space rather than */
+		  /* speed because it will usually run only once. */
+		  /* (the reach is at most len + 21, and typically */
+		  /* does not exceed len) */    
+		  while ((limit - pos) * direction >= 0)
+		    pos += BM_tab[FETCH_CHAR(pos)];
+		  /* now run the same tests to distinguish going off the */
+		  /* end, a match or a phoney match. */
+		  if ((pos - limit) * direction <= len)
+		    break;	/* ran off the end */
+		  /* Found what might be a match.
+		     Set POS back to last (first if reverse) char pos.  */
+		  pos -= infinity;
+		  i = dirlen - direction;
+		  while ((i -= direction) + direction != 0)
+		    {
+		      pos -= direction;
+		      if (pat[i] != (((int) trt)
+				     ? trt[FETCH_CHAR(pos)]
+				     : FETCH_CHAR (pos)))
+			break;
+		    }
+		  /* Above loop has moved POS part or all the way
+		     back to the first char pos (last char pos if reverse).
+		     Set it once again at the last (first if reverse) char.  */
+		  pos += dirlen - i- direction;
+		  if (i + direction == 0)
+		    {
+		      pos -= direction;
+		      search_regs.start[0]
+			= pos + ((direction > 0) ? 1 - len : 0);
+		      search_regs.end[0] = len + search_regs.start[0];
+		      search_regs_from_string = 0;
+		      if ((n -= direction) != 0)
+			pos += dirlen; /* to resume search */
+		      else
+			return ((direction > 0)
+				? search_regs.end[0] : search_regs.start[0]);
+		    }
+		  else
+		    pos += stride_for_teases;
+		}
+	      }
+	  /* We have done one clump.  Can we continue? */
+	  if ((lim - pos) * direction < 0)
+	    return ((0 - n) * direction);
+	}
+      return pos;
+    }
+}
+
+/* Given a string of words separated by word delimiters,
+  compute a regexp that matches those exact words
+  separated by arbitrary punctuation.  */
+
+static Lisp_Object
+wordify (string)
+     Lisp_Object string;
+{
+  register unsigned char *p, *o;
+  register int i, len, punct_count = 0, word_count = 0;
+  Lisp_Object val;
+
+  CHECK_STRING (string, 0);
+  p = XSTRING (string)->data;
+  len = XSTRING (string)->size;
+
+  for (i = 0; i < len; i++)
+    if (SYNTAX (p[i]) != Sword)
+      {
+	punct_count++;
+	if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
+      }
+  if (SYNTAX (p[len-1]) == Sword) word_count++;
+  if (!word_count) return build_string ("");
+
+  val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
+
+  o = XSTRING (val)->data;
+  *o++ = '\\';
+  *o++ = 'b';
+
+  for (i = 0; i < len; i++)
+    if (SYNTAX (p[i]) == Sword)
+      *o++ = p[i];
+    else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
+      {
+	*o++ = '\\';
+	*o++ = 'W';
+	*o++ = '\\';
+	*o++ = 'W';
+	*o++ = '*';
+      }
+
+  *o++ = '\\';
+  *o++ = 'b';
+
+  return val;
+}
+
+DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
+  "sSearch backward: ",
+  "Search backward from point for STRING.\n\
+Set point to the beginning of the occurrence found, and return point.\n\
+An optional second argument bounds the search; it is a buffer position.\n\
+The match found must not extend before that position.\n\
+Optional third argument, if t, means if fail just return nil (no error).\n\
+ If not nil and not t, position at limit of search and return nil.\n\
+Optional fourth argument is repeat count--search for successive occurrences.\n\
+See also the functions `match-beginning', `match-end' and `replace-match'.")
+  (string, bound, noerror, count)
+     Lisp_Object string, bound, noerror, count;
+{
+  return search_command (string, bound, noerror, count, -1, 0);
+}
+
+DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
+  "Search forward from point for STRING.\n\
+Set point to the end of the occurrence found, and return point.\n\
+An optional second argument bounds the search; it is a buffer position.\n\
+The match found must not extend after that position.  nil is equivalent\n\
+  to (point-max).\n\
+Optional third argument, if t, means if fail just return nil (no error).\n\
+  If not nil and not t, move to limit of search and return nil.\n\
+Optional fourth argument is repeat count--search for successive occurrences.\n\
+See also the functions `match-beginning', `match-end' and `replace-match'.")
+  (string, bound, noerror, count)
+     Lisp_Object string, bound, noerror, count;
+{
+  return search_command (string, bound, noerror, count, 1, 0);
+}
+
+DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
+  "sWord search backward: ",
+  "Search backward from point for STRING, ignoring differences in punctuation.\n\
+Set point to the beginning of the occurrence found, and return point.\n\
+An optional second argument bounds the search; it is a buffer position.\n\
+The match found must not extend before that position.\n\
+Optional third argument, if t, means if fail just return nil (no error).\n\
+  If not nil and not t, move to limit of search and return nil.\n\
+Optional fourth argument is repeat count--search for successive occurrences.")
+  (string, bound, noerror, count)
+     Lisp_Object string, bound, noerror, count;
+{
+  return search_command (wordify (string), bound, noerror, count, -1, 1);
+}
+
+DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
+  "sWord search: ",
+  "Search forward from point for STRING, ignoring differences in punctuation.\n\
+Set point to the end of the occurrence found, and return point.\n\
+An optional second argument bounds the search; it is a buffer position.\n\
+The match found must not extend after that position.\n\
+Optional third argument, if t, means if fail just return nil (no error).\n\
+  If not nil and not t, move to limit of search and return nil.\n\
+Optional fourth argument is repeat count--search for successive occurrences.")
+  (string, bound, noerror, count)
+     Lisp_Object string, bound, noerror, count;
+{
+  return search_command (wordify (string), bound, noerror, count, 1, 1);
+}
+
+DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
+  "sRE search backward: ",
+  "Search backward from point for match for regular expression REGEXP.\n\
+Set point to the beginning of the match, and return point.\n\
+The match found is the one starting last in the buffer\n\
+and yet ending before the place the origin of the search.\n\
+An optional second argument bounds the search; it is a buffer position.\n\
+The match found must start at or after that position.\n\
+Optional third argument, if t, means if fail just return nil (no error).\n\
+  If not nil and not t, move to limit of search and return nil.\n\
+Optional fourth argument is repeat count--search for successive occurrences.\n\
+See also the functions `match-beginning', `match-end' and `replace-match'.")
+  (string, bound, noerror, count)
+     Lisp_Object string, bound, noerror, count;
+{
+  return search_command (string, bound, noerror, count, -1, 1);
+}
+
+DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
+  "sRE search: ",
+  "Search forward from point for regular expression REGEXP.\n\
+Set point to the end of the occurrence found, and return point.\n\
+An optional second argument bounds the search; it is a buffer position.\n\
+The match found must not extend after that position.\n\
+Optional third argument, if t, means if fail just return nil (no error).\n\
+  If not nil and not t, move to limit of search and return nil.\n\
+Optional fourth argument is repeat count--search for successive occurrences.\n\
+See also the functions `match-beginning', `match-end' and `replace-match'.")
+  (string, bound, noerror, count)
+     Lisp_Object string, bound, noerror, count;
+{
+  return search_command (string, bound, noerror, count, 1, 1);
+}
+
+DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
+  "Replace text matched by last search with NEWTEXT.\n\
+If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
+Otherwise convert to all caps or cap initials, like replaced text.\n\
+If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
+Otherwise treat `\\' as special:\n\
+  `\\&' in NEWTEXT means substitute original matched text.\n\
+  `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
+       If Nth parens didn't match, substitute nothing.\n\
+  `\\\\' means insert one `\\'.\n\
+Leaves point at end of replacement text.")
+  (string, fixedcase, literal)
+     Lisp_Object string, fixedcase, literal;
+{
+  enum { nochange, all_caps, cap_initial } case_action;
+  register int pos, last;
+  int some_multiletter_word;
+  int some_letter = 0;
+  register int c, prevc;
+  int inslen;
+
+  CHECK_STRING (string, 0);
+
+  case_action = nochange;	/* We tried an initialization */
+				/* but some C compilers blew it */
+  if (search_regs.start[0] < BEGV
+      || search_regs.start[0] > search_regs.end[0]
+      || search_regs.end[0] > ZV)
+    args_out_of_range(make_number (search_regs.start[0]),
+		      make_number (search_regs.end[0]));
+
+  if (NILP (fixedcase))
+    {
+      /* Decide how to casify by examining the matched text. */
+
+      last = search_regs.end[0];
+      prevc = '\n';
+      case_action = all_caps;
+
+      /* some_multiletter_word is set nonzero if any original word
+	 is more than one letter long. */
+      some_multiletter_word = 0;
+
+      for (pos = search_regs.start[0]; pos < last; pos++)
+	{
+	  c = FETCH_CHAR (pos);
+	  if (LOWERCASEP (c))
+	    {
+	      /* Cannot be all caps if any original char is lower case */
+
+	      case_action = cap_initial;
+	      if (SYNTAX (prevc) != Sword)
+		{
+		  /* Cannot even be cap initials
+		     if some original initial is lower case */
+		  case_action = nochange;
+		  break;
+		}
+	      else
+		some_multiletter_word = 1;
+	    }
+	  else if (!NOCASEP (c))
+	    {
+	      some_letter = 1;
+	      if (!some_multiletter_word && SYNTAX (prevc) == Sword)
+		some_multiletter_word = 1;
+	    }
+
+	  prevc = c;
+	}
+
+      /* Do not make new text all caps
+	 if the original text contained only single letter words. */
+      if (case_action == all_caps && !some_multiletter_word)
+	case_action = cap_initial;
+
+      if (!some_letter) case_action = nochange;
+    }
+
+  SET_PT (search_regs.end[0]);
+  if (!NILP (literal))
+    Finsert (1, &string);
+  else
+    {
+      struct gcpro gcpro1;
+      GCPRO1 (string);
+
+      for (pos = 0; pos < XSTRING (string)->size; pos++)
+	{
+	  c = XSTRING (string)->data[pos];
+	  if (c == '\\')
+	    {
+	      c = XSTRING (string)->data[++pos];
+	      if (c == '&')
+		Finsert_buffer_substring (Fcurrent_buffer (),
+					  make_number (search_regs.start[0]),
+					  make_number (search_regs.end[0]));
+	      else if (c >= '1' && c <= RE_NREGS + '0')
+		{
+		  if (search_regs.start[c - '0'] >= 1)
+		    Finsert_buffer_substring (Fcurrent_buffer (),
+					      make_number (search_regs.start[c - '0']),
+					      make_number (search_regs.end[c - '0']));
+		}
+	      else
+		insert_char (c);
+	    }
+	  else
+	    insert_char (c);
+	}
+      UNGCPRO;
+    }
+
+  inslen = point - (search_regs.end[0]);
+  del_range (search_regs.start[0], search_regs.end[0]);
+
+  if (case_action == all_caps)
+    Fupcase_region (make_number (point - inslen), make_number (point));
+  else if (case_action == cap_initial)
+    upcase_initials_region (make_number (point - inslen), make_number (point));
+  return Qnil;
+}
+
+static Lisp_Object
+match_limit (num, beginningp)
+     Lisp_Object num;
+     int beginningp;
+{
+  register int n;
+
+  CHECK_NUMBER (num, 0);
+  n = XINT (num);
+  if (n < 0 || n >= RE_NREGS)
+    args_out_of_range (num, make_number (RE_NREGS));
+  if (search_regs.start[n] < 0)
+    return Qnil;
+  return (make_number ((beginningp) ? search_regs.start[n]
+		                    : search_regs.end[n]));
+}
+
+DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
+  "Return position of start of text matched by last search.\n\
+ARG, a number, specifies which parenthesized expression in the last regexp.\n\
+ Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
+Zero means the entire text matched by the whole regexp or whole string.")
+  (num)
+     Lisp_Object num;
+{
+  return match_limit (num, 1);
+}
+
+DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
+  "Return position of end of text matched by last search.\n\
+ARG, a number, specifies which parenthesized expression in the last regexp.\n\
+ Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
+Zero means the entire text matched by the whole regexp or whole string.")
+  (num)
+     Lisp_Object num;
+{
+  return match_limit (num, 0);
+} 
+
+DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
+  "Return a list containing all info on what the last search matched.\n\
+Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
+All the elements are markers or nil (nil if the Nth pair didn't match)\n\
+if the last match was on a buffer; integers or nil if a string was matched.\n\
+Use `store-match-data' to reinstate the data in this list.")
+  ()
+{
+  Lisp_Object data[2 * RE_NREGS];
+  int i, len;
+
+  len = -1;
+  for (i = 0; i < RE_NREGS; i++)
+    {
+      int start = search_regs.start[i];
+      if (start >= 0)
+	{
+	  if (search_regs_from_string)
+	    {
+	      XFASTINT (data[2 * i]) = start;
+	      XFASTINT (data[2 * i + 1]) = search_regs.end[i];
+	    }
+	  else
+	    {
+	      data[2 * i] = Fmake_marker ();
+	      Fset_marker (data[2 * i], make_number (start), Qnil);
+	      data[2 * i + 1] = Fmake_marker ();
+	      Fset_marker (data[2 * i + 1],
+			   make_number (search_regs.end[i]), Qnil);
+	    }
+	  len = i;
+	}
+      else
+	data[2 * i] = data [2 * i + 1] = Qnil;
+    }
+  return Flist (2 * len + 2, data);
+}
+
+
+DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
+  "Set internal data on last search match from elements of LIST.\n\
+LIST should have been created by calling `match-data' previously.")
+  (list)
+     register Lisp_Object list;
+{
+  register int i;
+  register Lisp_Object marker;
+
+  if (!CONSP (list) && !NILP (list))
+    list = wrong_type_argument (Qconsp, list, 0);
+
+  for (i = 0; i < RE_NREGS; i++)
+    {
+      marker = Fcar (list);
+      if (NILP (marker))
+	{
+	  search_regs.start[i] = -1;
+	  list = Fcdr (list);
+	}
+      else
+	{
+	  if (XTYPE (marker) == Lisp_Marker
+	      && XMARKER (marker)->buffer == 0)
+	    XFASTINT (marker) = 0;
+
+	  CHECK_NUMBER_COERCE_MARKER (marker, 0);
+	  search_regs.start[i] = XINT (marker);
+	  list = Fcdr (list);
+
+	  marker = Fcar (list);
+	  if (XTYPE (marker) == Lisp_Marker
+	      && XMARKER (marker)->buffer == 0)
+	    XFASTINT (marker) = 0;
+
+	  CHECK_NUMBER_COERCE_MARKER (marker, 0);
+	  search_regs.end[i] = XINT (marker);
+	}
+      list = Fcdr (list);
+    }
+
+  return Qnil;  
+}
+
+/* Quote a string to inactivate reg-expr chars */
+
+DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
+  "Return a regexp string which matches exactly STRING and nothing else.")
+  (str)
+     Lisp_Object str;
+{
+  register unsigned char *in, *out, *end;
+  register unsigned char *temp;
+
+  CHECK_STRING (str, 0);
+
+  temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
+
+  /* Now copy the data into the new string, inserting escapes. */
+
+  in = XSTRING (str)->data;
+  end = in + XSTRING (str)->size;
+  out = temp; 
+
+  for (; in != end; in++)
+    {
+      if (*in == '[' || *in == ']'
+	  || *in == '*' || *in == '.' || *in == '\\'
+	  || *in == '?' || *in == '+'
+	  || *in == '^' || *in == '$')
+	*out++ = '\\';
+      *out++ = *in;
+    }
+
+  return make_string (temp, out - temp);
+}
+  
+syms_of_search ()
+{
+  register int i;
+
+  searchbuf.allocated = 100;
+  searchbuf.buffer = (char *) malloc (searchbuf.allocated);
+  searchbuf.fastmap = search_fastmap;
+
+  Qsearch_failed = intern ("search-failed");
+  staticpro (&Qsearch_failed);
+  Qinvalid_regexp = intern ("invalid-regexp");
+  staticpro (&Qinvalid_regexp);
+
+  Fput (Qsearch_failed, Qerror_conditions,
+	Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
+  Fput (Qsearch_failed, Qerror_message,
+	build_string ("Search failed"));
+
+  Fput (Qinvalid_regexp, Qerror_conditions,
+	Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
+  Fput (Qinvalid_regexp, Qerror_message,
+	build_string ("Invalid regexp"));
+
+  last_regexp = Qnil;
+  staticpro (&last_regexp);
+
+  defsubr (&Sstring_match);
+  defsubr (&Slooking_at);
+  defsubr (&Sskip_chars_forward);
+  defsubr (&Sskip_chars_backward);
+  defsubr (&Ssearch_forward);
+  defsubr (&Ssearch_backward);
+  defsubr (&Sword_search_forward);
+  defsubr (&Sword_search_backward);
+  defsubr (&Sre_search_forward);
+  defsubr (&Sre_search_backward);
+  defsubr (&Sreplace_match);
+  defsubr (&Smatch_beginning);
+  defsubr (&Smatch_end);
+  defsubr (&Smatch_data);
+  defsubr (&Sstore_match_data);
+  defsubr (&Sregexp_quote);
+}
author	Jim Blandy <jimb@redhat.com>
date	Wed, 01 Apr 1992 10:45:51 +0000
parents
children	5f36058e31f9