comparison src/search.c @ 603:470f556a9453

Initial revision
author Jim Blandy <jimb@redhat.com>
date Wed, 01 Apr 1992 10:45:51 +0000
parents
children 5f36058e31f9
comparison
equal deleted inserted replaced
602:d2de231ee7f5 603:470f556a9453
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1992 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 1, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 #include "config.h"
22 #include "lisp.h"
23 #include "syntax.h"
24 #include "buffer.h"
25 #include "commands.h"
26 #include <sys/types.h>
27 #include "regex.h"
28
29 #define max(a, b) ((a) > (b) ? (a) : (b))
30 #define min(a, b) ((a) < (b) ? (a) : (b))
31
32 /* We compile regexps into this buffer and then use it for searching. */
33
34 struct re_pattern_buffer searchbuf;
35
36 char search_fastmap[0400];
37
38 /* Last regexp we compiled */
39
40 Lisp_Object last_regexp;
41
42 /* Every call to re_match, etc., must pass &search_regs as the regs argument
43 unless you can show it is unnecessary (i.e., if re_match is certainly going
44 to be called again before region-around-match can be called). */
45
46 static struct re_registers search_regs;
47
48 /* Nonzero if search_regs are indices in a string; 0 if in a buffer. */
49
50 static int search_regs_from_string;
51
52 /* error condition signalled when regexp compile_pattern fails */
53
54 Lisp_Object Qinvalid_regexp;
55
56 static void
57 matcher_overflow ()
58 {
59 error ("Stack overflow in regexp matcher");
60 }
61
62 #ifdef __STDC__
63 #define CONST const
64 #else
65 #define CONST
66 #endif
67
68 /* Compile a regexp and signal a Lisp error if anything goes wrong. */
69
70 compile_pattern (pattern, bufp, translate)
71 Lisp_Object pattern;
72 struct re_pattern_buffer *bufp;
73 char *translate;
74 {
75 CONST char *val;
76 Lisp_Object dummy;
77
78 if (EQ (pattern, last_regexp)
79 && translate == bufp->translate)
80 return;
81 last_regexp = Qnil;
82 bufp->translate = translate;
83 val = re_compile_pattern ((char *) XSTRING (pattern)->data,
84 XSTRING (pattern)->size,
85 bufp);
86 if (val)
87 {
88 dummy = build_string (val);
89 while (1)
90 Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
91 }
92 last_regexp = pattern;
93 return;
94 }
95
96 /* Error condition used for failing searches */
97 Lisp_Object Qsearch_failed;
98
99 Lisp_Object
100 signal_failure (arg)
101 Lisp_Object arg;
102 {
103 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
104 return Qnil;
105 }
106
107 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
108 "Return t if text after point matches regular expression PAT.")
109 (string)
110 Lisp_Object string;
111 {
112 Lisp_Object val;
113 unsigned char *p1, *p2;
114 int s1, s2;
115 register int i;
116
117 CHECK_STRING (string, 0);
118 compile_pattern (string, &searchbuf,
119 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
120
121 immediate_quit = 1;
122 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
123
124 /* Get pointers and sizes of the two strings
125 that make up the visible portion of the buffer. */
126
127 p1 = BEGV_ADDR;
128 s1 = GPT - BEGV;
129 p2 = GAP_END_ADDR;
130 s2 = ZV - GPT;
131 if (s1 < 0)
132 {
133 p2 = p1;
134 s2 = ZV - BEGV;
135 s1 = 0;
136 }
137 if (s2 < 0)
138 {
139 s1 = ZV - BEGV;
140 s2 = 0;
141 }
142
143 i = re_match_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
144 point - BEGV, &search_regs,
145 ZV - BEGV);
146 if (i == -2)
147 matcher_overflow ();
148
149 val = (0 <= i ? Qt : Qnil);
150 for (i = 0; i < RE_NREGS; i++)
151 if (search_regs.start[i] >= 0)
152 {
153 search_regs.start[i] += BEGV;
154 search_regs.end[i] += BEGV;
155 }
156 search_regs_from_string = 0;
157 immediate_quit = 0;
158 return val;
159 }
160
161 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
162 "Return index of start of first match for REGEXP in STRING, or nil.\n\
163 If third arg START is non-nil, start search at that index in STRING.\n\
164 For index of first char beyond the match, do (match-end 0).\n\
165 `match-end' and `match-beginning' also give indices of substrings\n\
166 matched by parenthesis constructs in the pattern.")
167 (regexp, string, start)
168 Lisp_Object regexp, string, start;
169 {
170 int val;
171 int s;
172
173 CHECK_STRING (regexp, 0);
174 CHECK_STRING (string, 1);
175
176 if (NILP (start))
177 s = 0;
178 else
179 {
180 int len = XSTRING (string)->size;
181
182 CHECK_NUMBER (start, 2);
183 s = XINT (start);
184 if (s < 0 && -s <= len)
185 s = len - s;
186 else if (0 > s || s > len)
187 args_out_of_range (string, start);
188 }
189
190 compile_pattern (regexp, &searchbuf,
191 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
192 immediate_quit = 1;
193 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
194 XSTRING (string)->size, s, XSTRING (string)->size - s,
195 &search_regs);
196 immediate_quit = 0;
197 search_regs_from_string = 1;
198 if (val == -2)
199 matcher_overflow ();
200 if (val < 0) return Qnil;
201 return make_number (val);
202 }
203
204 scan_buffer (target, pos, cnt, shortage)
205 int *shortage, pos;
206 register int cnt, target;
207 {
208 int lim = ((cnt > 0) ? ZV - 1 : BEGV);
209 int direction = ((cnt > 0) ? 1 : -1);
210 register int lim0;
211 unsigned char *base;
212 register unsigned char *cursor, *limit;
213
214 if (shortage != 0)
215 *shortage = 0;
216
217 immediate_quit = 1;
218
219 if (cnt > 0)
220 while (pos != lim + 1)
221 {
222 lim0 = BUFFER_CEILING_OF (pos);
223 lim0 = min (lim, lim0);
224 limit = &FETCH_CHAR (lim0) + 1;
225 base = (cursor = &FETCH_CHAR (pos));
226 while (1)
227 {
228 while (*cursor != target && ++cursor != limit)
229 ;
230 if (cursor != limit)
231 {
232 if (--cnt == 0)
233 {
234 immediate_quit = 0;
235 return (pos + cursor - base + 1);
236 }
237 else
238 if (++cursor == limit)
239 break;
240 }
241 else
242 break;
243 }
244 pos += cursor - base;
245 }
246 else
247 {
248 pos--; /* first character we scan */
249 while (pos > lim - 1)
250 { /* we WILL scan under pos */
251 lim0 = BUFFER_FLOOR_OF (pos);
252 lim0 = max (lim, lim0);
253 limit = &FETCH_CHAR (lim0) - 1;
254 base = (cursor = &FETCH_CHAR (pos));
255 cursor++;
256 while (1)
257 {
258 while (--cursor != limit && *cursor != target)
259 ;
260 if (cursor != limit)
261 {
262 if (++cnt == 0)
263 {
264 immediate_quit = 0;
265 return (pos + cursor - base + 1);
266 }
267 }
268 else
269 break;
270 }
271 pos += cursor - base;
272 }
273 }
274 immediate_quit = 0;
275 if (shortage != 0)
276 *shortage = cnt * direction;
277 return (pos + ((direction == 1 ? 0 : 1)));
278 }
279
280 int
281 find_next_newline (from, cnt)
282 register int from, cnt;
283 {
284 return (scan_buffer ('\n', from, cnt, (int *) 0));
285 }
286
287 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
288 "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
289 CHARS is like the inside of a `[...]' in a regular expression\n\
290 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
291 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
292 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
293 (string, lim)
294 Lisp_Object string, lim;
295 {
296 skip_chars (1, string, lim);
297 return Qnil;
298 }
299
300 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
301 "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
302 See `skip-chars-forward' for details.")
303 (string, lim)
304 Lisp_Object string, lim;
305 {
306 skip_chars (0, string, lim);
307 return Qnil;
308 }
309
310 skip_chars (forwardp, string, lim)
311 int forwardp;
312 Lisp_Object string, lim;
313 {
314 register unsigned char *p, *pend;
315 register unsigned char c;
316 unsigned char fastmap[0400];
317 int negate = 0;
318 register int i;
319
320 CHECK_STRING (string, 0);
321
322 if (NILP (lim))
323 XSET (lim, Lisp_Int, forwardp ? ZV : BEGV);
324 else
325 CHECK_NUMBER_COERCE_MARKER (lim, 1);
326
327 #if 0 /* This breaks some things... jla. */
328 /* In any case, don't allow scan outside bounds of buffer. */
329 if (XFASTINT (lim) > ZV)
330 XFASTINT (lim) = ZV;
331 if (XFASTINT (lim) < BEGV)
332 XFASTINT (lim) = BEGV;
333 #endif
334
335 p = XSTRING (string)->data;
336 pend = p + XSTRING (string)->size;
337 bzero (fastmap, sizeof fastmap);
338
339 if (p != pend && *p == '^')
340 {
341 negate = 1; p++;
342 }
343
344 /* Find the characters specified and set their elements of fastmap. */
345
346 while (p != pend)
347 {
348 c = *p++;
349 if (c == '\\')
350 {
351 if (p == pend) break;
352 c = *p++;
353 }
354 if (p != pend && *p == '-')
355 {
356 p++;
357 if (p == pend) break;
358 while (c <= *p)
359 {
360 fastmap[c] = 1;
361 c++;
362 }
363 p++;
364 }
365 else
366 fastmap[c] = 1;
367 }
368
369 /* If ^ was the first character, complement the fastmap. */
370
371 if (negate)
372 for (i = 0; i < sizeof fastmap; i++)
373 fastmap[i] ^= 1;
374
375 immediate_quit = 1;
376 if (forwardp)
377 {
378 while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
379 SET_PT (point + 1);
380 }
381 else
382 {
383 while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
384 SET_PT (point - 1);
385 }
386 immediate_quit = 0;
387 }
388
389 /* Subroutines of Lisp buffer search functions. */
390
391 static Lisp_Object
392 search_command (string, bound, noerror, count, direction, RE)
393 Lisp_Object string, bound, noerror, count;
394 int direction;
395 int RE;
396 {
397 register int np;
398 int lim;
399 int n = direction;
400
401 if (!NILP (count))
402 {
403 CHECK_NUMBER (count, 3);
404 n *= XINT (count);
405 }
406
407 CHECK_STRING (string, 0);
408 if (NILP (bound))
409 lim = n > 0 ? ZV : BEGV;
410 else
411 {
412 CHECK_NUMBER_COERCE_MARKER (bound, 1);
413 lim = XINT (bound);
414 if (n > 0 ? lim < point : lim > point)
415 error ("Invalid search bound (wrong side of point)");
416 if (lim > ZV)
417 lim = ZV;
418 if (lim < BEGV)
419 lim = BEGV;
420 }
421
422 np = search_buffer (string, point, lim, n, RE,
423 (!NILP (current_buffer->case_fold_search)
424 ? XSTRING (current_buffer->case_canon_table)->data : 0),
425 (!NILP (current_buffer->case_fold_search)
426 ? XSTRING (current_buffer->case_eqv_table)->data : 0));
427 if (np <= 0)
428 {
429 if (NILP (noerror))
430 return signal_failure (string);
431 if (!EQ (noerror, Qt))
432 {
433 if (lim < BEGV || lim > ZV)
434 abort ();
435 SET_PT (lim);
436 }
437 return Qnil;
438 }
439
440 if (np < BEGV || np > ZV)
441 abort ();
442
443 SET_PT (np);
444
445 return make_number (np);
446 }
447
448 /* search for the n'th occurrence of STRING in the current buffer,
449 starting at position POS and stopping at position LIM,
450 treating PAT as a literal string if RE is false or as
451 a regular expression if RE is true.
452
453 If N is positive, searching is forward and LIM must be greater than POS.
454 If N is negative, searching is backward and LIM must be less than POS.
455
456 Returns -x if only N-x occurrences found (x > 0),
457 or else the position at the beginning of the Nth occurrence
458 (if searching backward) or the end (if searching forward). */
459
460 search_buffer (string, pos, lim, n, RE, trt, inverse_trt)
461 Lisp_Object string;
462 int pos;
463 int lim;
464 int n;
465 int RE;
466 register unsigned char *trt;
467 register unsigned char *inverse_trt;
468 {
469 int len = XSTRING (string)->size;
470 unsigned char *base_pat = XSTRING (string)->data;
471 register int *BM_tab;
472 int *BM_tab_base;
473 register int direction = ((n > 0) ? 1 : -1);
474 register int dirlen;
475 int infinity, limit, k, stride_for_teases;
476 register unsigned char *pat, *cursor, *p_limit;
477 register int i, j;
478 unsigned char *p1, *p2;
479 int s1, s2;
480
481 /* Null string is found at starting position. */
482 if (!len)
483 return pos;
484
485 if (RE)
486 compile_pattern (string, &searchbuf, (char *) trt);
487
488 if (RE /* Here we detect whether the */
489 /* generality of an RE search is */
490 /* really needed. */
491 /* first item is "exact match" */
492 && *(searchbuf.buffer) == RE_EXACTN_VALUE
493 && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
494 {
495 RE = 0; /* can do straight (non RE) search */
496 pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
497 /* trt already applied */
498 len = searchbuf.used - 2;
499 }
500 else if (!RE)
501 {
502 pat = (unsigned char *) alloca (len);
503
504 for (i = len; i--;) /* Copy the pattern; apply trt */
505 *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
506 pat -= len; base_pat = pat;
507 }
508
509 if (RE)
510 {
511 immediate_quit = 1; /* Quit immediately if user types ^G,
512 because letting this function finish
513 can take too long. */
514 QUIT; /* Do a pending quit right away,
515 to avoid paradoxical behavior */
516 /* Get pointers and sizes of the two strings
517 that make up the visible portion of the buffer. */
518
519 p1 = BEGV_ADDR;
520 s1 = GPT - BEGV;
521 p2 = GAP_END_ADDR;
522 s2 = ZV - GPT;
523 if (s1 < 0)
524 {
525 p2 = p1;
526 s2 = ZV - BEGV;
527 s1 = 0;
528 }
529 if (s2 < 0)
530 {
531 s1 = ZV - BEGV;
532 s2 = 0;
533 }
534 while (n < 0)
535 {
536 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
537 pos - BEGV, lim - pos, &search_regs,
538 /* Don't allow match past current point */
539 pos - BEGV);
540 if (val == -2)
541 matcher_overflow ();
542 if (val >= 0)
543 {
544 j = BEGV;
545 for (i = 0; i < RE_NREGS; i++)
546 if (search_regs.start[i] >= 0)
547 {
548 search_regs.start[i] += j;
549 search_regs.end[i] += j;
550 }
551 search_regs_from_string = 0;
552 /* Set pos to the new position. */
553 pos = search_regs.start[0];
554 }
555 else
556 {
557 immediate_quit = 0;
558 return (n);
559 }
560 n++;
561 }
562 while (n > 0)
563 {
564 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
565 pos - BEGV, lim - pos, &search_regs,
566 lim - BEGV);
567 if (val == -2)
568 matcher_overflow ();
569 if (val >= 0)
570 {
571 j = BEGV;
572 for (i = 0; i < RE_NREGS; i++)
573 if (search_regs.start[i] >= 0)
574 {
575 search_regs.start[i] += j;
576 search_regs.end[i] += j;
577 }
578 search_regs_from_string = 0;
579 pos = search_regs.end[0];
580 }
581 else
582 {
583 immediate_quit = 0;
584 return (0 - n);
585 }
586 n--;
587 }
588 immediate_quit = 0;
589 return (pos);
590 }
591 else /* non-RE case */
592 {
593 #ifdef C_ALLOCA
594 int BM_tab_space[0400];
595 BM_tab = &BM_tab_space[0];
596 #else
597 BM_tab = (int *) alloca (0400 * sizeof (int));
598 #endif
599 /* The general approach is that we are going to maintain that we know */
600 /* the first (closest to the present position, in whatever direction */
601 /* we're searching) character that could possibly be the last */
602 /* (furthest from present position) character of a valid match. We */
603 /* advance the state of our knowledge by looking at that character */
604 /* and seeing whether it indeed matches the last character of the */
605 /* pattern. If it does, we take a closer look. If it does not, we */
606 /* move our pointer (to putative last characters) as far as is */
607 /* logically possible. This amount of movement, which I call a */
608 /* stride, will be the length of the pattern if the actual character */
609 /* appears nowhere in the pattern, otherwise it will be the distance */
610 /* from the last occurrence of that character to the end of the */
611 /* pattern. */
612 /* As a coding trick, an enormous stride is coded into the table for */
613 /* characters that match the last character. This allows use of only */
614 /* a single test, a test for having gone past the end of the */
615 /* permissible match region, to test for both possible matches (when */
616 /* the stride goes past the end immediately) and failure to */
617 /* match (where you get nudged past the end one stride at a time). */
618
619 /* Here we make a "mickey mouse" BM table. The stride of the search */
620 /* is determined only by the last character of the putative match. */
621 /* If that character does not match, we will stride the proper */
622 /* distance to propose a match that superimposes it on the last */
623 /* instance of a character that matches it (per trt), or misses */
624 /* it entirely if there is none. */
625
626 dirlen = len * direction;
627 infinity = dirlen - (lim + pos + len + len) * direction;
628 if (direction < 0)
629 pat = (base_pat += len - 1);
630 BM_tab_base = BM_tab;
631 BM_tab += 0400;
632 j = dirlen; /* to get it in a register */
633 /* A character that does not appear in the pattern induces a */
634 /* stride equal to the pattern length. */
635 while (BM_tab_base != BM_tab)
636 {
637 *--BM_tab = j;
638 *--BM_tab = j;
639 *--BM_tab = j;
640 *--BM_tab = j;
641 }
642 i = 0;
643 while (i != infinity)
644 {
645 j = pat[i]; i += direction;
646 if (i == dirlen) i = infinity;
647 if ((int) trt)
648 {
649 k = (j = trt[j]);
650 if (i == infinity)
651 stride_for_teases = BM_tab[j];
652 BM_tab[j] = dirlen - i;
653 /* A translation table is accompanied by its inverse -- see */
654 /* comment following downcase_table for details */
655 while ((j = inverse_trt[j]) != k)
656 BM_tab[j] = dirlen - i;
657 }
658 else
659 {
660 if (i == infinity)
661 stride_for_teases = BM_tab[j];
662 BM_tab[j] = dirlen - i;
663 }
664 /* stride_for_teases tells how much to stride if we get a */
665 /* match on the far character but are subsequently */
666 /* disappointed, by recording what the stride would have been */
667 /* for that character if the last character had been */
668 /* different. */
669 }
670 infinity = dirlen - infinity;
671 pos += dirlen - ((direction > 0) ? direction : 0);
672 /* loop invariant - pos points at where last char (first char if reverse)
673 of pattern would align in a possible match. */
674 while (n != 0)
675 {
676 if ((lim - pos - (direction > 0)) * direction < 0)
677 return (n * (0 - direction));
678 /* First we do the part we can by pointers (maybe nothing) */
679 QUIT;
680 pat = base_pat;
681 limit = pos - dirlen + direction;
682 limit = ((direction > 0)
683 ? BUFFER_CEILING_OF (limit)
684 : BUFFER_FLOOR_OF (limit));
685 /* LIMIT is now the last (not beyond-last!) value
686 POS can take on without hitting edge of buffer or the gap. */
687 limit = ((direction > 0)
688 ? min (lim - 1, min (limit, pos + 20000))
689 : max (lim, max (limit, pos - 20000)));
690 if ((limit - pos) * direction > 20)
691 {
692 p_limit = &FETCH_CHAR (limit);
693 p2 = (cursor = &FETCH_CHAR (pos));
694 /* In this loop, pos + cursor - p2 is the surrogate for pos */
695 while (1) /* use one cursor setting as long as i can */
696 {
697 if (direction > 0) /* worth duplicating */
698 {
699 /* Use signed comparison if appropriate
700 to make cursor+infinity sure to be > p_limit.
701 Assuming that the buffer lies in a range of addresses
702 that are all "positive" (as ints) or all "negative",
703 either kind of comparison will work as long
704 as we don't step by infinity. So pick the kind
705 that works when we do step by infinity. */
706 if ((int) (p_limit + infinity) > (int) p_limit)
707 while ((int) cursor <= (int) p_limit)
708 cursor += BM_tab[*cursor];
709 else
710 while ((unsigned int) cursor <= (unsigned int) p_limit)
711 cursor += BM_tab[*cursor];
712 }
713 else
714 {
715 if ((int) (p_limit + infinity) < (int) p_limit)
716 while ((int) cursor >= (int) p_limit)
717 cursor += BM_tab[*cursor];
718 else
719 while ((unsigned int) cursor >= (unsigned int) p_limit)
720 cursor += BM_tab[*cursor];
721 }
722 /* If you are here, cursor is beyond the end of the searched region. */
723 /* This can happen if you match on the far character of the pattern, */
724 /* because the "stride" of that character is infinity, a number able */
725 /* to throw you well beyond the end of the search. It can also */
726 /* happen if you fail to match within the permitted region and would */
727 /* otherwise try a character beyond that region */
728 if ((cursor - p_limit) * direction <= len)
729 break; /* a small overrun is genuine */
730 cursor -= infinity; /* large overrun = hit */
731 i = dirlen - direction;
732 if ((int) trt)
733 {
734 while ((i -= direction) + direction != 0)
735 if (pat[i] != trt[*(cursor -= direction)])
736 break;
737 }
738 else
739 {
740 while ((i -= direction) + direction != 0)
741 if (pat[i] != *(cursor -= direction))
742 break;
743 }
744 cursor += dirlen - i - direction; /* fix cursor */
745 if (i + direction == 0)
746 {
747 cursor -= direction;
748 search_regs.start[0]
749 = pos + cursor - p2 + ((direction > 0)
750 ? 1 - len : 0);
751 search_regs.end[0] = len + search_regs.start[0];
752 search_regs_from_string = 0;
753 if ((n -= direction) != 0)
754 cursor += dirlen; /* to resume search */
755 else
756 return ((direction > 0)
757 ? search_regs.end[0] : search_regs.start[0]);
758 }
759 else
760 cursor += stride_for_teases; /* <sigh> we lose - */
761 }
762 pos += cursor - p2;
763 }
764 else
765 /* Now we'll pick up a clump that has to be done the hard */
766 /* way because it covers a discontinuity */
767 {
768 limit = ((direction > 0)
769 ? BUFFER_CEILING_OF (pos - dirlen + 1)
770 : BUFFER_FLOOR_OF (pos - dirlen - 1));
771 limit = ((direction > 0)
772 ? min (limit + len, lim - 1)
773 : max (limit - len, lim));
774 /* LIMIT is now the last value POS can have
775 and still be valid for a possible match. */
776 while (1)
777 {
778 /* This loop can be coded for space rather than */
779 /* speed because it will usually run only once. */
780 /* (the reach is at most len + 21, and typically */
781 /* does not exceed len) */
782 while ((limit - pos) * direction >= 0)
783 pos += BM_tab[FETCH_CHAR(pos)];
784 /* now run the same tests to distinguish going off the */
785 /* end, a match or a phoney match. */
786 if ((pos - limit) * direction <= len)
787 break; /* ran off the end */
788 /* Found what might be a match.
789 Set POS back to last (first if reverse) char pos. */
790 pos -= infinity;
791 i = dirlen - direction;
792 while ((i -= direction) + direction != 0)
793 {
794 pos -= direction;
795 if (pat[i] != (((int) trt)
796 ? trt[FETCH_CHAR(pos)]
797 : FETCH_CHAR (pos)))
798 break;
799 }
800 /* Above loop has moved POS part or all the way
801 back to the first char pos (last char pos if reverse).
802 Set it once again at the last (first if reverse) char. */
803 pos += dirlen - i- direction;
804 if (i + direction == 0)
805 {
806 pos -= direction;
807 search_regs.start[0]
808 = pos + ((direction > 0) ? 1 - len : 0);
809 search_regs.end[0] = len + search_regs.start[0];
810 search_regs_from_string = 0;
811 if ((n -= direction) != 0)
812 pos += dirlen; /* to resume search */
813 else
814 return ((direction > 0)
815 ? search_regs.end[0] : search_regs.start[0]);
816 }
817 else
818 pos += stride_for_teases;
819 }
820 }
821 /* We have done one clump. Can we continue? */
822 if ((lim - pos) * direction < 0)
823 return ((0 - n) * direction);
824 }
825 return pos;
826 }
827 }
828
829 /* Given a string of words separated by word delimiters,
830 compute a regexp that matches those exact words
831 separated by arbitrary punctuation. */
832
833 static Lisp_Object
834 wordify (string)
835 Lisp_Object string;
836 {
837 register unsigned char *p, *o;
838 register int i, len, punct_count = 0, word_count = 0;
839 Lisp_Object val;
840
841 CHECK_STRING (string, 0);
842 p = XSTRING (string)->data;
843 len = XSTRING (string)->size;
844
845 for (i = 0; i < len; i++)
846 if (SYNTAX (p[i]) != Sword)
847 {
848 punct_count++;
849 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
850 }
851 if (SYNTAX (p[len-1]) == Sword) word_count++;
852 if (!word_count) return build_string ("");
853
854 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
855
856 o = XSTRING (val)->data;
857 *o++ = '\\';
858 *o++ = 'b';
859
860 for (i = 0; i < len; i++)
861 if (SYNTAX (p[i]) == Sword)
862 *o++ = p[i];
863 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
864 {
865 *o++ = '\\';
866 *o++ = 'W';
867 *o++ = '\\';
868 *o++ = 'W';
869 *o++ = '*';
870 }
871
872 *o++ = '\\';
873 *o++ = 'b';
874
875 return val;
876 }
877
878 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
879 "sSearch backward: ",
880 "Search backward from point for STRING.\n\
881 Set point to the beginning of the occurrence found, and return point.\n\
882 An optional second argument bounds the search; it is a buffer position.\n\
883 The match found must not extend before that position.\n\
884 Optional third argument, if t, means if fail just return nil (no error).\n\
885 If not nil and not t, position at limit of search and return nil.\n\
886 Optional fourth argument is repeat count--search for successive occurrences.\n\
887 See also the functions `match-beginning', `match-end' and `replace-match'.")
888 (string, bound, noerror, count)
889 Lisp_Object string, bound, noerror, count;
890 {
891 return search_command (string, bound, noerror, count, -1, 0);
892 }
893
894 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
895 "Search forward from point for STRING.\n\
896 Set point to the end of the occurrence found, and return point.\n\
897 An optional second argument bounds the search; it is a buffer position.\n\
898 The match found must not extend after that position. nil is equivalent\n\
899 to (point-max).\n\
900 Optional third argument, if t, means if fail just return nil (no error).\n\
901 If not nil and not t, move to limit of search and return nil.\n\
902 Optional fourth argument is repeat count--search for successive occurrences.\n\
903 See also the functions `match-beginning', `match-end' and `replace-match'.")
904 (string, bound, noerror, count)
905 Lisp_Object string, bound, noerror, count;
906 {
907 return search_command (string, bound, noerror, count, 1, 0);
908 }
909
910 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
911 "sWord search backward: ",
912 "Search backward from point for STRING, ignoring differences in punctuation.\n\
913 Set point to the beginning of the occurrence found, and return point.\n\
914 An optional second argument bounds the search; it is a buffer position.\n\
915 The match found must not extend before that position.\n\
916 Optional third argument, if t, means if fail just return nil (no error).\n\
917 If not nil and not t, move to limit of search and return nil.\n\
918 Optional fourth argument is repeat count--search for successive occurrences.")
919 (string, bound, noerror, count)
920 Lisp_Object string, bound, noerror, count;
921 {
922 return search_command (wordify (string), bound, noerror, count, -1, 1);
923 }
924
925 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
926 "sWord search: ",
927 "Search forward from point for STRING, ignoring differences in punctuation.\n\
928 Set point to the end of the occurrence found, and return point.\n\
929 An optional second argument bounds the search; it is a buffer position.\n\
930 The match found must not extend after that position.\n\
931 Optional third argument, if t, means if fail just return nil (no error).\n\
932 If not nil and not t, move to limit of search and return nil.\n\
933 Optional fourth argument is repeat count--search for successive occurrences.")
934 (string, bound, noerror, count)
935 Lisp_Object string, bound, noerror, count;
936 {
937 return search_command (wordify (string), bound, noerror, count, 1, 1);
938 }
939
940 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
941 "sRE search backward: ",
942 "Search backward from point for match for regular expression REGEXP.\n\
943 Set point to the beginning of the match, and return point.\n\
944 The match found is the one starting last in the buffer\n\
945 and yet ending before the place the origin of the search.\n\
946 An optional second argument bounds the search; it is a buffer position.\n\
947 The match found must start at or after that position.\n\
948 Optional third argument, if t, means if fail just return nil (no error).\n\
949 If not nil and not t, move to limit of search and return nil.\n\
950 Optional fourth argument is repeat count--search for successive occurrences.\n\
951 See also the functions `match-beginning', `match-end' and `replace-match'.")
952 (string, bound, noerror, count)
953 Lisp_Object string, bound, noerror, count;
954 {
955 return search_command (string, bound, noerror, count, -1, 1);
956 }
957
958 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
959 "sRE search: ",
960 "Search forward from point for regular expression REGEXP.\n\
961 Set point to the end of the occurrence found, and return point.\n\
962 An optional second argument bounds the search; it is a buffer position.\n\
963 The match found must not extend after that position.\n\
964 Optional third argument, if t, means if fail just return nil (no error).\n\
965 If not nil and not t, move to limit of search and return nil.\n\
966 Optional fourth argument is repeat count--search for successive occurrences.\n\
967 See also the functions `match-beginning', `match-end' and `replace-match'.")
968 (string, bound, noerror, count)
969 Lisp_Object string, bound, noerror, count;
970 {
971 return search_command (string, bound, noerror, count, 1, 1);
972 }
973
974 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
975 "Replace text matched by last search with NEWTEXT.\n\
976 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
977 Otherwise convert to all caps or cap initials, like replaced text.\n\
978 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
979 Otherwise treat `\\' as special:\n\
980 `\\&' in NEWTEXT means substitute original matched text.\n\
981 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
982 If Nth parens didn't match, substitute nothing.\n\
983 `\\\\' means insert one `\\'.\n\
984 Leaves point at end of replacement text.")
985 (string, fixedcase, literal)
986 Lisp_Object string, fixedcase, literal;
987 {
988 enum { nochange, all_caps, cap_initial } case_action;
989 register int pos, last;
990 int some_multiletter_word;
991 int some_letter = 0;
992 register int c, prevc;
993 int inslen;
994
995 CHECK_STRING (string, 0);
996
997 case_action = nochange; /* We tried an initialization */
998 /* but some C compilers blew it */
999 if (search_regs.start[0] < BEGV
1000 || search_regs.start[0] > search_regs.end[0]
1001 || search_regs.end[0] > ZV)
1002 args_out_of_range(make_number (search_regs.start[0]),
1003 make_number (search_regs.end[0]));
1004
1005 if (NILP (fixedcase))
1006 {
1007 /* Decide how to casify by examining the matched text. */
1008
1009 last = search_regs.end[0];
1010 prevc = '\n';
1011 case_action = all_caps;
1012
1013 /* some_multiletter_word is set nonzero if any original word
1014 is more than one letter long. */
1015 some_multiletter_word = 0;
1016
1017 for (pos = search_regs.start[0]; pos < last; pos++)
1018 {
1019 c = FETCH_CHAR (pos);
1020 if (LOWERCASEP (c))
1021 {
1022 /* Cannot be all caps if any original char is lower case */
1023
1024 case_action = cap_initial;
1025 if (SYNTAX (prevc) != Sword)
1026 {
1027 /* Cannot even be cap initials
1028 if some original initial is lower case */
1029 case_action = nochange;
1030 break;
1031 }
1032 else
1033 some_multiletter_word = 1;
1034 }
1035 else if (!NOCASEP (c))
1036 {
1037 some_letter = 1;
1038 if (!some_multiletter_word && SYNTAX (prevc) == Sword)
1039 some_multiletter_word = 1;
1040 }
1041
1042 prevc = c;
1043 }
1044
1045 /* Do not make new text all caps
1046 if the original text contained only single letter words. */
1047 if (case_action == all_caps && !some_multiletter_word)
1048 case_action = cap_initial;
1049
1050 if (!some_letter) case_action = nochange;
1051 }
1052
1053 SET_PT (search_regs.end[0]);
1054 if (!NILP (literal))
1055 Finsert (1, &string);
1056 else
1057 {
1058 struct gcpro gcpro1;
1059 GCPRO1 (string);
1060
1061 for (pos = 0; pos < XSTRING (string)->size; pos++)
1062 {
1063 c = XSTRING (string)->data[pos];
1064 if (c == '\\')
1065 {
1066 c = XSTRING (string)->data[++pos];
1067 if (c == '&')
1068 Finsert_buffer_substring (Fcurrent_buffer (),
1069 make_number (search_regs.start[0]),
1070 make_number (search_regs.end[0]));
1071 else if (c >= '1' && c <= RE_NREGS + '0')
1072 {
1073 if (search_regs.start[c - '0'] >= 1)
1074 Finsert_buffer_substring (Fcurrent_buffer (),
1075 make_number (search_regs.start[c - '0']),
1076 make_number (search_regs.end[c - '0']));
1077 }
1078 else
1079 insert_char (c);
1080 }
1081 else
1082 insert_char (c);
1083 }
1084 UNGCPRO;
1085 }
1086
1087 inslen = point - (search_regs.end[0]);
1088 del_range (search_regs.start[0], search_regs.end[0]);
1089
1090 if (case_action == all_caps)
1091 Fupcase_region (make_number (point - inslen), make_number (point));
1092 else if (case_action == cap_initial)
1093 upcase_initials_region (make_number (point - inslen), make_number (point));
1094 return Qnil;
1095 }
1096
1097 static Lisp_Object
1098 match_limit (num, beginningp)
1099 Lisp_Object num;
1100 int beginningp;
1101 {
1102 register int n;
1103
1104 CHECK_NUMBER (num, 0);
1105 n = XINT (num);
1106 if (n < 0 || n >= RE_NREGS)
1107 args_out_of_range (num, make_number (RE_NREGS));
1108 if (search_regs.start[n] < 0)
1109 return Qnil;
1110 return (make_number ((beginningp) ? search_regs.start[n]
1111 : search_regs.end[n]));
1112 }
1113
1114 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1115 "Return position of start of text matched by last search.\n\
1116 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1117 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1118 Zero means the entire text matched by the whole regexp or whole string.")
1119 (num)
1120 Lisp_Object num;
1121 {
1122 return match_limit (num, 1);
1123 }
1124
1125 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1126 "Return position of end of text matched by last search.\n\
1127 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1128 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1129 Zero means the entire text matched by the whole regexp or whole string.")
1130 (num)
1131 Lisp_Object num;
1132 {
1133 return match_limit (num, 0);
1134 }
1135
1136 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1137 "Return a list containing all info on what the last search matched.\n\
1138 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1139 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1140 if the last match was on a buffer; integers or nil if a string was matched.\n\
1141 Use `store-match-data' to reinstate the data in this list.")
1142 ()
1143 {
1144 Lisp_Object data[2 * RE_NREGS];
1145 int i, len;
1146
1147 len = -1;
1148 for (i = 0; i < RE_NREGS; i++)
1149 {
1150 int start = search_regs.start[i];
1151 if (start >= 0)
1152 {
1153 if (search_regs_from_string)
1154 {
1155 XFASTINT (data[2 * i]) = start;
1156 XFASTINT (data[2 * i + 1]) = search_regs.end[i];
1157 }
1158 else
1159 {
1160 data[2 * i] = Fmake_marker ();
1161 Fset_marker (data[2 * i], make_number (start), Qnil);
1162 data[2 * i + 1] = Fmake_marker ();
1163 Fset_marker (data[2 * i + 1],
1164 make_number (search_regs.end[i]), Qnil);
1165 }
1166 len = i;
1167 }
1168 else
1169 data[2 * i] = data [2 * i + 1] = Qnil;
1170 }
1171 return Flist (2 * len + 2, data);
1172 }
1173
1174
1175 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1176 "Set internal data on last search match from elements of LIST.\n\
1177 LIST should have been created by calling `match-data' previously.")
1178 (list)
1179 register Lisp_Object list;
1180 {
1181 register int i;
1182 register Lisp_Object marker;
1183
1184 if (!CONSP (list) && !NILP (list))
1185 list = wrong_type_argument (Qconsp, list, 0);
1186
1187 for (i = 0; i < RE_NREGS; i++)
1188 {
1189 marker = Fcar (list);
1190 if (NILP (marker))
1191 {
1192 search_regs.start[i] = -1;
1193 list = Fcdr (list);
1194 }
1195 else
1196 {
1197 if (XTYPE (marker) == Lisp_Marker
1198 && XMARKER (marker)->buffer == 0)
1199 XFASTINT (marker) = 0;
1200
1201 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1202 search_regs.start[i] = XINT (marker);
1203 list = Fcdr (list);
1204
1205 marker = Fcar (list);
1206 if (XTYPE (marker) == Lisp_Marker
1207 && XMARKER (marker)->buffer == 0)
1208 XFASTINT (marker) = 0;
1209
1210 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1211 search_regs.end[i] = XINT (marker);
1212 }
1213 list = Fcdr (list);
1214 }
1215
1216 return Qnil;
1217 }
1218
1219 /* Quote a string to inactivate reg-expr chars */
1220
1221 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1222 "Return a regexp string which matches exactly STRING and nothing else.")
1223 (str)
1224 Lisp_Object str;
1225 {
1226 register unsigned char *in, *out, *end;
1227 register unsigned char *temp;
1228
1229 CHECK_STRING (str, 0);
1230
1231 temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
1232
1233 /* Now copy the data into the new string, inserting escapes. */
1234
1235 in = XSTRING (str)->data;
1236 end = in + XSTRING (str)->size;
1237 out = temp;
1238
1239 for (; in != end; in++)
1240 {
1241 if (*in == '[' || *in == ']'
1242 || *in == '*' || *in == '.' || *in == '\\'
1243 || *in == '?' || *in == '+'
1244 || *in == '^' || *in == '$')
1245 *out++ = '\\';
1246 *out++ = *in;
1247 }
1248
1249 return make_string (temp, out - temp);
1250 }
1251
1252 syms_of_search ()
1253 {
1254 register int i;
1255
1256 searchbuf.allocated = 100;
1257 searchbuf.buffer = (char *) malloc (searchbuf.allocated);
1258 searchbuf.fastmap = search_fastmap;
1259
1260 Qsearch_failed = intern ("search-failed");
1261 staticpro (&Qsearch_failed);
1262 Qinvalid_regexp = intern ("invalid-regexp");
1263 staticpro (&Qinvalid_regexp);
1264
1265 Fput (Qsearch_failed, Qerror_conditions,
1266 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1267 Fput (Qsearch_failed, Qerror_message,
1268 build_string ("Search failed"));
1269
1270 Fput (Qinvalid_regexp, Qerror_conditions,
1271 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1272 Fput (Qinvalid_regexp, Qerror_message,
1273 build_string ("Invalid regexp"));
1274
1275 last_regexp = Qnil;
1276 staticpro (&last_regexp);
1277
1278 defsubr (&Sstring_match);
1279 defsubr (&Slooking_at);
1280 defsubr (&Sskip_chars_forward);
1281 defsubr (&Sskip_chars_backward);
1282 defsubr (&Ssearch_forward);
1283 defsubr (&Ssearch_backward);
1284 defsubr (&Sword_search_forward);
1285 defsubr (&Sword_search_backward);
1286 defsubr (&Sre_search_forward);
1287 defsubr (&Sre_search_backward);
1288 defsubr (&Sreplace_match);
1289 defsubr (&Smatch_beginning);
1290 defsubr (&Smatch_end);
1291 defsubr (&Smatch_data);
1292 defsubr (&Sstore_match_data);
1293 defsubr (&Sregexp_quote);
1294 }