Mercurial > emacs
annotate lisp/emacs-lisp/regexp-opt.el @ 61263:56619c3aaf99
(fancy-splash-text): Shorten default text of
"Emacs Tutorial" line. Also, if the current language env
indicates an available tutorial file other than TUTORIAL,
extract its title and append it to the line in parentheses.
(fancy-splash-insert): If arg is a thunk, funcall it.
author | Thien-Thi Nguyen <ttn@gnuvola.org> |
---|---|
date | Mon, 04 Apr 2005 07:41:58 +0000 |
parents | da163770bb89 |
children | 8d59a5d179f2 f2ebccfa87d4 |
rev | line source |
---|---|
38412
253f761ad37b
Some fixes to follow coding conventions in files maintained by FSF.
Pavel Janík <Pavel@Janik.cz>
parents:
33209
diff
changeset
|
1 ;;; regexp-opt.el --- generate efficient regexps to match strings |
18014 | 2 |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
3 ;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2004 |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
4 ;; Free Software Foundation, Inc. |
18014 | 5 |
25278 | 6 ;; Author: Simon Marshall <simon@gnu.org> |
27589 | 7 ;; Maintainer: FSF |
28420 | 8 ;; Keywords: strings, regexps, extensions |
18014 | 9 |
10 ;; This file is part of GNU Emacs. | |
11 | |
12 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 ;; it under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 ;; Boston, MA 02111-1307, USA. | |
26 | |
27 ;;; Commentary: | |
28 | |
25938
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
29 ;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i[sz]e\\)". |
18014 | 30 ;; |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
31 ;; This package generates a regexp from a given list of strings (which matches |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
32 ;; one of those strings) so that the regexp generated by: |
18014 | 33 ;; |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
34 ;; (regexp-opt strings) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
35 ;; |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
36 ;; is equivalent to, but more efficient than, the regexp generated by: |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
37 ;; |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
38 ;; (mapconcat 'regexp-quote strings "\\|") |
18014 | 39 ;; |
40 ;; For example: | |
41 ;; | |
42 ;; (let ((strings '("cond" "if" "when" "unless" "while" | |
43 ;; "let" "let*" "progn" "prog1" "prog2" | |
44 ;; "save-restriction" "save-excursion" "save-window-excursion" | |
45 ;; "save-current-buffer" "save-match-data" | |
46 ;; "catch" "throw" "unwind-protect" "condition-case"))) | |
47 ;; (concat "(" (regexp-opt strings t) "\\>")) | |
48 ;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>" | |
49 ;; | |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
50 ;; Searching using the above example `regexp-opt' regexp takes approximately |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
51 ;; two-thirds of the time taken using the equivalent `mapconcat' regexp. |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
52 |
18014 | 53 ;; Since this package was written to produce efficient regexps, not regexps |
54 ;; efficiently, it is probably not a good idea to in-line too many calls in | |
55 ;; your code, unless you use the following trick with `eval-when-compile': | |
56 ;; | |
57 ;; (defvar definition-regexp | |
58 ;; (eval-when-compile | |
59 ;; (concat "^(" | |
60 ;; (regexp-opt '("defun" "defsubst" "defmacro" "defalias" | |
61 ;; "defvar" "defconst") t) | |
62 ;; "\\>"))) | |
63 ;; | |
64 ;; The `byte-compile' code will be as if you had defined the variable thus: | |
65 ;; | |
66 ;; (defvar definition-regexp | |
67 ;; "^(\\(def\\(alias\\|const\\|macro\\|subst\\|un\\|var\\)\\)\\>") | |
68 ;; | |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
69 ;; Note that if you use this trick for all instances of `regexp-opt' and |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
70 ;; `regexp-opt-depth' in your code, regexp-opt.el would only have to be loaded |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
71 ;; at compile time. But note also that using this trick means that should |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
72 ;; regexp-opt.el be changed, perhaps to fix a bug or to add a feature to |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
73 ;; improve the efficiency of `regexp-opt' regexps, you would have to recompile |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
74 ;; your code for such changes to have effect in your code. |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
75 |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
76 ;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with |
25938
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
77 ;; thanks for ideas also to Michael Ernst, Bob Glickstein, Dan Nicolaescu and |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
78 ;; Stefan Monnier. |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
79 ;; No doubt `regexp-opt' doesn't always produce optimal regexps, so code, ideas |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
80 ;; or any other information to improve things are welcome. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
81 ;; |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
82 ;; One possible improvement would be to compile '("aa" "ab" "ba" "bb") |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
83 ;; into "[ab][ab]" rather than "a[ab]\\|b[ab]". I'm not sure it's worth |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
84 ;; it but if someone knows how to do it without going through too many |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
85 ;; contortions, I'm all ears. |
18014 | 86 |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
87 ;;; Code: |
18014 | 88 |
89 ;;;###autoload | |
90 (defun regexp-opt (strings &optional paren) | |
91 "Return a regexp to match a string in STRINGS. | |
19782 | 92 Each string should be unique in STRINGS and should not contain any regexps, |
93 quoted or not. If optional PAREN is non-nil, ensure that the returned regexp | |
94 is enclosed by at least one regexp grouping construct. | |
18014 | 95 The returned regexp is typically more efficient than the equivalent regexp: |
96 | |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
97 (let ((open (if PAREN \"\\\\(\" \"\")) (close (if PAREN \"\\\\)\" \"\"))) |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
98 (concat open (mapconcat 'regexp-quote STRINGS \"\\\\|\") close)) |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
99 |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
100 If PAREN is `words', then the resulting regexp is additionally surrounded |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
101 by \\=\\< and \\>." |
18014 | 102 (save-match-data |
103 ;; Recurse on the sorted list. | |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
104 (let* ((max-lisp-eval-depth (* 1024 1024)) |
42625
ad0233037e24
(regexp-opt): Bind max-specpdl-size.
Richard M. Stallman <rms@gnu.org>
parents:
41754
diff
changeset
|
105 (max-specpdl-size (* 1024 1024)) |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
106 (completion-ignore-case nil) |
41754
e78fbcf9b878
(regexp-opt): Bind completion-regexp-list to nil.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
41617
diff
changeset
|
107 (completion-regexp-list nil) |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
108 (words (eq paren 'words)) |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
109 (open (cond ((stringp paren) paren) (paren "\\("))) |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
110 (sorted-strings (delete-dups |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
111 (sort (copy-sequence strings) 'string-lessp))) |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
112 (re (regexp-opt-group sorted-strings open))) |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
113 (if words (concat "\\<" re "\\>") re)))) |
18014 | 114 |
115 ;;;###autoload | |
116 (defun regexp-opt-depth (regexp) | |
117 "Return the depth of REGEXP. | |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
118 This means the number of non-shy regexp grouping constructs |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
119 \(parenthesised expressions) in REGEXP." |
18014 | 120 (save-match-data |
121 ;; Hack to signal an error if REGEXP does not have balanced parentheses. | |
122 (string-match regexp "") | |
123 ;; Count the number of open parentheses in REGEXP. | |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
124 (let ((count 0) start last) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
125 (while (string-match "\\\\(\\(\\?:\\)?" regexp start) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
126 (setq start (match-end 0)) ; Start of next search. |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
127 (when (and (not (match-beginning 1)) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
128 (subregexp-context-p regexp (match-beginning 0) last)) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
129 ;; It's not a shy group and it's not inside brackets or after |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
130 ;; a backslash: it's really a group-open marker. |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
131 (setq last start) ; Speed up next regexp-opt-re-context-p. |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
132 (setq count (1+ count)))) |
18014 | 133 count))) |
134 | |
135 ;;; Workhorse functions. | |
136 | |
137 (eval-when-compile | |
138 (require 'cl)) | |
139 | |
140 (defun regexp-opt-group (strings &optional paren lax) | |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
141 ;; Return a regexp to match a string in the sorted list STRINGS. |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
142 ;; If PAREN non-nil, output regexp parentheses around returned regexp. |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
143 ;; If LAX non-nil, don't output parentheses if it doesn't require them. |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
144 ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
145 |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
146 ;; The basic idea is to find the shortest common prefix or suffix, remove it |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
147 ;; and recurse. If there is no prefix, we divide the list into two so that |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
148 ;; \(at least) one half will have at least a one-character common prefix. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
149 |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
150 ;; Also we delay the addition of grouping parenthesis as long as possible |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
151 ;; until we're sure we need them, and try to remove one-character sequences |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
152 ;; so we can use character sets rather than grouping parenthesis. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
153 (let* ((open-group (cond ((stringp paren) paren) (paren "\\(?:") (t ""))) |
18014 | 154 (close-group (if paren "\\)" "")) |
155 (open-charset (if lax "" open-group)) | |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
156 (close-charset (if lax "" close-group))) |
18014 | 157 (cond |
25938
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
158 ;; |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
159 ;; If there are no strings, just return the empty string. |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
160 ((= (length strings) 0) |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
161 "") |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
162 ;; |
18014 | 163 ;; If there is only one string, just return it. |
164 ((= (length strings) 1) | |
165 (if (= (length (car strings)) 1) | |
166 (concat open-charset (regexp-quote (car strings)) close-charset) | |
167 (concat open-group (regexp-quote (car strings)) close-group))) | |
168 ;; | |
169 ;; If there is an empty string, remove it and recurse on the rest. | |
170 ((= (length (car strings)) 0) | |
171 (concat open-charset | |
172 (regexp-opt-group (cdr strings) t t) "?" | |
173 close-charset)) | |
174 ;; | |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
175 ;; If there are several one-char strings, use charsets |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
176 ((and (= (length (car strings)) 1) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
177 (let ((strs (cdr strings))) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
178 (while (and strs (/= (length (car strs)) 1)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
179 (pop strs)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
180 strs)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
181 (let (letters rest) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
182 ;; Collect one-char strings |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
183 (dolist (s strings) |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
184 (if (= (length s) 1) (push (string-to-char s) letters) (push s rest))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
185 |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
186 (if rest |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
187 ;; several one-char strings: take them and recurse |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
188 ;; on the rest (first so as to match the longest). |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
189 (concat open-group |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
190 (regexp-opt-group (nreverse rest)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
191 "\\|" (regexp-opt-charset letters) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
192 close-group) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
193 ;; all are one-char strings: just return a character set. |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
194 (concat open-charset |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
195 (regexp-opt-charset letters) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
196 close-charset)))) |
18014 | 197 ;; |
198 ;; We have a list of different length strings. | |
199 (t | |
45905
20781c152651
(regexp-opt-group): Don't cons uselessly.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
42625
diff
changeset
|
200 (let ((prefix (try-completion "" strings))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
201 (if (> (length prefix) 0) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
202 ;; common prefix: take it and recurse on the suffixes. |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
203 (let* ((n (length prefix)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
204 (suffixes (mapcar (lambda (s) (substring s n)) strings))) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
205 (concat open-group |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
206 (regexp-quote prefix) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
207 (regexp-opt-group suffixes t t) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
208 close-group)) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
209 |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
210 (let* ((sgnirts (mapcar (lambda (s) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
211 (concat (nreverse (string-to-list s)))) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
212 strings)) |
45905
20781c152651
(regexp-opt-group): Don't cons uselessly.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
42625
diff
changeset
|
213 (xiffus (try-completion "" sgnirts))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
214 (if (> (length xiffus) 0) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
215 ;; common suffix: take it and recurse on the prefixes. |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
216 (let* ((n (- (length xiffus))) |
33209
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
217 (prefixes |
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
218 ;; Sorting is necessary in cases such as ("ad" "d"). |
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
219 (sort (mapcar (lambda (s) (substring s 0 n)) strings) |
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
220 'string-lessp))) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
221 (concat open-group |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
222 (regexp-opt-group prefixes t t) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
223 (regexp-quote |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
224 (concat (nreverse (string-to-list xiffus)))) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
225 close-group)) |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
49352
diff
changeset
|
226 |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
227 ;; Otherwise, divide the list into those that start with a |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
228 ;; particular letter and those that do not, and recurse on them. |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
229 (let* ((char (char-to-string (string-to-char (car strings)))) |
45905
20781c152651
(regexp-opt-group): Don't cons uselessly.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
42625
diff
changeset
|
230 (half1 (all-completions char strings)) |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
231 (half2 (nthcdr (length half1) strings))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
232 (concat open-group |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
233 (regexp-opt-group half1) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
234 "\\|" (regexp-opt-group half2) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
235 close-group)))))))))) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
236 |
18014 | 237 |
238 (defun regexp-opt-charset (chars) | |
239 ;; | |
240 ;; Return a regexp to match a character in CHARS. | |
241 ;; | |
242 ;; The basic idea is to find character ranges. Also we take care in the | |
243 ;; position of character set meta characters in the character set regexp. | |
244 ;; | |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
245 (let* ((charmap (make-char-table 'case-table)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
246 (start -1) (end -2) |
18014 | 247 (charset "") |
248 (bracket "") (dash "") (caret "")) | |
249 ;; | |
250 ;; Make a character map but extract character set meta characters. | |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
251 (dolist (char chars) |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
252 (case char |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
253 (?\] |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
254 (setq bracket "]")) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
255 (?^ |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
256 (setq caret "^")) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
257 (?- |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
258 (setq dash "-")) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
259 (otherwise |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
260 (aset charmap char t)))) |
18014 | 261 ;; |
262 ;; Make a character set from the map using ranges where applicable. | |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
263 (map-char-table |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
264 (lambda (c v) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
265 (when v |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
266 (if (= (1- c) end) (setq end c) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
267 (if (> end (+ start 2)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
268 (setq charset (format "%s%c-%c" charset start end)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
269 (while (>= end start) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
270 (setq charset (format "%s%c" charset start)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
271 (incf start))) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
272 (setq start c end c)))) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
273 charmap) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
274 (when (>= end start) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
275 (if (> end (+ start 2)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
276 (setq charset (format "%s%c-%c" charset start end)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
277 (while (>= end start) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
278 (setq charset (format "%s%c" charset start)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
279 (incf start)))) |
18014 | 280 ;; |
281 ;; Make sure a caret is not first and a dash is first or last. | |
282 (if (and (string-equal charset "") (string-equal bracket "")) | |
283 (concat "[" dash caret "]") | |
284 (concat "[" bracket charset caret dash "]")))) | |
285 | |
286 (provide 'regexp-opt) | |
287 | |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
288 ;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370 |
18014 | 289 ;;; regexp-opt.el ends here |