Mercurial > emacs
annotate lisp/emacs-lisp/regexp-opt.el @ 99613:785924da433d
(Splitting Windows, Deleting Windows)
(Selecting Windows, Cyclic Window Ordering)
(Buffers and Windows, Displaying Buffers, Choosing Window)
(Dedicated Windows, Window Point, Window Start and End)
(Textual Scrolling, Vertical Scrolling, Horizontal Scrolling)
(Size of Window, Resizing Windows, Window Configurations)
(Window Parameters): Avoid @var at beginning of sentences and
reword accordingly.
author | Martin Rudalics <rudalics@gmx.at> |
---|---|
date | Sun, 16 Nov 2008 10:15:30 +0000 |
parents | 4ec6f3d46f27 |
children | a9dc0e7c3f2b |
rev | line source |
---|---|
38412
253f761ad37b
Some fixes to follow coding conventions in files maintained by FSF.
Pavel Janík <Pavel@Janik.cz>
parents:
33209
diff
changeset
|
1 ;;; regexp-opt.el --- generate efficient regexps to match strings |
18014 | 2 |
74466 | 3 ;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, |
79704 | 4 ;; 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
18014 | 5 |
25278 | 6 ;; Author: Simon Marshall <simon@gnu.org> |
27589 | 7 ;; Maintainer: FSF |
28420 | 8 ;; Keywords: strings, regexps, extensions |
18014 | 9 |
10 ;; This file is part of GNU Emacs. | |
11 | |
94655
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93920
diff
changeset
|
12 ;; GNU Emacs is free software: you can redistribute it and/or modify |
18014 | 13 ;; it under the terms of the GNU General Public License as published by |
94655
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93920
diff
changeset
|
14 ;; the Free Software Foundation, either version 3 of the License, or |
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93920
diff
changeset
|
15 ;; (at your option) any later version. |
18014 | 16 |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
94655
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93920
diff
changeset
|
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
18014 | 24 |
25 ;;; Commentary: | |
26 | |
25938
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
27 ;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i[sz]e\\)". |
18014 | 28 ;; |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
29 ;; This package generates a regexp from a given list of strings (which matches |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
30 ;; one of those strings) so that the regexp generated by: |
18014 | 31 ;; |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
32 ;; (regexp-opt strings) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
33 ;; |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
34 ;; is equivalent to, but more efficient than, the regexp generated by: |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
35 ;; |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
36 ;; (mapconcat 'regexp-quote strings "\\|") |
18014 | 37 ;; |
38 ;; For example: | |
39 ;; | |
40 ;; (let ((strings '("cond" "if" "when" "unless" "while" | |
41 ;; "let" "let*" "progn" "prog1" "prog2" | |
42 ;; "save-restriction" "save-excursion" "save-window-excursion" | |
43 ;; "save-current-buffer" "save-match-data" | |
44 ;; "catch" "throw" "unwind-protect" "condition-case"))) | |
45 ;; (concat "(" (regexp-opt strings t) "\\>")) | |
46 ;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>" | |
47 ;; | |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
48 ;; Searching using the above example `regexp-opt' regexp takes approximately |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
49 ;; two-thirds of the time taken using the equivalent `mapconcat' regexp. |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
50 |
18014 | 51 ;; Since this package was written to produce efficient regexps, not regexps |
52 ;; efficiently, it is probably not a good idea to in-line too many calls in | |
53 ;; your code, unless you use the following trick with `eval-when-compile': | |
54 ;; | |
55 ;; (defvar definition-regexp | |
56 ;; (eval-when-compile | |
57 ;; (concat "^(" | |
58 ;; (regexp-opt '("defun" "defsubst" "defmacro" "defalias" | |
59 ;; "defvar" "defconst") t) | |
60 ;; "\\>"))) | |
61 ;; | |
62 ;; The `byte-compile' code will be as if you had defined the variable thus: | |
63 ;; | |
64 ;; (defvar definition-regexp | |
65 ;; "^(\\(def\\(alias\\|const\\|macro\\|subst\\|un\\|var\\)\\)\\>") | |
66 ;; | |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
67 ;; Note that if you use this trick for all instances of `regexp-opt' and |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
68 ;; `regexp-opt-depth' in your code, regexp-opt.el would only have to be loaded |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
69 ;; at compile time. But note also that using this trick means that should |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
70 ;; regexp-opt.el be changed, perhaps to fix a bug or to add a feature to |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
71 ;; improve the efficiency of `regexp-opt' regexps, you would have to recompile |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
72 ;; your code for such changes to have effect in your code. |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
73 |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
74 ;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with |
25938
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
75 ;; thanks for ideas also to Michael Ernst, Bob Glickstein, Dan Nicolaescu and |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
76 ;; Stefan Monnier. |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
77 ;; No doubt `regexp-opt' doesn't always produce optimal regexps, so code, ideas |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
78 ;; or any other information to improve things are welcome. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
79 ;; |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
80 ;; One possible improvement would be to compile '("aa" "ab" "ba" "bb") |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
81 ;; into "[ab][ab]" rather than "a[ab]\\|b[ab]". I'm not sure it's worth |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
82 ;; it but if someone knows how to do it without going through too many |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
83 ;; contortions, I'm all ears. |
18014 | 84 |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
85 ;;; Code: |
18014 | 86 |
87 ;;;###autoload | |
88 (defun regexp-opt (strings &optional paren) | |
74081
9faf984a83f8
(regexp-opt): Doc fix.
Juanma Barranquero <lekktu@gmail.com>
parents:
68648
diff
changeset
|
89 "Return a regexp to match a string in the list STRINGS. |
19782 | 90 Each string should be unique in STRINGS and should not contain any regexps, |
91 quoted or not. If optional PAREN is non-nil, ensure that the returned regexp | |
92 is enclosed by at least one regexp grouping construct. | |
18014 | 93 The returned regexp is typically more efficient than the equivalent regexp: |
94 | |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
95 (let ((open (if PAREN \"\\\\(\" \"\")) (close (if PAREN \"\\\\)\" \"\"))) |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
96 (concat open (mapconcat 'regexp-quote STRINGS \"\\\\|\") close)) |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
97 |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
98 If PAREN is `words', then the resulting regexp is additionally surrounded |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
99 by \\=\\< and \\>." |
18014 | 100 (save-match-data |
101 ;; Recurse on the sorted list. | |
93920
22cfb604a455
(regexp-opt): Reduce max-lisp-eval-depth and max-specpdl-size to
Chong Yidong <cyd@stupidchicken.com>
parents:
93892
diff
changeset
|
102 (let* ((max-lisp-eval-depth 10000) |
22cfb604a455
(regexp-opt): Reduce max-lisp-eval-depth and max-specpdl-size to
Chong Yidong <cyd@stupidchicken.com>
parents:
93892
diff
changeset
|
103 (max-specpdl-size 10000) |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
104 (completion-ignore-case nil) |
41754
e78fbcf9b878
(regexp-opt): Bind completion-regexp-list to nil.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
41617
diff
changeset
|
105 (completion-regexp-list nil) |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
106 (words (eq paren 'words)) |
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
107 (open (cond ((stringp paren) paren) (paren "\\("))) |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
108 (sorted-strings (delete-dups |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
109 (sort (copy-sequence strings) 'string-lessp))) |
95267
4ec6f3d46f27
(regexp-opt): Always return a properly-grouped regexp.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
94655
diff
changeset
|
110 (re (regexp-opt-group sorted-strings (or open t) (not open)))) |
32304
e8fca08bb4cc
(regexp-opt): Add \< and \> if PAREN=`words'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32041
diff
changeset
|
111 (if words (concat "\\<" re "\\>") re)))) |
18014 | 112 |
113 ;;;###autoload | |
114 (defun regexp-opt-depth (regexp) | |
115 "Return the depth of REGEXP. | |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
116 This means the number of non-shy regexp grouping constructs |
63512
8d59a5d179f2
(regexp-opt-depth): Fix spelling in docstrings.
Juanma Barranquero <lekktu@gmail.com>
parents:
58802
diff
changeset
|
117 \(parenthesized expressions) in REGEXP." |
18014 | 118 (save-match-data |
119 ;; Hack to signal an error if REGEXP does not have balanced parentheses. | |
120 (string-match regexp "") | |
121 ;; Count the number of open parentheses in REGEXP. | |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
122 (let ((count 0) start last) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
123 (while (string-match "\\\\(\\(\\?:\\)?" regexp start) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
124 (setq start (match-end 0)) ; Start of next search. |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
125 (when (and (not (match-beginning 1)) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
126 (subregexp-context-p regexp (match-beginning 0) last)) |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
127 ;; It's not a shy group and it's not inside brackets or after |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
128 ;; a backslash: it's really a group-open marker. |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
129 (setq last start) ; Speed up next regexp-opt-re-context-p. |
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
130 (setq count (1+ count)))) |
18014 | 131 count))) |
132 | |
133 ;;; Workhorse functions. | |
134 | |
135 (eval-when-compile | |
136 (require 'cl)) | |
137 | |
138 (defun regexp-opt-group (strings &optional paren lax) | |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
139 ;; Return a regexp to match a string in the sorted list STRINGS. |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
140 ;; If PAREN non-nil, output regexp parentheses around returned regexp. |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
141 ;; If LAX non-nil, don't output parentheses if it doesn't require them. |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
142 ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
143 |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
144 ;; The basic idea is to find the shortest common prefix or suffix, remove it |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
145 ;; and recurse. If there is no prefix, we divide the list into two so that |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
146 ;; \(at least) one half will have at least a one-character common prefix. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
147 |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
148 ;; Also we delay the addition of grouping parenthesis as long as possible |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
149 ;; until we're sure we need them, and try to remove one-character sequences |
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
150 ;; so we can use character sets rather than grouping parenthesis. |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
151 (let* ((open-group (cond ((stringp paren) paren) (paren "\\(?:") (t ""))) |
18014 | 152 (close-group (if paren "\\)" "")) |
153 (open-charset (if lax "" open-group)) | |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
154 (close-charset (if lax "" close-group))) |
18014 | 155 (cond |
25938
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
156 ;; |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
157 ;; If there are no strings, just return the empty string. |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
158 ((= (length strings) 0) |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
159 "") |
6f591e2d9c0d
(regexp-opt-try-suffix): New function.
Gerd Moellmann <gerd@gnu.org>
parents:
25278
diff
changeset
|
160 ;; |
18014 | 161 ;; If there is only one string, just return it. |
162 ((= (length strings) 1) | |
163 (if (= (length (car strings)) 1) | |
164 (concat open-charset (regexp-quote (car strings)) close-charset) | |
165 (concat open-group (regexp-quote (car strings)) close-group))) | |
166 ;; | |
167 ;; If there is an empty string, remove it and recurse on the rest. | |
168 ((= (length (car strings)) 0) | |
169 (concat open-charset | |
170 (regexp-opt-group (cdr strings) t t) "?" | |
171 close-charset)) | |
172 ;; | |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
173 ;; If there are several one-char strings, use charsets |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
174 ((and (= (length (car strings)) 1) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
175 (let ((strs (cdr strings))) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
176 (while (and strs (/= (length (car strs)) 1)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
177 (pop strs)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
178 strs)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
179 (let (letters rest) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
180 ;; Collect one-char strings |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
181 (dolist (s strings) |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
182 (if (= (length s) 1) (push (string-to-char s) letters) (push s rest))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
183 |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
184 (if rest |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
185 ;; several one-char strings: take them and recurse |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
186 ;; on the rest (first so as to match the longest). |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
187 (concat open-group |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
188 (regexp-opt-group (nreverse rest)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
189 "\\|" (regexp-opt-charset letters) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
190 close-group) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
191 ;; all are one-char strings: just return a character set. |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
192 (concat open-charset |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
193 (regexp-opt-charset letters) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
194 close-charset)))) |
18014 | 195 ;; |
196 ;; We have a list of different length strings. | |
197 (t | |
45905
20781c152651
(regexp-opt-group): Don't cons uselessly.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
42625
diff
changeset
|
198 (let ((prefix (try-completion "" strings))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
199 (if (> (length prefix) 0) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
200 ;; common prefix: take it and recurse on the suffixes. |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
201 (let* ((n (length prefix)) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
202 (suffixes (mapcar (lambda (s) (substring s n)) strings))) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
203 (concat open-group |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
204 (regexp-quote prefix) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
205 (regexp-opt-group suffixes t t) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
206 close-group)) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
207 |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
208 (let* ((sgnirts (mapcar (lambda (s) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
209 (concat (nreverse (string-to-list s)))) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
210 strings)) |
45905
20781c152651
(regexp-opt-group): Don't cons uselessly.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
42625
diff
changeset
|
211 (xiffus (try-completion "" sgnirts))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
212 (if (> (length xiffus) 0) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
213 ;; common suffix: take it and recurse on the prefixes. |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
214 (let* ((n (- (length xiffus))) |
33209
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
215 (prefixes |
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
216 ;; Sorting is necessary in cases such as ("ad" "d"). |
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
217 (sort (mapcar (lambda (s) (substring s 0 n)) strings) |
f94f82069336
(regexp-opt-group): Sort the strings when extracting a suffix.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
32304
diff
changeset
|
218 'string-lessp))) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
219 (concat open-group |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
220 (regexp-opt-group prefixes t t) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
221 (regexp-quote |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
222 (concat (nreverse (string-to-list xiffus)))) |
32041
a055173cadf8
(regexp-opt-group): Put more parenthesis.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
30719
diff
changeset
|
223 close-group)) |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
49352
diff
changeset
|
224 |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
225 ;; Otherwise, divide the list into those that start with a |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
226 ;; particular letter and those that do not, and recurse on them. |
93892
877d364e5ff6
(regexp-opt-group): Use substring-no-properties for correct handling
Chong Yidong <cyd@stupidchicken.com>
parents:
91327
diff
changeset
|
227 (let* ((char (substring-no-properties (car strings) 0 1)) |
45905
20781c152651
(regexp-opt-group): Don't cons uselessly.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
42625
diff
changeset
|
228 (half1 (all-completions char strings)) |
49352
8bfc6a0f6b3e
(regexp-opt-group): Undo last change. Fix the docstring instead.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
49335
diff
changeset
|
229 (half2 (nthcdr (length half1) strings))) |
28067
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
230 (concat open-group |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
231 (regexp-opt-group half1) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
232 "\\|" (regexp-opt-group half2) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
233 close-group)))))))))) |
e09db52da018
Update copyright and leading comment.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
27589
diff
changeset
|
234 |
18014 | 235 |
236 (defun regexp-opt-charset (chars) | |
237 ;; | |
238 ;; Return a regexp to match a character in CHARS. | |
239 ;; | |
240 ;; The basic idea is to find character ranges. Also we take care in the | |
241 ;; position of character set meta characters in the character set regexp. | |
242 ;; | |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
243 (let* ((charmap (make-char-table 'case-table)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
244 (start -1) (end -2) |
18014 | 245 (charset "") |
246 (bracket "") (dash "") (caret "")) | |
247 ;; | |
248 ;; Make a character map but extract character set meta characters. | |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
249 (dolist (char chars) |
18149
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
250 (case char |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
251 (?\] |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
252 (setq bracket "]")) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
253 (?^ |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
254 (setq caret "^")) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
255 (?- |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
256 (setq dash "-")) |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
257 (otherwise |
2fec7f622b82
emit charsets after strings so that the final regexp finds the longest match.
Simon Marshall <simon@gnu.org>
parents:
18014
diff
changeset
|
258 (aset charmap char t)))) |
18014 | 259 ;; |
260 ;; Make a character set from the map using ranges where applicable. | |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
261 (map-char-table |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
262 (lambda (c v) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
263 (when v |
88401
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
264 (if (consp c) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
265 (if (= (1- (car c)) end) (setq end (cdr c)) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
266 (if (> end (+ start 2)) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
267 (setq charset (format "%s%c-%c" charset start end)) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
268 (while (>= end start) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
269 (setq charset (format "%s%c" charset start)) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
270 (incf start))) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
271 (setq start (car c) end (cdr c))) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
272 (if (= (1- c) end) (setq end c) |
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
273 (if (> end (+ start 2)) |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
274 (setq charset (format "%s%c-%c" charset start end)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
275 (while (>= end start) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
276 (setq charset (format "%s%c" charset start)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
277 (incf start))) |
88401
8eba780a3a36
(regexp-opt-charset): Adjusted for the
Kenichi Handa <handa@m17n.org>
parents:
42625
diff
changeset
|
278 (setq start c end c))))) |
30719
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
279 charmap) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
280 (when (>= end start) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
281 (if (> end (+ start 2)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
282 (setq charset (format "%s%c-%c" charset start end)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
283 (while (>= end start) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
284 (setq charset (format "%s%c" charset start)) |
fd7db1cf7adf
(make-bool-vector): Remove.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
28864
diff
changeset
|
285 (incf start)))) |
18014 | 286 ;; |
287 ;; Make sure a caret is not first and a dash is first or last. | |
288 (if (and (string-equal charset "") (string-equal bracket "")) | |
289 (concat "[" dash caret "]") | |
290 (concat "[" bracket charset caret dash "]")))) | |
291 | |
292 (provide 'regexp-opt) | |
293 | |
58802
da163770bb89
(regexp-opt-depth): Use subregexp-context-p.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
52401
diff
changeset
|
294 ;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370 |
18014 | 295 ;;; regexp-opt.el ends here |