Mercurial > emacs
annotate lisp/emacs-lisp/rx.el @ 112295:2108d829c749
* xfns.c (x_real_positions): Fix signedness of local var 'ign'.
XGetGeometry wants unsigned int *, not int *, for its last 4 args,
so change the type of 'ign' to unsigned int from int.
author | Paul Eggert <eggert@cs.ucla.edu> |
---|---|
date | Sun, 16 Jan 2011 23:45:28 -0800 |
parents | a28fc1581b94 |
children |
rev | line source |
---|---|
39516 | 1 ;;; rx.el --- sexp notation for regular expressions |
2 | |
68648
067115a6e738
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64751
diff
changeset
|
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, |
112122
a28fc1581b94
* lisp/emacs-lisp/rx.el (rx-repeat): Replace CL function.
Glenn Morris <rgm@gnu.org>
parents:
112038
diff
changeset
|
4 ;; 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. |
39516 | 5 |
6 ;; Author: Gerd Moellmann <gerd@gnu.org> | |
7 ;; Maintainer: FSF | |
8 ;; Keywords: strings, regexps, extensions | |
9 | |
10 ;; This file is part of GNU Emacs. | |
11 | |
94655
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87649
diff
changeset
|
12 ;; GNU Emacs is free software: you can redistribute it and/or modify |
39516 | 13 ;; it under the terms of the GNU General Public License as published by |
94655
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87649
diff
changeset
|
14 ;; the Free Software Foundation, either version 3 of the License, or |
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87649
diff
changeset
|
15 ;; (at your option) any later version. |
39516 | 16 |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
94655
90a2847062be
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87649
diff
changeset
|
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
39516 | 24 |
25 ;;; Commentary: | |
26 | |
27 ;; This is another implementation of sexp-form regular expressions. | |
28 ;; It was unfortunately written without being aware of the Sregex | |
29 ;; package coming with Emacs, but as things stand, Rx completely | |
30 ;; covers all regexp features, which Sregex doesn't, doesn't suffer | |
31 ;; from the bugs mentioned in the commentary section of Sregex, and | |
32 ;; uses a nicer syntax (IMHO, of course :-). | |
33 | |
55102 | 34 ;; This significantly extended version of the original is almost |
35 ;; compatible with Sregex. The only incompatibility I (fx) know of is | |
36 ;; that the `repeat' form can't have multiple regexp args. | |
37 | |
38 ;; Now alternative forms are provided for a degree of compatibility | |
39 ;; with Shivers' attempted definitive SRE notation | |
40 ;; <URL:http://www.ai.mit.edu/~/shivers/sre.txt>. SRE forms not | |
41 ;; catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>, | |
42 ;; ,<exp>, (word ...), word+, posix-string, and character class forms. | |
43 ;; Some forms are inconsistent with SRE, either for historical reasons | |
44 ;; or because of the implementation -- simple translation into Emacs | |
45 ;; regexp strings. These include: any, word. Also, case-sensitivity | |
46 ;; and greediness are controlled by variables external to the regexp, | |
47 ;; and you need to feed the forms to the `posix-' functions to get | |
48 ;; SRE's POSIX semantics. There are probably more difficulties. | |
49 | |
39516 | 50 ;; Rx translates a sexp notation for regular expressions into the |
51 ;; usual string notation. The translation can be done at compile-time | |
52 ;; by using the `rx' macro. It can be done at run-time by calling | |
53 ;; function `rx-to-string'. See the documentation of `rx' for a | |
54 ;; complete description of the sexp notation. | |
55 ;; | |
56 ;; Some examples of string regexps and their sexp counterparts: | |
57 ;; | |
58 ;; "^[a-z]*" | |
59 ;; (rx (and line-start (0+ (in "a-z")))) | |
60 ;; | |
61 ;; "\n[^ \t]" | |
62 ;; (rx (and "\n" (not blank))), or | |
63 ;; (rx (and "\n" (not (any " \t")))) | |
64 ;; | |
65 ;; "\\*\\*\\* EOOH \\*\\*\\*\n" | |
66 ;; (rx "*** EOOH ***\n") | |
67 ;; | |
68 ;; "\\<\\(catch\\|finally\\)\\>[^_]" | |
69 ;; (rx (and word-start (submatch (or "catch" "finally")) word-end | |
70 ;; (not (any ?_)))) | |
71 ;; | |
72 ;; "[ \t\n]*:\\([^:]+\\|$\\)" | |
73 ;; (rx (and (zero-or-more (in " \t\n")) ":" | |
74 ;; (submatch (or line-end (one-or-more (not (any ?:))))))) | |
75 ;; | |
76 ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" | |
77 ;; (rx (and line-start | |
78 ;; "content-transfer-encoding:" | |
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
79 ;; (+ (? ?\n)) blank |
39516 | 80 ;; "quoted-printable" |
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
81 ;; (+ (? ?\n)) blank)) |
39516 | 82 ;; |
83 ;; (concat "^\\(?:" something-else "\\)") | |
84 ;; (rx (and line-start (eval something-else))), statically or | |
85 ;; (rx-to-string `(and line-start ,something-else)), dynamically. | |
86 ;; | |
87 ;; (regexp-opt '(STRING1 STRING2 ...)) | |
88 ;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically | |
89 ;; calls `regexp-opt' as needed. | |
90 ;; | |
91 ;; "^;;\\s-*\n\\|^\n" | |
92 ;; (rx (or (and line-start ";;" (0+ space) ?\n) | |
93 ;; (and line-start ?\n))) | |
94 ;; | |
95 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " | |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
96 ;; (rx (and "$Id: " |
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
97 ;; (1+ (not (in " "))) |
39516 | 98 ;; " " |
99 ;; (submatch (1+ (not (in " ")))) | |
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
100 ;; " ")) |
39516 | 101 ;; |
102 ;; "\\\\\\\\\\[\\w+" | |
103 ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) | |
104 ;; | |
105 ;; etc. | |
106 | |
107 ;;; History: | |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
108 ;; |
39516 | 109 |
110 ;;; Code: | |
111 | |
112 (defconst rx-constituents | |
113 '((and . (rx-and 1 nil)) | |
55102 | 114 (seq . and) ; SRE |
115 (: . and) ; SRE | |
116 (sequence . and) ; sregex | |
39516 | 117 (or . (rx-or 1 nil)) |
55102 | 118 (| . or) ; SRE |
39516 | 119 (not-newline . ".") |
55102 | 120 (nonl . not-newline) ; SRE |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
121 (anything . (rx-anything 0 nil)) |
55102 | 122 (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
123 (any . ".") ; sregex |
39516 | 124 (in . any) |
55102 | 125 (char . any) ; sregex |
126 (not-char . (rx-not-char 1 nil rx-check-any)) ; sregex | |
39516 | 127 (not . (rx-not 1 1 rx-check-not)) |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
128 (repeat . (rx-repeat 2 nil)) |
55102 | 129 (= . (rx-= 2 nil)) ; SRE |
130 (>= . (rx->= 2 nil)) ; SRE | |
131 (** . (rx-** 2 nil)) ; SRE | |
132 (submatch . (rx-submatch 1 nil)) ; SRE | |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
133 (group . submatch) ; sregex |
55102 | 134 (zero-or-more . (rx-kleene 1 nil)) |
135 (one-or-more . (rx-kleene 1 nil)) | |
136 (zero-or-one . (rx-kleene 1 nil)) | |
137 (\? . zero-or-one) ; SRE | |
39516 | 138 (\?? . zero-or-one) |
55102 | 139 (* . zero-or-more) ; SRE |
39516 | 140 (*? . zero-or-more) |
141 (0+ . zero-or-more) | |
55102 | 142 (+ . one-or-more) ; SRE |
39516 | 143 (+? . one-or-more) |
144 (1+ . one-or-more) | |
145 (optional . zero-or-one) | |
55102 | 146 (opt . zero-or-one) ; sregex |
39516 | 147 (minimal-match . (rx-greedy 1 1)) |
148 (maximal-match . (rx-greedy 1 1)) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
149 (backref . (rx-backref 1 1 rx-check-backref)) |
39516 | 150 (line-start . "^") |
55102 | 151 (bol . line-start) ; SRE |
39516 | 152 (line-end . "$") |
55102 | 153 (eol . line-end) ; SRE |
39516 | 154 (string-start . "\\`") |
55102 | 155 (bos . string-start) ; SRE |
156 (bot . string-start) ; sregex | |
39516 | 157 (string-end . "\\'") |
55102 | 158 (eos . string-end) ; SRE |
159 (eot . string-end) ; sregex | |
39516 | 160 (buffer-start . "\\`") |
161 (buffer-end . "\\'") | |
162 (point . "\\=") | |
163 (word-start . "\\<") | |
55102 | 164 (bow . word-start) ; SRE |
39516 | 165 (word-end . "\\>") |
55102 | 166 (eow . word-end) ; SRE |
39516 | 167 (word-boundary . "\\b") |
55102 | 168 (not-word-boundary . "\\B") ; sregex |
60930
a6ae354aa8ef
(rx-constituents): Add symbol-start and symbol-end.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
55103
diff
changeset
|
169 (symbol-start . "\\_<") |
a6ae354aa8ef
(rx-constituents): Add symbol-start and symbol-end.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
55103
diff
changeset
|
170 (symbol-end . "\\_>") |
39516 | 171 (syntax . (rx-syntax 1 1)) |
55102 | 172 (not-syntax . (rx-not-syntax 1 1)) ; sregex |
39516 | 173 (category . (rx-category 1 1 rx-check-category)) |
174 (eval . (rx-eval 1 1)) | |
175 (regexp . (rx-regexp 1 1 stringp)) | |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
176 (regex . regexp) ; sregex |
39516 | 177 (digit . "[[:digit:]]") |
55102 | 178 (numeric . digit) ; SRE |
179 (num . digit) ; SRE | |
180 (control . "[[:cntrl:]]") ; SRE | |
181 (cntrl . control) ; SRE | |
182 (hex-digit . "[[:xdigit:]]") ; SRE | |
183 (hex . hex-digit) ; SRE | |
184 (xdigit . hex-digit) ; SRE | |
185 (blank . "[[:blank:]]") ; SRE | |
186 (graphic . "[[:graph:]]") ; SRE | |
187 (graph . graphic) ; SRE | |
188 (printing . "[[:print:]]") ; SRE | |
189 (print . printing) ; SRE | |
190 (alphanumeric . "[[:alnum:]]") ; SRE | |
191 (alnum . alphanumeric) ; SRE | |
39516 | 192 (letter . "[[:alpha:]]") |
55102 | 193 (alphabetic . letter) ; SRE |
194 (alpha . letter) ; SRE | |
195 (ascii . "[[:ascii:]]") ; SRE | |
39516 | 196 (nonascii . "[[:nonascii:]]") |
55102 | 197 (lower . "[[:lower:]]") ; SRE |
198 (lower-case . lower) ; SRE | |
199 (punctuation . "[[:punct:]]") ; SRE | |
200 (punct . punctuation) ; SRE | |
201 (space . "[[:space:]]") ; SRE | |
202 (whitespace . space) ; SRE | |
203 (white . space) ; SRE | |
204 (upper . "[[:upper:]]") ; SRE | |
205 (upper-case . upper) ; SRE | |
206 (word . "[[:word:]]") ; inconsistent with SRE | |
207 (wordchar . word) ; sregex | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
208 (not-wordchar . "\\W")) |
39516 | 209 "Alist of sexp form regexp constituents. |
210 Each element of the alist has the form (SYMBOL . DEFN). | |
211 SYMBOL is a valid constituent of sexp regular expressions. | |
212 If DEFN is a string, SYMBOL is translated into DEFN. | |
213 If DEFN is a symbol, use the definition of DEFN, recursively. | |
214 Otherwise, DEFN must be a list (FUNCTION MIN-ARGS MAX-ARGS PREDICATE). | |
215 FUNCTION is used to produce code for SYMBOL. MIN-ARGS and MAX-ARGS | |
216 are the minimum and maximum number of arguments the function-form | |
217 sexp constituent SYMBOL may have in sexp regular expressions. | |
218 MAX-ARGS nil means no limit. PREDICATE, if specified, means that | |
219 all arguments must satisfy PREDICATE.") | |
220 | |
221 | |
222 (defconst rx-syntax | |
223 '((whitespace . ?-) | |
224 (punctuation . ?.) | |
225 (word . ?w) | |
226 (symbol . ?_) | |
227 (open-parenthesis . ?\() | |
228 (close-parenthesis . ?\)) | |
229 (expression-prefix . ?\') | |
230 (string-quote . ?\") | |
231 (paired-delimiter . ?$) | |
232 (escape . ?\\) | |
233 (character-quote . ?/) | |
234 (comment-start . ?<) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
235 (comment-end . ?>) |
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
236 (string-delimiter . ?|) |
55103
93f6ab2a0eb5
(rx-syntax): Move sregex style syntax to code.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
55102
diff
changeset
|
237 (comment-delimiter . ?!)) |
39516 | 238 "Alist mapping Rx syntax symbols to syntax characters. |
239 Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid | |
240 symbol in `(syntax SYMBOL)', and CHAR is the syntax character | |
241 corresponding to SYMBOL, as it would be used with \\s or \\S in | |
242 regular expressions.") | |
243 | |
244 | |
245 (defconst rx-categories | |
246 '((consonant . ?0) | |
247 (base-vowel . ?1) | |
248 (upper-diacritical-mark . ?2) | |
249 (lower-diacritical-mark . ?3) | |
250 (tone-mark . ?4) | |
251 (symbol . ?5) | |
252 (digit . ?6) | |
253 (vowel-modifying-diacritical-mark . ?7) | |
254 (vowel-sign . ?8) | |
255 (semivowel-lower . ?9) | |
256 (not-at-end-of-line . ?<) | |
257 (not-at-beginning-of-line . ?>) | |
258 (alpha-numeric-two-byte . ?A) | |
259 (chinse-two-byte . ?C) | |
260 (greek-two-byte . ?G) | |
261 (japanese-hiragana-two-byte . ?H) | |
262 (indian-two-byte . ?I) | |
263 (japanese-katakana-two-byte . ?K) | |
264 (korean-hangul-two-byte . ?N) | |
265 (cyrillic-two-byte . ?Y) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
266 (combining-diacritic . ?^) |
39516 | 267 (ascii . ?a) |
268 (arabic . ?b) | |
269 (chinese . ?c) | |
270 (ethiopic . ?e) | |
271 (greek . ?g) | |
272 (korean . ?h) | |
273 (indian . ?i) | |
274 (japanese . ?j) | |
275 (japanese-katakana . ?k) | |
276 (latin . ?l) | |
277 (lao . ?o) | |
278 (tibetan . ?q) | |
279 (japanese-roman . ?r) | |
280 (thai . ?t) | |
281 (vietnamese . ?v) | |
282 (hebrew . ?w) | |
283 (cyrillic . ?y) | |
284 (can-break . ?|)) | |
285 "Alist mapping symbols to category characters. | |
286 Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid | |
287 symbol in `(category SYMBOL)', and CHAR is the category character | |
288 corresponding to SYMBOL, as it would be used with `\\c' or `\\C' in | |
289 regular expression strings.") | |
290 | |
291 | |
292 (defvar rx-greedy-flag t | |
293 "Non-nil means produce greedy regular expressions for `zero-or-one', | |
294 `zero-or-more', and `one-or-more'. Dynamically bound.") | |
295 | |
296 | |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
297 (defun rx-info (op head) |
39516 | 298 "Return parsing/code generation info for OP. |
299 If OP is the space character ASCII 32, return info for the symbol `?'. | |
300 If OP is the character `?', return info for the symbol `??'. | |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
301 See also `rx-constituents'. |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
302 If HEAD is non-nil, then OP is the head of a sexp, otherwise it's |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
303 a standalone symbol." |
39516 | 304 (cond ((eq op ? ) (setq op '\?)) |
305 ((eq op ??) (setq op '\??))) | |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
306 (let (old-op) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
307 (while (and (not (null op)) (symbolp op)) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
308 (setq old-op op) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
309 (setq op (cdr (assq op rx-constituents))) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
310 (when (if head (stringp op) (consp op)) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
311 ;; We found something but of the wrong kind. Let's look for an |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
312 ;; alternate definition for the other case. |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
313 (let ((new-op |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
314 (cdr (assq old-op (cdr (memq (assq old-op rx-constituents) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
315 rx-constituents)))))) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
316 (if (and new-op (not (if head (stringp new-op) (consp new-op)))) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
317 (setq op new-op)))))) |
39516 | 318 op) |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
319 |
39516 | 320 |
321 (defun rx-check (form) | |
322 "Check FORM according to its car's parsing info." | |
55102 | 323 (unless (listp form) |
324 (error "rx `%s' needs argument(s)" form)) | |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
325 (let* ((rx (rx-info (car form) 'head)) |
39516 | 326 (nargs (1- (length form))) |
327 (min-args (nth 1 rx)) | |
328 (max-args (nth 2 rx)) | |
329 (type-pred (nth 3 rx))) | |
330 (when (and (not (null min-args)) | |
331 (< nargs min-args)) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
332 (error "rx form `%s' requires at least %d args" |
39516 | 333 (car form) min-args)) |
334 (when (and (not (null max-args)) | |
335 (> nargs max-args)) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
336 (error "rx form `%s' accepts at most %d args" |
39516 | 337 (car form) max-args)) |
338 (when (not (null type-pred)) | |
339 (dolist (sub-form (cdr form)) | |
340 (unless (funcall type-pred sub-form) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
341 (error "rx form `%s' requires args satisfying `%s'" |
39516 | 342 (car form) type-pred)))))) |
343 | |
344 | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
345 (defun rx-group-if (regexp group) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
346 "Put shy groups around REGEXP if seemingly necessary when GROUP |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
347 is non-nil." |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
348 (cond |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
349 ;; for some repetition |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
350 ((eq group '*) (if (rx-atomic-p regexp) (setq group nil))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
351 ;; for concatenation |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
352 ((eq group ':) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
353 (if (rx-atomic-p |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
354 (if (string-match |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
355 "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
356 (substring regexp 0 (match-beginning 0)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
357 regexp)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
358 (setq group nil))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
359 ;; for OR |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
360 ((eq group '|) (setq group nil)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
361 ;; do anyway |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
362 ((eq group t)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
363 ((rx-atomic-p regexp t) (setq group nil))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
364 (if group |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
365 (concat "\\(?:" regexp "\\)") |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
366 regexp)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
367 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
368 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
369 (defvar rx-parent) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
370 ;; dynamically bound in some functions. |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
371 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
372 |
39516 | 373 (defun rx-and (form) |
374 "Parse and produce code from FORM. | |
375 FORM is of the form `(and FORM1 ...)'." | |
376 (rx-check form) | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
377 (rx-group-if |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
378 (mapconcat (lambda (x) (rx-form x ':)) (cdr form) nil) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
379 (and (memq rx-parent '(* t)) rx-parent))) |
39516 | 380 |
381 | |
382 (defun rx-or (form) | |
383 "Parse and produce code from FORM, which is `(or FORM1 ...)'." | |
384 (rx-check form) | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
385 (rx-group-if |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
386 (if (memq nil (mapcar 'stringp (cdr form))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
387 (mapconcat (lambda (x) (rx-form x '|)) (cdr form) "\\|") |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
388 (regexp-opt (cdr form))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
389 (and (memq rx-parent '(: * t)) rx-parent))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
390 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
391 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
392 (defun rx-anything (form) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
393 "Match any character." |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
394 (if (consp form) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
395 (error "rx `anythng' syntax error: %s" form)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
396 (rx-or (list 'or 'not-newline ?\n))) |
39516 | 397 |
398 | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
399 (defun rx-any-delete-from-range (char ranges) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
400 "Delete by side effect character CHAR from RANGES. |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
401 Only the two edges of each range are checked." |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
402 (let (m) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
403 (cond |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
404 ((memq char ranges) (setq ranges (delq char ranges))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
405 ((setq m (assq char ranges)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
406 (if (eq (1+ char) (cdr m)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
407 (setcar (memq m ranges) (1+ char)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
408 (setcar m (1+ char)))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
409 ((setq m (rassq char ranges)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
410 (if (eq (1- char) (car m)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
411 (setcar (memq m ranges) (1- char)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
412 (setcdr m (1- char))))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
413 ranges)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
414 |
112122
a28fc1581b94
* lisp/emacs-lisp/rx.el (rx-repeat): Replace CL function.
Glenn Morris <rgm@gnu.org>
parents:
112038
diff
changeset
|
415 |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
(defun rx-any-condense-range (args)
  "Condense by side effect ARGS as range for Rx `any'.
ARGS is a list whose elements are strings, characters (numbers)
or (FROM . TO) pairs.  Return a list holding the merged ranges
ordered by their lower bound, where a range that covers one or
two characters is written as plain characters, followed by the
distinct strings.  The pairs found in ARGS may be modified
destructively."
  (let (strings
        ranges)
    ;; Split the elements into STRINGS and RANGES; a lone character
    ;; becomes the degenerate range (CHAR . CHAR).
    (dolist (elt args)
      (cond ((stringp elt) (push elt strings))
            ((numberp elt) (push (cons elt elt) ranges))
            (t (push elt ranges))))
    ;; Order the ranges by lower bound, then fold each range into its
    ;; predecessor whenever the two overlap or are adjacent.
    (setq ranges (sort ranges #'car-less-than-car))
    (let ((cell ranges)
          next)
      (while (setq next (cdr cell))
        (cond ((>= (cdar cell) (1- (caar next)))
               (setcdr (car cell) (max (cdar cell) (cdar next)))
               (setcdr cell (cdr next)))
              (t (setq cell next)))))
    ;; Spell ranges of one or two characters out as characters, and
    ;; append the strings with duplicates removed.
    (nconc
     (apply #'nconc
            (mapcar (lambda (range)
                      (let ((from (car range))
                            (to (cdr range)))
                        (cond ((= from to) (list from))
                              ((= (1+ from) to) (list from to))
                              (t (list range)))))
                    ranges))
     (delete-dups strings))))
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
445 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
446 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
(defun rx-check-any-string (str)
  "Check string argument STR for Rx `any'.
Return the contents of STR as a list: each `X-Y' sequence with
X < Y becomes the pair (X . Y), `X-X' becomes the character X, a
sequence whose bounds are in descending order contributes
nothing, and every other character stands for itself.
Signal an error if STR is empty."
  (let ((i 0)
        c1 c2 l)
    (if (= 0 (length str))
        (error "String arg for Rx `any' must not be empty"))
    ;; `.' never matches a newline, so name the newline explicitly;
    ;; otherwise a range with a newline as one of its bounds would be
    ;; taken for three separate characters.
    (while (string-match "\\(?:.\\|\n\\)-\\(?:.\\|\n\\)" str i)
      ;; string before range: convert it to characters
      (if (< i (match-beginning 0))
          (setq l (nconc
                   l
                   (append (substring str i (match-beginning 0)) nil))))
      ;; range
      (setq i (match-end 0)
            c1 (aref str (match-beginning 0))
            c2 (aref str (1- i)))
      (cond
       ((< c1 c2) (setq l (nconc l (list (cons c1 c2)))))
       ((= c1 c2) (setq l (nconc l (list c1))))))
    ;; rest?
    (if (< i (length str))
        (setq l (nconc l (append (substring str i) nil))))
    l))
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
470 |
39516 | 471 |
(defun rx-check-any (arg)
  "Check arg ARG for Rx `any'.
Return a list with the checked form of ARG: an integer or a pair
of integers is returned as is inside a one-element list, a symbol
is replaced by the character class string `rx-form' translates it
to, with its outer brackets removed, and a string is handed to
`rx-check-any-string'.  Signal an error for anything else, and
for a symbol that does not translate to a character class."
  (cond
   ((integerp arg)
    (list arg))
   ((symbolp arg)
    ;; A symbol that `rx-form' rejects is reported as an invalid
    ;; char class, like one that translates to something else.
    (let ((class (condition-case nil
                     (rx-form arg)
                   (error nil))))
      (unless (and class
                   (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'" class))
        (error "Invalid char class `%s' in Rx `any'" arg))
      ;; Keep "[:NAME:]" only, without the surrounding brackets.
      (list (substring class 1 -1))))
   ((and (integerp (car-safe arg)) (integerp (cdr-safe arg)))
    (list arg))
   ((stringp arg)
    (rx-check-any-string arg))
   (t
    (error
     "rx `any' requires string, character, char pair or char class args"))))
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
489 |
39516 | 490 |
(defun rx-any (form)
  "Parse and produce code from FORM, which is `(any ARG ...)'.
ARG is optional.
The arguments are checked with `rx-check-any' and merged with
`rx-any-condense-range'; the resulting elements are then
reordered so that `]', `-' and `^' end up where they can be
written literally inside a bracket expression.  Return the
quoted character itself when a single character is left and that
is possible, otherwise the bracket expression as a string."
  (rx-check form)
  (let* ((args (rx-any-condense-range
                (apply
                 #'nconc
                 (mapcar #'rx-check-any (cdr form)))))
         m
         s)
    (cond
     ;; single close bracket
     ;;  => "[]...-]" or "[]...--.]"
     ((memq ?\] args)
      ;; set ] at the beginning
      (setq args (cons ?\] (delq ?\] args)))
      ;; set - at the end
      (if (or (memq ?- args) (assq ?- args))
          (setq args (nconc (rx-any-delete-from-range ?- args)
                            (list ?-)))))
     ;; close bracket starts a range
     ;;  => "[]-....-]" or "[]-.--....]"
     ((setq m (assq ?\] args))
      ;; bring it to the beginning
      (setq args (cons m (delq m args)))
      (cond ((memq ?- args)
             ;; to the end
             (setq args (nconc (delq ?- args) (list ?-))))
            ((setq m (assq ?- args))
             ;; next to the bracket's range, make the second range.
             ;; Remove M from the tail only: `delq' on ARGS itself
             ;; returns ARGS, and storing a list that ends in ARGS
             ;; into the cdr of ARGS would make the list circular.
             (setcdr args (cons m (delq m (cdr args)))))))
     ;; bracket in the end range
     ;;  => "[]...-]"
     ((setq m (rassq ?\] args))
      ;; set ] at the beginning
      (setq args (cons ?\] (rx-any-delete-from-range ?\] args)))
      ;; set - at the end
      (if (or (memq ?- args) (assq ?- args))
          (setq args (nconc (rx-any-delete-from-range ?- args)
                            (list ?-)))))
     ;; {no close bracket appears}
     ;;
     ;; bring single bar to the beginning
     ((memq ?- args)
      (setq args (cons ?- (delq ?- args))))
     ;; bar start a range, bring it to the beginning
     ((setq m (assq ?- args))
      (setq args (cons m (delq m args))))
     ;;
     ;; hat at the beginning?
     ((or (eq (car args) ?^) (eq (car-safe (car args)) ?^))
      ;; A leading hat would negate the set: swap it with the next
      ;; element, or, when it is the only element, take the hat out
      ;; of it and put a single hat last.
      (setq args (if (cdr args)
                     `(,(cadr args) ,(car args) ,@(cddr args))
                   (nconc (rx-any-delete-from-range ?^ args)
                          (list ?^))))))
    ;; some 1-char?
    ;; S is only set once ARGS is known to be a single character.
    (if (and (null (cdr args)) (numberp (car args))
             (or (= 1 (length
                       (setq s (regexp-quote (string (car args))))))
                 (and (equal (car args) ?^) ;; unnecessary predicate?
                      (null (eq rx-parent '!)))))
        s
      (concat "["
              (mapconcat
               (lambda (e) (cond
                            ((numberp e) (string e))
                            ((consp e)
                             (if (and (= (1+ (car e)) (cdr e))
                                      ;; rx-any-condense-range should
                                      ;; prevent this case from happening.
                                      (null (memq (car e) '(?\] ?-)))
                                      (null (memq (cdr e) '(?\] ?-))))
                                 (string (car e) (cdr e))
                               (string (car e) ?- (cdr e))))
                            (e)))
               args
               nil)
              "]"))))
39516 | 569 |
570 | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
(defun rx-check-not (arg)
  "Check arg ARG for Rx `not'.
Return t if ARG is a symbol that `rx-form' translates to a
character class, the symbol `word-boundary', or a list whose car
is one of `not', `any', `in', `syntax' and `category'.  Signal an
error otherwise."
  (cond
   ;; A symbol `rx-form' rejects is matched as the empty string,
   ;; which is not a character class.
   ((and (symbolp arg)
         (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'"
                       (condition-case nil
                           (rx-form arg)
                         (error ""))))
    t)
   ((eq arg 'word-boundary)
    t)
   ((and (consp arg)
         (memq (car arg) '(not any in syntax category)))
    t)
   (t
    (error "rx `not' syntax error: %s" arg))))
39516 | 583 |
584 | |
(defun rx-not (form)
  "Parse and produce code from FORM.  FORM is `(not ...)'.
The argument is translated with `rx-form' and the resulting
regexp is negated: a negated bracket expression loses its `^', a
plain one gains it, a backslash construct starting with one of
the letters s, c, b and w has the case of that letter flipped,
and anything else is wrapped in `[^...]'."
  (rx-check form)
  (let* ((result (rx-form (cadr form) '!))
         ;; The letter tests below must tell upper from lower case.
         (case-fold-search nil))
    (if (string-match "\\`\\[^" result)
        ;; RESULT is already a negated set.
        (cond
         ((equal result "[^]") "[^^]")
         ((and (= (length result) 4) (not (eq rx-parent '!)))
          ;; A set negating one character: return that character.
          (regexp-quote (substring result 2 3)))
         (t (concat "[" (substring result 2))))
      (cond
       ((eq ?\[ (aref result 0))
        (concat "[^" (substring result 1)))
       ((string-match "\\`\\\\[scbw]" result)
        (concat (upcase (substring result 0 2))
                (substring result 2)))
       ((string-match "\\`\\\\[SCBW]" result)
        (concat (downcase (substring result 0 2))
                (substring result 2)))
       (t
        (concat "[^" result "]"))))))
606 | |
607 | |
(defun rx-not-char (form)
  "Parse and produce code from FORM.  FORM is `(not-char ...)'.
The arguments of FORM are passed on to `rx-not' as `(not (in ...))'."
  (rx-check form)
  (let ((chars (cons 'in (cdr form))))
    (rx-not (list 'not chars))))
612 | |
613 | |
(defun rx-not-syntax (form)
  "Parse and produce code from FORM.  FORM is `(not-syntax SYNTAX)'.
The arguments of FORM are passed on to `rx-not' as
`(not (syntax ...))'."
  (rx-check form)
  (let ((syntax (cons 'syntax (cdr form))))
    (rx-not (list 'not syntax))))
618 | |
619 | |
(defun rx-trans-forms (form &optional skip)
  "If FORM's length is greater than two, transform it to length two.
A form (HEAD REST ...) becomes (HEAD (and REST ...)).
If SKIP is non-nil, allow that number of items after the head, i.e.
`(= N REST ...)' becomes `(= N (and REST ...))' if SKIP is 1.
FORM itself is returned when exactly one item follows the head and
the skipped items; otherwise the result is a copy and FORM is left
unchanged."
  (let* ((nskip (or skip 0))
         (rest (nthcdr (1+ nskip) form)))
    (cond ((= 1 (length rest))
           form)
          (t
           ;; Work on a copy so that the caller's list keeps its shape.
           (let ((copy (copy-sequence form)))
             (setcdr (nthcdr nskip copy) (list (cons 'and rest)))
             copy)))))
632 | |
633 | |
(defun rx-= (form)
  "Parse and produce code from FORM `(= N ...)'.
The forms after N are combined with `rx-trans-forms'.  Signal an
error unless N is a positive integer."
  (rx-check form)
  (let* ((trans (rx-trans-forms form 1))
         (count (nth 1 trans)))
    (if (and (integerp count) (> count 0))
        (format "%s\\{%d\\}" (rx-form (nth 2 trans) '*) count)
      (error "rx `=' requires positive integer first arg"))))
55102 | 642 |
643 | |
(defun rx->= (form)
  "Parse and produce code from FORM `(>= N ...)'.
The forms after N are combined with `rx-trans-forms'.  Signal an
error unless N is a positive integer."
  (rx-check form)
  (let* ((trans (rx-trans-forms form 1))
         (count (nth 1 trans)))
    (if (and (integerp count) (> count 0))
        (format "%s\\{%d,\\}" (rx-form (nth 2 trans) '*) count)
      (error "rx `>=' requires positive integer first arg"))))
55102 | 652 |
653 | |
(defun rx-** (form)
  "Parse and produce code from FORM `(** N M ...)'.
The forms after M are combined with `rx-trans-forms', and the
result is translated as a `repeat' form."
  (rx-check form)
  (let ((trans (rx-trans-forms form 2)))
    (rx-form (cons 'repeat (cdr trans)) '*)))
55102 | 658 |
659 | |
(defun rx-repeat (form)
  "Parse and produce code from FORM.
FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'.
A nil in the place of M is dropped, which leaves the first
variant.  Signal an error if N is not a positive integer in the
first variant, or if N and M are not non-negative integers with
N <= M in the second."
  (rx-check form)
  ;; Several FORMS are combined into a single `and' form.
  (when (> (length form) 4)
    (setq form (rx-trans-forms form 2)))
  ;; Drop a nil third element.
  (unless (nth 2 form)
    (setq form (append (list (nth 0 form) (nth 1 form))
                       (nthcdr 3 form))))
  (let ((n (nth 1 form))
        (third (nth 2 form)))
    (cond ((= (length form) 3)
           ;; THIRD is the form to repeat exactly N times.
           (unless (and (integerp n) (> n 0))
             (error "rx `repeat' requires positive integer first arg"))
           (format "%s\\{%d\\}" (rx-form third '*) n))
          ;; Otherwise THIRD is the upper bound M.
          ((or (not (integerp third))
               (< third 0)
               (not (integerp n))
               (< n 0)
               (< third n))
           (error "rx `repeat' range error"))
          (t
           (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*)
                   n third)))))
682 | |
683 | |
(defun rx-submatch (form)
  "Parse and produce code from FORM, which is `(submatch ...)'.
Return the translation of the sub-forms of FORM, concatenated and
enclosed in a capturing group."
  (let* ((subforms (cdr form))
         (inner (if (= 1 (length subforms))
                    ;; A lone sub-form is translated without a parent.
                    (rx-form (car subforms))
                  ;; Several sub-forms are implicitly concatenated.
                  (mapconcat (lambda (re) (rx-form re ':))
                             subforms nil))))
    (concat "\\(" inner "\\)")))
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
693 |
39516 | 694 |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
(defun rx-backref (form)
  "Parse and produce code from FORM, which is `(backref N)'.
Return a backslash followed by N formatted as a decimal number."
  (rx-check form)
  (let ((group (nth 1 form)))
    (format "\\%d" group)))
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
699 |
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
(defun rx-check-backref (arg)
  "Check arg ARG for Rx `backref'.
Return t if ARG is an integer from 1 to 9, and signal an error
otherwise."
  (if (and (integerp arg)
           (<= 1 arg)
           (>= 9 arg))
      t
    (error "rx `backref' requires numeric 1<=arg<=9: %s" arg)))
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
704 |
39516 | 705 (defun rx-kleene (form) |
706 "Parse and produce code from FORM. | |
707 FORM is `(OP FORM1)', where OP is one of the `zero-or-one', | |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
708 `zero-or-more' etc. operators. |
39516 | 709 If OP is one of `*', `+', `?', produce a greedy regexp. |
710 If OP is one of `*?', `+?', `??', produce a non-greedy regexp. | |
711 If OP is anything else, produce a greedy regexp if `rx-greedy-flag' | |
712 is non-nil." | |
713 (rx-check form) | |
55102 | 714 (setq form (rx-trans-forms form)) |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
715 (let ((suffix (cond ((memq (car form) '(* + ?\s)) "") |
39516 | 716 ((memq (car form) '(*? +? ??)) "?") |
717 (rx-greedy-flag "") | |
718 (t "?"))) | |
719 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") | |
720 ((memq (car form) '(+ +? 1+ one-or-more)) "+") | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
721 (t "?")))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
722 (rx-group-if |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
723 (concat (rx-form (cadr form) '*) op suffix) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
724 (and (memq rx-parent '(t *)) rx-parent)))) |
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
725 |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
726 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
727 (defun rx-atomic-p (r &optional lax) |
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
728 "Return non-nil if regexp string R is atomic. |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
729 An atomic regexp R is one such that a suffix operator |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
730 appended to R will apply to all of R. For example, \"a\" |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
731 \"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\", |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
732 \"[ab]c\", and \"ab\\|ab*c\" are not atomic. |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
733 |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
734 This function may return false negatives, but it will not |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
735 return false positives. It is nevertheless useful in |
78474
88c9f4e4160e
Replace `iff' in doc-strings and comments.
Glenn Morris <rgm@gnu.org>
parents:
78217
diff
changeset
|
736 situations where an efficiency shortcut can be taken only if a |
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
737 regexp is atomic. The function can be improved to detect |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
738 more cases of atomic regexps. Presently, this function |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
739 detects the following categories of atomic regexp: |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
740 |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
741 a group or shy group: \\(...\\) |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
742 a character class: [...] |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
743 a single character: a |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
744 |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
745 On the other hand, false negatives will be returned for |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
746 regexps that are atomic but end in operators, such as |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
747 \"a+\". I think these are rare. Probably such cases could |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
748 be detected without much effort. A guarantee of no false |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
749 negatives would require a theoretic specification of the set |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
750 of all atomic regexps." |
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
751 (let ((l (length r))) |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
752 (cond |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
753 ((<= l 1)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
754 ((= l 2) (= (aref r 0) ?\\)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
755 ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
756 ((null lax) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
757 (cond |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
758 ((string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^\]]\\)*\\]\\'" r)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
759 ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^\)]\\)*\\\\)\\'" r))))))) |
39516 | 760 |
761 | |
762 (defun rx-syntax (form) | |
763 "Parse and produce code from FORM, which is `(syntax SYMBOL)'." | |
764 (rx-check form) | |
55103
93f6ab2a0eb5
(rx-syntax): Move sregex style syntax to code.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
55102
diff
changeset
|
765 (let* ((sym (cadr form)) |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
766 (syntax (cdr (assq sym rx-syntax)))) |
39516 | 767 (unless syntax |
55103
93f6ab2a0eb5
(rx-syntax): Move sregex style syntax to code.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
55102
diff
changeset
|
768 ;; Try sregex compatibility. |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
769 (cond |
112038
ad2a8fce0d7a
* lisp/emacs-lisp/rx.el (rx-syntax): Fix typo.
Andreas Schwab <schwab@linux-m68k.org>
parents:
112017
diff
changeset
|
770 ((characterp sym) (setq syntax sym)) |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
771 ((symbolp sym) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
772 (let ((name (symbol-name sym))) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
773 (if (= 1 (length name)) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
774 (setq syntax (aref name 0)))))) |
55103
93f6ab2a0eb5
(rx-syntax): Move sregex style syntax to code.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
55102
diff
changeset
|
775 (unless syntax |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
776 (error "Unknown rx syntax `%s'" sym))) |
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
777 (format "\\s%c" syntax))) |
39516 | 778 |
779 | |
780 (defun rx-check-category (form) | |
781 "Check the argument FORM of a `(category FORM)'." | |
782 (unless (or (integerp form) | |
783 (cdr (assq form rx-categories))) | |
784 (error "Unknown category `%s'" form)) | |
785 t) | |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
786 |
39516 | 787 |
788 (defun rx-category (form) | |
55102 | 789 "Parse and produce code from FORM, which is `(category SYMBOL)'." |
39516 | 790 (rx-check form) |
791 (let ((char (if (integerp (cadr form)) | |
792 (cadr form) | |
793 (cdr (assq (cadr form) rx-categories))))) | |
794 (format "\\c%c" char))) | |
795 | |
796 | |
797 (defun rx-eval (form) | |
798 "Parse and produce code from FORM, which is `(eval FORM)'." | |
799 (rx-check form) | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
800 (rx-form (eval (cadr form)) rx-parent)) |
39516 | 801 |
802 | |
803 (defun rx-greedy (form) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
804 "Parse and produce code from FORM. |
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
805 If FORM is '(minimal-match FORM1)', non-greedy versions of `*', |
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
806 `+', and `?' operators will be used in FORM1. If FORM is |
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
807 '(maximal-match FORM1)', greedy operators will be used." |
39516 | 808 (rx-check form) |
809 (let ((rx-greedy-flag (eq (car form) 'maximal-match))) | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
810 (rx-form (cadr form) rx-parent))) |
39516 | 811 |
812 | |
813 (defun rx-regexp (form) | |
814 "Parse and produce code from FORM, which is `(regexp STRING)'." | |
815 (rx-check form) | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
816 (rx-group-if (cadr form) rx-parent)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
817 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
818 |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
819 (defun rx-form (form &optional rx-parent) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
820 "Parse and produce code for regular expression FORM. |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
821 FORM is a regular expression in sexp form. |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
822 RX-PARENT identifies the type of the calling (enclosing) expression; it |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
823 controls whether shy groups are put around the result, and is also used by other functions." |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
824 (if (stringp form) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
825 (rx-group-if (regexp-quote form) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
826 (if (and (eq rx-parent '*) (< 1 (length form))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
827 rx-parent)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
828 (cond ((integerp form) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
829 (regexp-quote (char-to-string form))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
830 ((symbolp form) |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
831 (let ((info (rx-info form nil))) |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
832 (cond ((stringp info) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
833 info) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
834 ((null info) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
835 (error "Unknown rx form `%s'" form)) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
836 (t |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
837 (funcall (nth 0 info) form))))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
838 ((consp form) |
112017
db006527425b
* lisp/emacs-lisp/rx.el: Make it a superset of sregex.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
110231
diff
changeset
|
839 (let ((info (rx-info (car form) 'head))) |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
840 (unless (consp info) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
841 (error "Unknown rx form `%s'" (car form))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
842 (funcall (nth 0 info) form))) |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
843 (t |
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
844 (error "rx syntax error at `%s'" form))))) |
39516 | 845 |
846 | |
847 ;;;###autoload | |
848 (defun rx-to-string (form &optional no-group) | |
849 "Parse and produce code for regular expression FORM. | |
850 FORM is a regular expression in sexp form. | |
851 NO-GROUP non-nil means don't put shy groups around the result." | |
98557
73eaaf9adee6
(rx-constituents): Change `anything' to call
Chong Yidong <cyd@stupidchicken.com>
parents:
98489
diff
changeset
|
852 (rx-group-if (rx-form form) (null no-group))) |
39516 | 853 |
854 | |
855 ;;;###autoload | |
55102 | 856 (defmacro rx (&rest regexps) |
857 "Translate regular expressions REGEXPS in sexp form to a regexp string. | |
858 REGEXPS is a non-empty sequence of forms of the sort listed below. | |
100727 | 859 |
860 Note that `rx' is a Lisp macro; when used in a Lisp program being | |
861 compiled, the translation is performed by the compiler. | |
862 See `rx-to-string' for how to do such a translation at run-time. | |
39516 | 863 |
864 The following are valid subforms of regular expressions in sexp | |
865 notation. | |
866 | |
867 STRING | |
868 matches string STRING literally. | |
869 | |
870 CHAR | |
871 matches character CHAR literally. | |
872 | |
55102 | 873 `not-newline', `nonl' |
39516 | 874 matches any character except a newline. |
83899
bdeef0472e21
(rx): Fix typo in docstring.
Michaël Cadilhac <michael.cadilhac@lrde.org>
parents:
82365
diff
changeset
|
875 |
39516 | 876 `anything' |
877 matches any character | |
878 | |
55102 | 879 `(any SET ...)' |
880 `(in SET ...)' | |
881 `(char SET ...)' | |
882 matches any character in SET .... SET may be a character or string. | |
39516 | 883 Ranges of characters can be specified as `A-Z' in strings. |
55102 | 884 Ranges may also be specified as conses like `(?A . ?Z)'. |
39516 | 885 |
55102 | 886 SET may also be the name of a character class: `digit', |
887 `control', `hex-digit', `blank', `graph', `print', `alnum', | |
888 `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper', | |
889 `word', or one of their synonyms. | |
39516 | 890 |
55102 | 891 `(not (any SET ...))' |
892 matches any character not in SET ... | |
39516 | 893 |
55102 | 894 `line-start', `bol' |
39516 | 895 matches the empty string, but only at the beginning of a line |
896 in the text being matched | |
897 | |
55102 | 898 `line-end', `eol' |
39516 | 899 is similar to `line-start' but matches only at the end of a line |
900 | |
55102 | 901 `string-start', `bos', `bot' |
39516 | 902 matches the empty string, but only at the beginning of the |
903 string being matched against. | |
904 | |
55102 | 905 `string-end', `eos', `eot' |
39516 | 906 matches the empty string, but only at the end of the |
907 string being matched against. | |
908 | |
909 `buffer-start' | |
910 matches the empty string, but only at the beginning of the | |
55102 | 911 buffer being matched against. Actually equivalent to `string-start'. |
39516 | 912 |
913 `buffer-end' | |
914 matches the empty string, but only at the end of the | |
55102 | 915 buffer being matched against. Actually equivalent to `string-end'. |
39516 | 916 |
917 `point' | |
918 matches the empty string, but only at point. | |
919 | |
55102 | 920 `word-start', `bow' |
77829
d858d80ae609
Nikolaj Schumacher <n_schumacher at web.de> (tiny change)
Glenn Morris <rgm@gnu.org>
parents:
75346
diff
changeset
|
921 matches the empty string, but only at the beginning of a word. |
39516 | 922 |
55102 | 923 `word-end', `eow' |
39516 | 924 matches the empty string, but only at the end of a word. |
925 | |
926 `word-boundary' | |
927 matches the empty string, but only at the beginning or end of a | |
928 word. | |
929 | |
930 `(not word-boundary)' | |
55102 | 931 `not-word-boundary' |
39516 | 932 matches the empty string, but not at the beginning or end of a |
933 word. | |
934 | |
77829
d858d80ae609
Nikolaj Schumacher <n_schumacher at web.de> (tiny change)
Glenn Morris <rgm@gnu.org>
parents:
75346
diff
changeset
|
935 `symbol-start' |
d858d80ae609
Nikolaj Schumacher <n_schumacher at web.de> (tiny change)
Glenn Morris <rgm@gnu.org>
parents:
75346
diff
changeset
|
936 matches the empty string, but only at the beginning of a symbol. |
d858d80ae609
Nikolaj Schumacher <n_schumacher at web.de> (tiny change)
Glenn Morris <rgm@gnu.org>
parents:
75346
diff
changeset
|
937 |
d858d80ae609
Nikolaj Schumacher <n_schumacher at web.de> (tiny change)
Glenn Morris <rgm@gnu.org>
parents:
75346
diff
changeset
|
938 `symbol-end' |
d858d80ae609
Nikolaj Schumacher <n_schumacher at web.de> (tiny change)
Glenn Morris <rgm@gnu.org>
parents:
75346
diff
changeset
|
939 matches the empty string, but only at the end of a symbol. |
d858d80ae609
Nikolaj Schumacher <n_schumacher at web.de> (tiny change)
Glenn Morris <rgm@gnu.org>
parents:
75346
diff
changeset
|
940 |
55102 | 941 `digit', `numeric', `num' |
39516 | 942 matches 0 through 9. |
943 | |
55102 | 944 `control', `cntrl' |
39516 | 945 matches ASCII control characters. |
946 | |
55102 | 947 `hex-digit', `hex', `xdigit' |
39516 | 948 matches 0 through 9, a through f and A through F. |
949 | |
950 `blank' | |
951 matches space and tab only. | |
952 | |
55102 | 953 `graphic', `graph' |
39516 | 954 matches graphic characters--everything except ASCII control chars, |
955 space, and DEL. | |
956 | |
55102 | 957 `printing', `print' |
39516 | 958 matches printing characters--everything except ASCII control chars |
959 and DEL. | |
960 | |
55102 | 961 `alphanumeric', `alnum' |
39516 | 962 matches letters and digits. (But at present, for multibyte characters, |
963 it matches anything that has word syntax.) | |
964 | |
55102 | 965 `letter', `alphabetic', `alpha' |
39516 | 966 matches letters. (But at present, for multibyte characters, |
967 it matches anything that has word syntax.) | |
968 | |
969 `ascii' | |
970 matches ASCII (unibyte) characters. | |
971 | |
972 `nonascii' | |
973 matches non-ASCII (multibyte) characters. | |
974 | |
55102 | 975 `lower', `lower-case' |
39516 | 976 matches anything lower-case. |
977 | |
55102 | 978 `upper', `upper-case' |
39516 | 979 matches anything upper-case. |
980 | |
55102 | 981 `punctuation', `punct' |
39516 | 982 matches punctuation. (But at present, for multibyte characters, |
983 it matches anything that has non-word syntax.) | |
984 | |
55102 | 985 `space', `whitespace', `white' |
39516 | 986 matches anything that has whitespace syntax. |
987 | |
55102 | 988 `word', `wordchar' |
39516 | 989 matches anything that has word syntax. |
990 | |
55102 | 991 `not-wordchar' |
992 matches anything that has non-word syntax. | |
993 | |
39516 | 994 `(syntax SYNTAX)' |
995 matches a character with syntax SYNTAX. SYNTAX must be one | |
55102 | 996 of the following symbols, or a symbol corresponding to the syntax |
997 character, e.g. `\\.' for `\\s.'. | |
39516 | 998 |
999 `whitespace' (\\s- in string notation) | |
1000 `punctuation' (\\s.) | |
1001 `word' (\\sw) | |
1002 `symbol' (\\s_) | |
1003 `open-parenthesis' (\\s() | |
1004 `close-parenthesis' (\\s)) | |
1005 `expression-prefix' (\\s') | |
1006 `string-quote' (\\s\") | |
1007 `paired-delimiter' (\\s$) | |
1008 `escape' (\\s\\) | |
1009 `character-quote' (\\s/) | |
1010 `comment-start' (\\s<) | |
1011 `comment-end' (\\s>) | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
1012 `string-delimiter' (\\s|) |
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
1013 `comment-delimiter' (\\s!) |
39516 | 1014 |
1015 `(not (syntax SYNTAX))' | |
55102 | 1016 matches a character that doesn't have syntax SYNTAX. |
39516 | 1017 |
1018 `(category CATEGORY)' | |
1019 matches a character with category CATEGORY. CATEGORY must be | |
1020 either a character to use for C in \\cC, or one of the following symbols. | |
1021 | |
1022 `consonant' (\\c0 in string notation) | |
1023 `base-vowel' (\\c1) | |
1024 `upper-diacritical-mark' (\\c2) | |
1025 `lower-diacritical-mark' (\\c3) | |
1026 `tone-mark' (\\c4) | |
1027 `symbol' (\\c5) | |
1028 `digit' (\\c6) | |
1029 `vowel-modifying-diacritical-mark' (\\c7) | |
1030 `vowel-sign' (\\c8) | |
1031 `semivowel-lower' (\\c9) | |
1032 `not-at-end-of-line' (\\c<) | |
1033 `not-at-beginning-of-line' (\\c>) | |
1034 `alpha-numeric-two-byte' (\\cA) | |
1035 `chinse-two-byte' (\\cC) | |
1036 `greek-two-byte' (\\cG) | |
1037 `japanese-hiragana-two-byte' (\\cH) | |
1038 `indian-two-byte' (\\cI) | |
1039 `japanese-katakana-two-byte' (\\cK) | |
1040 `korean-hangul-two-byte' (\\cN) | |
1041 `cyrillic-two-byte' (\\cY) | |
55102 | 1042 `combining-diacritic' (\\c^) |
39516 | 1043 `ascii' (\\ca) |
1044 `arabic' (\\cb) | |
1045 `chinese' (\\cc) | |
1046 `ethiopic' (\\ce) | |
1047 `greek' (\\cg) | |
1048 `korean' (\\ch) | |
1049 `indian' (\\ci) | |
1050 `japanese' (\\cj) | |
1051 `japanese-katakana' (\\ck) | |
1052 `latin' (\\cl) | |
1053 `lao' (\\co) | |
1054 `tibetan' (\\cq) | |
1055 `japanese-roman' (\\cr) | |
1056 `thai' (\\ct) | |
1057 `vietnamese' (\\cv) | |
1058 `hebrew' (\\cw) | |
1059 `cyrillic' (\\cy) | |
1060 `can-break' (\\c|) | |
1061 | |
1062 `(not (category CATEGORY))' | |
55102 | 1063 matches a character that doesn't have category CATEGORY. |
39516 | 1064 |
1065 `(and SEXP1 SEXP2 ...)' | |
55102 | 1066 `(: SEXP1 SEXP2 ...)' |
1067 `(seq SEXP1 SEXP2 ...)' | |
1068 `(sequence SEXP1 SEXP2 ...)' | |
39516 | 1069 matches what SEXP1 matches, followed by what SEXP2 matches, etc. |
1070 | |
1071 `(submatch SEXP1 SEXP2 ...)' | |
55102 | 1072 `(group SEXP1 SEXP2 ...)' |
39516 | 1073 like `and', but makes the match accessible with `match-end', |
1074 `match-beginning', and `match-string'. | |
1075 | |
1076 `(or SEXP1 SEXP2 ...)' | |
55102 | 1077 `(| SEXP1 SEXP2 ...)' |
39516 | 1078 matches anything that matches SEXP1 or SEXP2, etc. If all |
1079 args are strings, use `regexp-opt' to optimize the resulting | |
1080 regular expression. | |
1081 | |
1082 `(minimal-match SEXP)' | |
1083 produce a non-greedy regexp for SEXP. Normally, regexps matching | |
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
1084 zero or more occurrences of something are \"greedy\" in that they |
39516 | 1085 match as much as they can, as long as the overall regexp can |
1086 still match. A non-greedy regexp matches as little as possible. | |
1087 | |
1088 `(maximal-match SEXP)' | |
47257 | 1089 produce a greedy regexp for SEXP. This is the default. |
39516 | 1090 |
55102 | 1091 Below, `SEXP ...' represents a sequence of regexp forms, treated as if |
1092 enclosed in `(and ...)'. | |
39516 | 1093 |
55102 | 1094 `(zero-or-more SEXP ...)' |
1095 `(0+ SEXP ...)' | |
1096 matches zero or more occurrences of what SEXP ... matches. | |
39516 | 1097 |
55102 | 1098 `(* SEXP ...)' |
1099 like `zero-or-more', but always produces a greedy regexp, independent | |
1100 of `rx-greedy-flag'. | |
39516 | 1101 |
55102 | 1102 `(*? SEXP ...)' |
1103 like `zero-or-more', but always produces a non-greedy regexp, | |
1104 independent of `rx-greedy-flag'. | |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
1105 |
55102 | 1106 `(one-or-more SEXP ...)' |
1107 `(1+ SEXP ...)' | |
1108 matches one or more occurrences of SEXP ... | |
39516 | 1109 |
55102 | 1110 `(+ SEXP ...)' |
39516 | 1111 like `one-or-more', but always produces a greedy regexp. |
1112 | |
55102 | 1113 `(+? SEXP ...)' |
39516 | 1114 like `one-or-more', but always produces a non-greedy regexp. |
1115 | |
55102 | 1116 `(zero-or-one SEXP ...)' |
1117 `(optional SEXP ...)' | |
1118 `(opt SEXP ...)' | |
39516 | 1119 matches zero or one occurrences of what SEXP ... matches. |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
1120 |
55102 | 1121 `(? SEXP ...)' |
39516 | 1122 like `zero-or-one', but always produces a greedy regexp. |
1123 | |
55102 | 1124 `(?? SEXP ...)' |
39516 | 1125 like `zero-or-one', but always produces a non-greedy regexp. |
1126 | |
1127 `(repeat N SEXP)' | |
55102 | 1128 `(= N SEXP ...)' |
1129 matches N occurrences. | |
1130 | |
1131 `(>= N SEXP ...)' | |
1132 matches N or more occurrences. | |
39516 | 1133 |
1134 `(repeat N M SEXP)' | |
55102 | 1135 `(** N M SEXP ...)' |
1136 matches N to M occurrences. | |
1137 | |
1138 `(backref N)' | |
54461
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
1139 matches what was matched previously by submatch N. |
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
1140 |
39516 | 1141 `(eval FORM)' |
54461
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
1142 evaluate FORM and insert result. If result is a string, |
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
1143 `regexp-quote' it. |
39516 | 1144 |
1145 `(regexp REGEXP)' | |
54461
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
1146 include REGEXP in string notation in the result." |
55102 | 1147 (cond ((null regexps) |
1148 (error "No regexp")) | |
1149 ((cdr regexps) | |
1150 (rx-to-string `(and ,@regexps) t)) | |
1151 (t | |
1152 (rx-to-string (car regexps) t)))) | |
1153 | |
1154 ;; ;; sregex.el replacement | |
39516 | 1155 |
55102 | 1156 ;; ;;;###autoload (provide 'sregex) |
1157 ;; ;;;###autoload (autoload 'sregex "rx") | |
1158 ;; (defalias 'sregex 'rx-to-string) | |
1159 ;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro) | |
1160 ;; (defalias 'sregexq 'rx) | |
1161 | |
39516 | 1162 (provide 'rx) |
1163 | |
1164 ;;; rx.el ends here |